In [1]:
import os 
import numpy as np 
import pandas as pd 

### handle data from Nansen NFT Paradise 
- I don't have CSV export access, so I copy-pasted the table from the website and reshaped the data 
- already filtered (floor price between 0.05 ~ 0.12)
- from https://pro.nansen.ai/nft-paradise


In [255]:
# load data: every sheet of the workbook is read without a header row because
# the copy-pasted website table arrives as one flat run of cells
data_path = os.path.join(os.getcwd(), "data")
file_name = os.path.join(data_path, "nansen_nft_paradise_1.xlsx")
sheet_to_df_map = pd.read_excel(file_name, sheet_name=None, header=None)

# reshape data: fold each sheet's cells into rows of len(col_names) fields
# (the width is derived from col_names so the two cannot drift apart)
col_names = ["NFT_collections", "Volumne_ETH", "Mkt_Cap_ETH", "Avg_Price_ETH",
             "Floor_ETH", "Floor_24H", "Floor_3D", "#Wallets", "Deployed"]
lst_data = [
    pd.DataFrame(data=sheet_df.values.reshape(-1, len(col_names)), columns=col_names)
    for sheet_df in sheet_to_df_map.values()
]

# concat and save data
# - rows whose first field is "NFT Collection" are pasted table headers, not data
# - chained (non-inplace) calls avoid mutating a .loc slice, which is what
#   triggers pandas' SettingWithCopyWarning
df_data = pd.concat(lst_data, ignore_index=True)
df_data = (
    df_data
    .loc[df_data["NFT_collections"] != "NFT Collection"]
    .drop_duplicates(subset="NFT_collections")
)
df_data.to_excel(os.path.join(data_path, "nansen_nft_paradise_handled.xlsx"))
df_data.head()

Unnamed: 0,NFT_collections,Volumne_ETH,Mkt_Cap_ETH,Avg_Price_ETH,Floor_ETH,Floor_24H,Floor_3D,#Wallets,Deployed
0,TrippyToadz,426,154,0.0735,0.068,0,-,2496,40h ago
1,Juicebox Frens,211,950,0.1363,0.0859,-0.2,0.24,2890,5d ago
2,Mini Supers,149,139,0.0728,0.084,-,-,1650,37h ago
3,MPL Official,75,2299,0.2299,0.11,-0.3,-0.46,4033,6d ago
4,ToonSquad,65,487,0.0809,0.0558,-0.0084,-0.3,2454,5d ago


### Twitter hotness filtration 

criteria (can be customized)
- mean_tweets_per_week
- mean_tweets_per_week_exclu_retweets
- mean_like_count
- mean_retweet_count
- mean_reply_count

In [51]:
# datetime is used to build the tweet look-back window; tqdm drives the progress bar
import datetime, tqdm 
# NOTE(review): `connect` is not defined anywhere in this notebook, so it
# presumably comes from this star import — confirm, and prefer importing the
# needed names explicitly so the notebook's dependencies are visible.
from twitter_giveaway_bot import *

# connect to twitter api v2 
# `client` is the handle passed to every Twitter request made further down
client = connect()

In [260]:
# Twitter hotness filtration 
def process_project_from_tweeter(client, project_name):
    """Collect Twitter activity statistics for the accounts linked to a project.

    The function searches recent tweets for ``project_name``, extracts every
    ``@handle`` mentioned in them, keeps the handles that contain at least one
    word of the project name, and computes activity statistics over each kept
    account's tweets from the last 14 days.

    Parameters
    ----------
    client
        Twitter API v2 client exposing ``search_recent_tweets``, ``get_user``
        and ``get_users_tweets`` (each returning an object with a ``data``
        attribute).
    project_name : str
        Name of the NFT collection to look up.

    Returns
    -------
    pandas.DataFrame
        One row per matched account, indexed by ``account``, with the columns
        ``NFT_collections``, ``followers_count``, ``following_count``,
        ``tweet_count``, ``mean_tweets_per_week``,
        ``mean_tweets_per_week_exclu_retweets``, ``mean_like_count``,
        ``mean_retweet_count`` and ``mean_reply_count``. The frame is empty
        when nothing is found.

    Notes
    -----
    Only one page (at most 100 tweets) is fetched per account, so
    ``mean_tweets_per_week`` is capped at 50 and is underestimated for very
    active accounts.
    """
    # Local import so the function does not depend on `re` leaking in through
    # a star import elsewhere in the notebook.
    import re

    columns = ["account", "NFT_collections", "followers_count", "following_count", "tweet_count",
               "mean_tweets_per_week", "mean_tweets_per_week_exclu_retweets",
               "mean_like_count", "mean_retweet_count", "mean_reply_count"]
    window_days = 14
    records = []

    # search related recent tweets
    recent_tweets = client.search_recent_tweets(project_name, user_auth=True)
    if recent_tweets.data is not None:
        # Collect every mentioned handle once. Handles are case-insensitive, so
        # they are keyed by their lowercase form; the first-seen spelling is kept.
        mentioned = {}
        for tweet in recent_tweets.data:
            for handle in re.findall(r"@([a-zA-Z0-9_]+)", tweet["text"]):
                mentioned.setdefault(handle.lower(), handle)

        # Heuristic: the project's account should contain at least one word of
        # the project name. split() without an argument drops empty tokens
        # (an empty token is a substring of every handle and would match all).
        name_tokens = project_name.lower().split()
        candidate_accounts = [
            handle
            for lowered, handle in mentioned.items()
            if any(token in lowered for token in name_tokens)
        ]

        # Timezone-aware UTC start of the window: a naive local datetime would
        # be sent as if it were UTC and shift the window by the local offset.
        start_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=window_days)

        for account in candidate_accounts:
            # get account info
            user = client.get_user(username=account, user_fields=["public_metrics"], user_auth=True).data
            if not user:
                continue
            account_info = user["public_metrics"]

            # get account recent tweets (single page, at most 100 tweets)
            latest_tweets = client.get_users_tweets(
                user["id"],
                user_auth=True,
                max_results=100,
                tweet_fields=["text", "public_metrics"],
                start_time=start_time,
            ).data
            if not latest_tweets:
                continue

            # A retweet's text starts with "RT @"; testing for "RT" anywhere in
            # the text would also discard original tweets such as "SMART ...".
            original_tweet_count = sum(
                1 for one_tweet in latest_tweets if not one_tweet["text"].startswith("RT @")
            )
            tweet_metrics = [one_tweet["public_metrics"] for one_tweet in latest_tweets]

            records.append([
                account,
                project_name,
                account_info["followers_count"],
                account_info["following_count"],
                account_info["tweet_count"],
                len(latest_tweets) * 7 / window_days,
                original_tweet_count * 7 / window_days,
                np.mean([metrics["like_count"] for metrics in tweet_metrics]),
                np.mean([metrics["retweet_count"] for metrics in tweet_metrics]),
                np.mean([metrics["reply_count"] for metrics in tweet_metrics]),
            ])

    # Build the frame once from the collected rows instead of growing it row by row.
    return pd.DataFrame(records, columns=columns).set_index("account")

# quick manual check on a single collection; the returned frame is the cell output
project_name = "TrippyToadz"
process_project_from_tweeter(client, project_name)

Unnamed: 0_level_0,NFT_collections,followers_count,following_count,tweet_count,mean_tweets_per_week,mean_tweets_per_week_exclu_retweets,mean_like_count,mean_retweet_count,mean_reply_count
account,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
trippytoadznft,TrippyToadz,16490,910,896,50.0,39.0,84.1,65.89,41.13


In [261]:
# `time` is imported explicitly here so this cell does not rely on it leaking
# in through the star import of twitter_giveaway_bot
import time

# Twitter rate-limit windows last 15 minutes, so pause slightly longer.
# (The previous 16 * 60 * 60 was 16 hours, not 16 minutes.)
RATE_LIMIT_PAUSE_SECONDS = 16 * 60
# One retry after the pause; a project that fails twice is skipped.
MAX_ATTEMPTS = 2

lst_df_tweets_stats = []
for project_name in tqdm.tqdm(df_data["NFT_collections"]):
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            lst_df_tweets_stats.append(process_project_from_tweeter(client, project_name))
            break
        except Exception as exc:
            # Most failures here are API request limits, but show the actual
            # error so other problems are not silently mistaken for one.
            print(f"{project_name}: request failed ({exc!r})")
            if attempt < MAX_ATTEMPTS:
                print("Need to relax for 16 minites")
                time.sleep(RATE_LIMIT_PAUSE_SECONDS)
    # small gap between projects to stay gentle on the API
    time.sleep(0.1)

# one frame for all projects; the "account" index becomes a column and each
# account is kept once (first occurrence wins)
df_tweets_stats_all = (
    pd.concat(lst_df_tweets_stats)
    .reset_index()
    .drop_duplicates(subset="account")
)

 13%|█▎        | 20/152 [00:14<01:21,  1.62it/s]

Need to relax for 16 minites


In [230]:
# merge data and statistics
# The join key is spelled out: without `on`, pandas joins on every column the
# two frames happen to share, which would silently change the result if a
# same-named column were ever added to either frame.
df_results = pd.merge(df_data, df_tweets_stats_all, how="inner", on="NFT_collections")
df_results

Unnamed: 0,NFT_collections,Volumne_ETH,Mkt_Cap_ETH,Avg_Price_ETH,Floor_ETH,Floor_24H,Floor_3D,#Wallets,Deployed,account,followers_count,following_count,tweet_count,mean_tweets_per_week,mean_tweets_per_week_exclu_retweets,mean_like_count,mean_retweet_count,mean_reply_count
0,TrippyToadz,426,154,0.0735,0.068,0,-,2496,40h ago,trippytoadznft,16462,910,894,50.0,39.0,84.91,67.31,42.37
1,Juicebox Frens,211,950,0.1363,0.0859,-0.2,0.24,2890,5d ago,Juiceboxfrens,5732,1326,795,50.0,36.0,11.04,2.21,1.68
2,Mini Supers,149,139,0.0728,0.084,-,-,1650,37h ago,minisupersnft,10505,2,109,26.5,12.5,99.188679,157.433962,62.320755
3,ToonSquad,65,487,0.0809,0.0558,-0.0084,-0.3,2454,5d ago,ToonSquadNFT,51322,10,516,50.0,14.5,13.18,17.04,2.11
4,Lucky Ducky,57,9.65,0.0825,0.099,-,-,359,22h ago,luckyduckynft,17438,199,1208,50.0,30.0,16.51,23.64,3.85
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108,DarkHorizon,1.69,354,0.0407,0.0469,1.34,1.12,2762,163d ago,DarkHorizonNFT,13389,520,588,45.5,32.5,8.054945,2.912088,1.032967
109,The Royal Cubs,1.66,490,0.0552,0.053,0.17,0.0127,4570,61d ago,TheRoyalCubs,21287,3,174,7.5,6.0,99.4,63.733333,40.533333
110,Seabums,1.62,222,0.038,0.03,0.0843,0.5,3198,109d ago,SeabumsNFTs,5184,422,403,19.5,12.5,12.384615,27.641026,3.102564
111,Guardians of the Metaverse,1.61,804,0.0804,0.06,0.0942,0.41,4161,198d ago,TheBinanceNFT,626345,312,1804,50.0,31.0,121.13,252.43,64.46


In [228]:
# merge data and statistics (explicit join key instead of "all shared columns")
df_merged = pd.merge(df_data, df_tweets_stats_all, how="inner", on="NFT_collections")

# filtration: a project passes only if every metric is strictly above its
# threshold; tune the values here
min_thresholds = {
    "followers_count": 40000,
    "mean_tweets_per_week_exclu_retweets": 2,
    "mean_like_count": 10,
    "mean_retweet_count": 10,
    "mean_reply_count": 10,
}
conditions = {
    column: df_merged.loc[:, column] > threshold
    for column, threshold in min_thresholds.items()
}
# share of merged rows passing each individual condition (a fraction in [0, 1])
for column, condition in conditions.items():
    print(f"% con_{column}: {np.mean(condition)}")

# combined condition: all individual conditions must hold for the row
con_all = pd.DataFrame(conditions).all(axis=1)
df_results = df_merged.loc[con_all, :]
# len() counts rows; count()[0] counted non-null cells of the first column and
# relied on deprecated positional indexing of a labelled Series
print(f"there are {len(df_results)} satisfying the twitter condition")

# make sure the output folder exists before writing the workbook
results_dir = os.path.join(os.getcwd(), "results")
os.makedirs(results_dir, exist_ok=True)
df_results.to_excel(os.path.join(results_dir, "results_nft_project.xlsx"))
df_results

% con_followers_count: 0.3893805309734513
% con_mean_tweets_per_week_exclu_retweets: 0.9380530973451328
% con_mean_like_count: 0.7610619469026548
% con_mean_retweet_count: 0.7433628318584071
% con_mean_reply_count: 0.504424778761062
there are 33 satisfying the twitter condition


Unnamed: 0,NFT_collections,Volumne_ETH,Mkt_Cap_ETH,Avg_Price_ETH,Floor_ETH,Floor_24H,Floor_3D,#Wallets,Deployed,account,followers_count,following_count,tweet_count,mean_tweets_per_week,mean_tweets_per_week_exclu_retweets,mean_like_count,mean_retweet_count,mean_reply_count
5,Villagers of XOLO,50,3274,0.2669,0.11,-0.2,-0.44,4858,10d ago,PlanetXOLO,45899,2,44,12.5,9.5,307.32,118.0,18.36
9,Froggy Friends,13,254,0.0572,0.0569,0.0626,-0.15,2485,12d ago,FroggyFriendNFT,44967,160,195,50.0,28.5,227.5,334.51,149.78
11,Squishiverse,11,1335,0.1502,0.1,-0.2,0.0903,1960,40d ago,SquishiverseNFT,60164,70,305,50.0,32.0,118.3,121.34,93.46
13,Rubber Duck Bath Party,8.47,974,0.0974,0.085,-0.25,-0.34,5929,24d ago,RubberDuckBP,60771,4,1086,39.0,30.0,154.282051,74.384615,27.641026
14,Wabi Sabi Collective,8.42,673,0.1186,0.105,-0.13,0.28,1919,20d ago,WabiSabiNFT,105031,17,397,32.5,23.0,994.815385,942.446154,825.784615
15,Crypto Bears,8.03,591,0.0887,0.079,1.03,0.58,3796,39d ago,crypto,858091,98,9939,50.0,40.5,45.7,20.14,31.12
16,Crypto Bears,8.03,591,0.0887,0.079,1.03,0.58,3796,39d ago,FancyBearsMeta,43078,1420,2143,50.0,30.5,128.76,105.99,30.31
18,Crypto Bears,8.03,591,0.0887,0.079,1.03,0.58,3796,39d ago,crypto,858091,98,9939,50.0,40.5,45.72,20.14,31.12
19,Crypto Bears,8.03,591,0.0887,0.079,1.03,0.58,3796,39d ago,FancyBearsMeta,43077,1420,2143,50.0,30.5,128.76,105.99,30.31
21,Wulfz,7.98,1082,0.1376,0.093,0.45,0.35,991,85d ago,Wulfznft,76456,13,434,32.5,17.0,362.0,353.923077,344.138462
