In [25]:
import pandas as pd
import math

##Add filter to remove collections with less than X sales

df_ratings = pd.concat(
    [pd.read_csv('nft-buys-'+str(x)+'.csv') for x in range(0,15)]
    , axis = 0
    , ignore_index = True
).sort_values(
    by = 'NO_BUYS'
    ,ascending = False
).drop_duplicates(
    subset=['BUYER_ADDRESS','NFT_ADDRESS']
    , keep='first'
    , ignore_index=False
).reset_index(
    drop = True
)

#Get the maximum buys on a single collection per user
df_user_buys = df_ratings.groupby('BUYER_ADDRESS')['NO_BUYS'].max().rename('MAX_BUYS')

df_ratings = df_ratings.merge(df_user_buys, how='inner',on='BUYER_ADDRESS')

del df_user_buys

#Normalize N° buys per user per collection to derive a rating metric
df_ratings['ratings_lin'] = df_ratings.NO_BUYS / df_ratings.MAX_BUYS

#sqrt ratings to accentuate weight of at least 1 buy
df_ratings['ratings'] = df_ratings.ratings_lin.apply(lambda x : math.sqrt(x))

print("df size", len(df_ratings.BUYER_ADDRESS.tolist()))
print("n° collections", len(df_ratings.NFT_ADDRESS.unique()))
print("n° users", len(df_ratings.BUYER_ADDRESS.unique()))
print("n° transactions", int(df_ratings.NO_BUYS.sum()))

df size 1499889
n° collections 3774
n° users 117267
n° transactions 3794085.0


In [2]:
#Create a mapping from NFT collection address to an incremental product_id
nft_address_to_product_id = pd.DataFrame(
    df_ratings.NFT_ADDRESS.drop_duplicates()
).reset_index(drop=True).reset_index().rename(columns={'index':'product_id'})

df_names = pd.read_csv('nft-names.csv')

df_names = df_names.sort_values(
    by='PROJECT_NAME',
    ascending = False
).drop_duplicates(
    subset = ['NFT_ADDRESS']
)

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load and pre-process the data
data = df_ratings.merge(
    nft_address_to_product_id
    ,how='inner'
    , on='NFT_ADDRESS'
).dropna().rename(
    columns = {'BUYER_ADDRESS': 'user_id','ratings':'rating'}
)[['user_id','product_id','rating']].copy()

#Compute the user/product matrix that stores user ratings (=buys) to products (=NFT collections)
data = data.pivot_table(index='user_id', columns='product_id', values='rating').fillna(0)

# Compute the item-item similarity matrix using cosine similarity between all the collections
item_similarity = pd.DataFrame(cosine_similarity(data.T))

In [19]:
def get_most_similar_products(product_id, rating):
    #For a given product_id and rating value provided as input, returns the top 20 similar products (=NFT collections)
    similar_scores = item_similarity.iloc[product_id]*rating
    similar_scores = similar_scores.sort_values(ascending=False)
    return similar_scores[similar_scores.index != product_id].head(20)

In [20]:
import random 
def get_recommendations(user_id):
    user_ratings = data.loc[user_id].dropna()
    recommendations = pd.Series()

    for product_id, rating in user_ratings[user_ratings>0].sort_values(ascending = False).head(10).items():
        similar_products = get_most_similar_products(product_id, rating)
        recommendations = pd.concat([recommendations,similar_products])
    
    # Aggregate the recommendations
    recommendations = recommendations.groupby(recommendations.index).sum()
    recommendations = recommendations.sort_values(ascending=False)
    return recommendations.head(20)

def get_top_collection(user):
    #randomized with top 3 collections
    collections = df_ratings[df_ratings.BUYER_ADDRESS == user]['NFT_ADDRESS'].head(3).to_list()
    weights= [1,1,1]
    return random.choices(collections, weights)[0]
    

In [33]:
#Let's test how many of our recommendenations are "valid", i.e. recommendations in which the user has never invested
user_test= 100
validated_recs = 0
max_valid_recs = 0
total_recs = user_test * 20
i = 1
for user in df_ratings.BUYER_ADDRESS.drop_duplicates().head(user_test).tolist():
    print('user', i)
    recs = get_recommendations(user)
    df_recs = pd.DataFrame(recs).reset_index().rename(columns={'index': 'product_id'})
    df_recs = df_recs.merge(nft_address_to_product_id, how='inner', on = 'product_id')
    
    df_recs['already_invested'] = df_recs['NFT_ADDRESS'].apply(lambda x : True if x in df_ratings[df_ratings.BUYER_ADDRESS == user].NFT_ADDRESS.unique() else False)
    print('invested recs:', df_recs['already_invested'].sum())
    
    validated_recs += df_recs['already_invested'].sum()
    max_valid_recs += len(df_ratings[df_ratings.BUYER_ADDRESS == user].NFT_ADDRESS.unique())
    i+=1

print("validated recs:", validated_recs)
print('validated vs max validatable:', validated_recs/max_valid_recs)
print('ratio:', validated_recs/total_recs)


user 1


invested recs: 3
user 2


invested recs: 2
user 3


invested recs: 1
user 4


invested recs: 6
user 5


invested recs: 1
user 6


invested recs: 4
user 7


invested recs: 9
user 8


invested recs: 1
user 9


invested recs: 11
user 10


invested recs: 3
user 11


invested recs: 6
user 12


invested recs: 3
user 13


invested recs: 1
user 14


invested recs: 7
user 15


invested recs: 2
user 16


invested recs: 3
user 17


invested recs: 5
user 18


invested recs: 7
user 19


invested recs: 2
user 20


invested recs: 4
user 21


invested recs: 2
user 22


invested recs: 2
user 23


invested recs: 10
user 24


invested recs: 4
user 25


invested recs: 2
user 26


invested recs: 4
user 27


invested recs: 3
user 28


invested recs: 9
user 29


invested recs: 17
user 30


invested recs: 4
user 31


invested recs: 1
user 32


invested recs: 0
user 33


invested recs: 1
user 34


invested recs: 1
user 35


invested recs: 3
user 36


invested recs: 7
user 37


invested recs: 8
user 38


invested recs: 3
user 39


invested recs: 4
user 40


invested recs: 5
user 41


invested recs: 2
user 42


invested recs: 1
user 43


invested recs: 6
user 44


invested recs: 2
user 45


invested recs: 8
user 46


invested recs: 3
user 47


invested recs: 4
user 48


invested recs: 2
user 49


invested recs: 0
user 50


invested recs: 6
user 51


invested recs: 3
user 52


invested recs: 1
user 53


invested recs: 8
user 54


invested recs: 7
user 55


invested recs: 5
user 56


invested recs: 3
user 57


invested recs: 6
user 58


invested recs: 0
user 59


invested recs: 8
user 60


invested recs: 9
user 61


invested recs: 4
user 62


invested recs: 9
user 63


invested recs: 2
user 64


invested recs: 4
user 65


invested recs: 9
user 66


invested recs: 4
user 67


invested recs: 1
user 68


invested recs: 15
user 69


invested recs: 3
user 70


invested recs: 4
user 71


invested recs: 4
user 72


invested recs: 8
user 73


invested recs: 5
user 74


invested recs: 7
user 75


invested recs: 3
user 76


invested recs: 4
user 77


invested recs: 9
user 78


invested recs: 4
user 79


invested recs: 7
user 80


invested recs: 10
user 81


invested recs: 3
user 82


invested recs: 5
user 83


invested recs: 4
user 84


invested recs: 4
user 85


invested recs: 4
user 86


invested recs: 11
user 87


invested recs: 17
user 88


invested recs: 5
user 89


invested recs: 6
user 90


invested recs: 0
user 91


invested recs: 1
user 92


invested recs: 8
user 93


invested recs: 2
user 94


invested recs: 7
user 95


invested recs: 5
user 96


invested recs: 6
user 97


invested recs: 4
user 98


invested recs: 10
user 99


invested recs: 10
user 100


invested recs: 4
validated recs: 487
validated vs max validatable: 0.2200632625395391
ratio: 0.2435



In [22]:
# For demo: package all results into a simple function taking the user address as input and returns a list of recommended collections
def recommenderNFT(user):
    top_collection = get_top_collection(user)
    top_collection_id = nft_address_to_product_id.loc[nft_address_to_product_id.NFT_ADDRESS == top_collection, 'product_id']
    other_collections = pd.Series(
        item_similarity.iloc[top_collection_id].values[0]
    ).rename('rating').sort_values(ascending = False).head(10).reset_index().merge(
        nft_address_to_product_id,
        how = 'inner',
        left_on = 'index',
        right_on = 'product_id'
    ).merge(
        df_names,
        how = 'inner',
        on = 'NFT_ADDRESS'
    )[1:]['PROJECT_NAME'].to_list()
    
    print('user:',user)
    print('Because you invested in', df_names.at[df_names.loc[df_names.NFT_ADDRESS==top_collection].index[0], 'PROJECT_NAME'])
    print('You might like', other_collections)

In [38]:
user = '0x1096b85a3421794c801bdaa757efe5ef6e0ca135'
recommenderNFT(user)

user: 0x1096b85a3421794c801bdaa757efe5ef6e0ca135
Because you invested in degen toonz
You might like ['toonz minter rewards card', 'degen toonz', 'detonated toonz', 'lilhottie', 'the uncanny country club', 'angel baby hit squad', 'jarritos', 'wulf boy social club', 'killabears']
