In [25]:
import pandas as pd
import math

##Add filter to remove collections with less than X sales

df_ratings = pd.concat(
    [pd.read_csv('nft-buys-'+str(x)+'.csv') for x in range(0,15)]
    , axis = 0
    , ignore_index = True
).sort_values(
    by = 'NO_BUYS'
    ,ascending = False
).drop_duplicates(
    subset=['BUYER_ADDRESS','NFT_ADDRESS']
    , keep='first'
    , ignore_index=False
).reset_index(
    drop = True
)

df_user_buys = df_ratings.groupby('BUYER_ADDRESS')['NO_BUYS'].max().rename('MAX_BUYS')

df_ratings = df_ratings.merge(df_user_buys, how='inner',on='BUYER_ADDRESS')

del df_user_buys

df_ratings['ratings_lin'] = df_ratings.NO_BUYS / df_ratings.MAX_BUYS

#sqrt ratings to accentuate weight of at least 1 buy
df_ratings['ratings'] = df_ratings.ratings_lin.apply(lambda x : math.sqrt(x))

print("df size", len(df_ratings.BUYER_ADDRESS.tolist()))
print("n° collections", len(df_ratings.NFT_ADDRESS.unique()))
print("n° users", len(df_ratings.BUYER_ADDRESS.unique()))
print("n° transactions", int(df_ratings.NO_BUYS.sum()))

df size 1499889
n° collections 3774
n° users 117267
n° transactions 3794085.0


In [2]:
nft_address_to_product_id = pd.DataFrame(
    df_ratings.NFT_ADDRESS.drop_duplicates()
).reset_index(drop=True).reset_index().rename(columns={'index':'product_id'})

df_names = pd.read_csv('nft-names.csv')

df_names = df_names.sort_values(
    by='PROJECT_NAME',
    ascending = False
).drop_duplicates(
    subset = ['NFT_ADDRESS']
)

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load and pre-process the data
data = df_ratings.merge(
    nft_address_to_product_id
    ,how='inner'
    , on='NFT_ADDRESS'
).dropna().rename(
    columns = {'BUYER_ADDRESS': 'user_id','ratings':'rating'}
)[['user_id','product_id','rating']].copy()

#user/product matrix
data = data.pivot_table(index='user_id', columns='product_id', values='rating').fillna(0)

# Compute the item-item similarity matrix
item_similarity = pd.DataFrame(cosine_similarity(data.T))

In [19]:
def get_most_similar_products(product_id, rating):
    similar_scores = item_similarity.iloc[product_id]*rating
    similar_scores = similar_scores.sort_values(ascending=False)
    return similar_scores[similar_scores.index != product_id].head(20)

In [20]:
import random 
def get_recommendations(user_id):
    user_ratings = data.loc[user_id].dropna()
    recommendations = pd.Series()

    for product_id, rating in user_ratings[user_ratings>0].sort_values(ascending = False).head(10).items():
        similar_products = get_most_similar_products(product_id, rating)
        recommendations = pd.concat([recommendations,similar_products])
    
    # Aggregate the recommendations
    recommendations = recommendations.groupby(recommendations.index).sum()
    recommendations = recommendations.sort_values(ascending=False)
    return recommendations.head(20)

def get_top_collection(user):
    #randomized with top 3 collections
    collections = df_ratings[df_ratings.BUYER_ADDRESS == user]['NFT_ADDRESS'].head(3).to_list()
    weights= [1,1,1]
    return random.choices(collections, weights)[0]
    

In [33]:
#0x17dfc796d1a125d87fb43ab1d61f888966eef7b0
#0x43851e9f0f192851ce0a3a43e4bb57992884d2c7
#0x43861efd37d3b19bc25f7114bbef462174e3214a
#user = '0x43861efd37d3b19bc25f7114bbef462174e3214a'

user_test= 100
validated_recs = 0
max_valid_recs = 0
total_recs = user_test * 20
i = 1
for user in df_ratings.BUYER_ADDRESS.drop_duplicates().head(user_test).tolist():
    print('user', i)
    recs = get_recommendations(user)
    df_recs = pd.DataFrame(recs).reset_index().rename(columns={'index': 'product_id'})
    df_recs = df_recs.merge(nft_address_to_product_id, how='inner', on = 'product_id')
    
    df_recs['already_invested'] = df_recs['NFT_ADDRESS'].apply(lambda x : True if x in df_ratings[df_ratings.BUYER_ADDRESS == user].NFT_ADDRESS.unique() else False)
    print('invested recs:', df_recs['already_invested'].sum())
    
    validated_recs += df_recs['already_invested'].sum()
    max_valid_recs += len(df_ratings[df_ratings.BUYER_ADDRESS == user].NFT_ADDRESS.unique())
    i+=1

print("validated recs:", validated_recs)
print('validated vs max validatable:', validated_recs/max_valid_recs)
print('ratio:', validated_recs/total_recs)
print()
##filter out collections that the user already invested in



user 1


  recommendations = pd.Series()


invested recs: 3
user 2


  recommendations = pd.Series()


invested recs: 2
user 3


  recommendations = pd.Series()


invested recs: 1
user 4


  recommendations = pd.Series()


invested recs: 6
user 5


  recommendations = pd.Series()


invested recs: 1
user 6


  recommendations = pd.Series()


invested recs: 4
user 7


  recommendations = pd.Series()


invested recs: 9
user 8


  recommendations = pd.Series()


invested recs: 1
user 9


  recommendations = pd.Series()


invested recs: 11
user 10


  recommendations = pd.Series()


invested recs: 3
user 11


  recommendations = pd.Series()


invested recs: 6
user 12


  recommendations = pd.Series()


invested recs: 3
user 13


  recommendations = pd.Series()


invested recs: 1
user 14


  recommendations = pd.Series()


invested recs: 7
user 15


  recommendations = pd.Series()


invested recs: 2
user 16


  recommendations = pd.Series()


invested recs: 3
user 17


  recommendations = pd.Series()


invested recs: 5
user 18


  recommendations = pd.Series()


invested recs: 7
user 19


  recommendations = pd.Series()


invested recs: 2
user 20


  recommendations = pd.Series()


invested recs: 4
user 21


  recommendations = pd.Series()


invested recs: 2
user 22


  recommendations = pd.Series()


invested recs: 2
user 23


  recommendations = pd.Series()


invested recs: 10
user 24


  recommendations = pd.Series()


invested recs: 4
user 25


  recommendations = pd.Series()


invested recs: 2
user 26


  recommendations = pd.Series()


invested recs: 4
user 27


  recommendations = pd.Series()


invested recs: 3
user 28


  recommendations = pd.Series()


invested recs: 9
user 29


  recommendations = pd.Series()


invested recs: 17
user 30


  recommendations = pd.Series()


invested recs: 4
user 31


  recommendations = pd.Series()


invested recs: 1
user 32


  recommendations = pd.Series()


invested recs: 0
user 33


  recommendations = pd.Series()


invested recs: 1
user 34


  recommendations = pd.Series()


invested recs: 1
user 35


  recommendations = pd.Series()


invested recs: 3
user 36


  recommendations = pd.Series()


invested recs: 7
user 37


  recommendations = pd.Series()


invested recs: 8
user 38


  recommendations = pd.Series()


invested recs: 3
user 39


  recommendations = pd.Series()


invested recs: 4
user 40


  recommendations = pd.Series()


invested recs: 5
user 41


  recommendations = pd.Series()


invested recs: 2
user 42


  recommendations = pd.Series()


invested recs: 1
user 43


  recommendations = pd.Series()


invested recs: 6
user 44


  recommendations = pd.Series()


invested recs: 2
user 45


  recommendations = pd.Series()


invested recs: 8
user 46


  recommendations = pd.Series()


invested recs: 3
user 47


  recommendations = pd.Series()


invested recs: 4
user 48


  recommendations = pd.Series()


invested recs: 2
user 49


  recommendations = pd.Series()


invested recs: 0
user 50


  recommendations = pd.Series()


invested recs: 6
user 51


  recommendations = pd.Series()


invested recs: 3
user 52


  recommendations = pd.Series()


invested recs: 1
user 53


  recommendations = pd.Series()


invested recs: 8
user 54


  recommendations = pd.Series()


invested recs: 7
user 55


  recommendations = pd.Series()


invested recs: 5
user 56


  recommendations = pd.Series()


invested recs: 3
user 57


  recommendations = pd.Series()


invested recs: 6
user 58


  recommendations = pd.Series()


invested recs: 0
user 59


  recommendations = pd.Series()


invested recs: 8
user 60


  recommendations = pd.Series()


invested recs: 9
user 61


  recommendations = pd.Series()


invested recs: 4
user 62


  recommendations = pd.Series()


invested recs: 9
user 63


  recommendations = pd.Series()


invested recs: 2
user 64


  recommendations = pd.Series()


invested recs: 4
user 65


  recommendations = pd.Series()


invested recs: 9
user 66


  recommendations = pd.Series()


invested recs: 4
user 67


  recommendations = pd.Series()


invested recs: 1
user 68


  recommendations = pd.Series()


invested recs: 15
user 69


  recommendations = pd.Series()


invested recs: 3
user 70


  recommendations = pd.Series()


invested recs: 4
user 71


  recommendations = pd.Series()


invested recs: 4
user 72


  recommendations = pd.Series()


invested recs: 8
user 73


  recommendations = pd.Series()


invested recs: 5
user 74


  recommendations = pd.Series()


invested recs: 7
user 75


  recommendations = pd.Series()


invested recs: 3
user 76


  recommendations = pd.Series()


invested recs: 4
user 77


  recommendations = pd.Series()


invested recs: 9
user 78


  recommendations = pd.Series()


invested recs: 4
user 79


  recommendations = pd.Series()


invested recs: 7
user 80


  recommendations = pd.Series()


invested recs: 10
user 81


  recommendations = pd.Series()


invested recs: 3
user 82


  recommendations = pd.Series()


invested recs: 5
user 83


  recommendations = pd.Series()


invested recs: 4
user 84


  recommendations = pd.Series()


invested recs: 4
user 85


  recommendations = pd.Series()


invested recs: 4
user 86


  recommendations = pd.Series()


invested recs: 11
user 87


  recommendations = pd.Series()


invested recs: 17
user 88


  recommendations = pd.Series()


invested recs: 5
user 89


  recommendations = pd.Series()


invested recs: 6
user 90


  recommendations = pd.Series()


invested recs: 0
user 91


  recommendations = pd.Series()


invested recs: 1
user 92


  recommendations = pd.Series()


invested recs: 8
user 93


  recommendations = pd.Series()


invested recs: 2
user 94


  recommendations = pd.Series()


invested recs: 7
user 95


  recommendations = pd.Series()


invested recs: 5
user 96


  recommendations = pd.Series()


invested recs: 6
user 97


  recommendations = pd.Series()


invested recs: 4
user 98


  recommendations = pd.Series()


invested recs: 10
user 99


  recommendations = pd.Series()


invested recs: 10
user 100


  recommendations = pd.Series()


invested recs: 4
validated recs: 487
validated vs max validatable: 0.2200632625395391
ratio: 0.2435



In [30]:
df_ratings.BUYER_ADDRESS.drop_duplicates().head(100).tolist()

['0x207b3c1083a62e282d375a11f52730879cbfbbbb',
 '0x0c4037b72a0c63340fb530690ea123c612665a34',
 '0x0ad100536bd9e8ee6e1aadfde74ec33f80d9bfa0',
 '0x4df00689952974edbe1cc05d2710ea1b2b6e185f',
 '0x39500416f6e1544b533d71c5c3f619de158b46e9',
 '0x49236ce2eb4b67618513b37f09c101183e7367f6',
 '0x14977b0dbe7e155f9907effecbb70c9b7a05e737',
 '0x29b58978f21bf57f217495ba78830d93d5c0ea23',
 '0x13422774d8416a316390f5aa2e713c5742592aaf',
 '0x3258ea5ea3cd8639380f3e136873dcb8078160a0',
 '0x3501cfb3581e998422f398b6d41a86271777a89a',
 '0x37bb6fdb5e4be5e22f19813a28df91bfaf4638ab',
 '0x30a1963ec5a2cfe7e36dae4aaea39ef6fd499745',
 '0x1e299dc52eb9fdab6a6849f9731a948d8d72e474',
 '0x39bcc1e709d191fcf7d837d5a30724921012ff38',
 '0x11e01777ebc4ad185ea3de4a9efcb50bcd92248d',
 '0x00903f96abe2743b81fc7fcf4874c715a3fe1c88',
 '0x0008d343091ef8bd3efa730f6aae5a26a285c7a2',
 '0x0da0df4be467140e74c76257d002f52e954be4d3',
 '0x3819d14e0b3147829e072336c8bedb02b73ee0ab',
 '0x3d6f73441f28e54c28103ea972057f2c734a0f5c',
 '0x0f87316ea

In [22]:
def recommenderNFT(user):
    top_collection = get_top_collection(user)
    top_collection_id = nft_address_to_product_id.loc[nft_address_to_product_id.NFT_ADDRESS == top_collection, 'product_id']
    other_collections = pd.Series(
        item_similarity.iloc[top_collection_id].values[0]
    ).rename('rating').sort_values(ascending = False).head(10).reset_index().merge(
        nft_address_to_product_id,
        how = 'inner',
        left_on = 'index',
        right_on = 'product_id'
    ).merge(
        df_names,
        how = 'inner',
        on = 'NFT_ADDRESS'
    )[1:]['PROJECT_NAME'].to_list()
    
    print('user:',user)
    print('Because you invested in', df_names.at[df_names.loc[df_names.NFT_ADDRESS==top_collection].index[0], 'PROJECT_NAME'])
    print('You might like', other_collections)

In [38]:
user = '0x1096b85a3421794c801bdaa757efe5ef6e0ca135'
recommenderNFT(user)

user: 0x1096b85a3421794c801bdaa757efe5ef6e0ca135
Because you invested in degen toonz
You might like ['toonz minter rewards card', 'degen toonz', 'detonated toonz', 'lilhottie', 'the uncanny country club', 'angel baby hit squad', 'jarritos', 'wulf boy social club', 'killabears']
