In [1]:
import pandas as pd
import numpy as np


In [3]:
df = pd.read_csv('recommendation_assets.csv')

In [4]:
df

Unnamed: 0,AssetID,UserID,Asset Name,Category,Description,Interaction type,User rating,Price
0,A1000,U27,RzQAncYp,Stock,Rare Piece,Share,2,2232.78
1,A1001,U35,QxVOOC5f,Artwork,Popular Investment,Like,1,3933.26
2,A1002,U46,6hkPPvNu,Artwork,Popular Song,View,3,536.85
3,A1003,U50,q6zLG4aX,Stock,Innovative Song,Purchase,4,4996.07
4,A1004,U3,FGgVHmsV,Artwork,Rare Song,Share,3,4616.01
...,...,...,...,...,...,...,...,...
195,A1195,U23,kmXF7rlm,Music,Rare Track,Share,5,596.05
196,A1196,U46,0UAfeOKB,Stock,Unique Investment,View,4,354.01
197,A1197,U22,zF1h9IAD,Music,Rare Track,Share,3,3236.54
198,A1198,U35,OFjFIL5J,NFT,Unique Collection,Like,5,105.53


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import LabelEncoder

In [6]:
label_cat = LabelEncoder()
label_interact = LabelEncoder()
df['Category'] = label_cat.fit_transform(df['Category'])
df['Interaction type'] = label_interact.fit_transform(df['Interaction type'])

In [8]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=500)
description_matrix = tfidf_vectorizer.fit_transform(df['Description'])

In [10]:
print(description_matrix)

  (0, 6)	0.7145569462990581
  (0, 8)	0.6995772798596057
  (1, 5)	0.7371334516195437
  (1, 7)	0.6757471971850552
  (2, 9)	0.7371334516195437
  (2, 7)	0.6757471971850552
  (3, 4)	0.7108069796904654
  (3, 9)	0.7033871178969077
  (4, 9)	0.7071067811865476
  (4, 8)	0.7071067811865476
  (5, 4)	0.7108069796904654
  (5, 5)	0.7033871178969077
  (6, 1)	0.7108069796904654
  (6, 8)	0.7033871178969077
  (7, 10)	0.6783882604257051
  (7, 0)	0.734703592012851
  (8, 9)	0.7371334516195437
  (8, 7)	0.6757471971850552
  (9, 2)	0.7371334516195437
  (9, 7)	0.6757471971850552
  (10, 3)	0.6997160164164957
  (10, 1)	0.7144210917730733
  (11, 2)	0.7371334516195437
  (11, 7)	0.6757471971850552
  (12, 4)	0.7108069796904654
  :	:
  (187, 7)	0.67188683062133
  (188, 9)	0.7071067811865476
  (188, 8)	0.7071067811865476
  (189, 9)	0.7371334516195437
  (189, 7)	0.6757471971850552
  (190, 0)	0.7310762238742312
  (190, 6)	0.6822957972066036
  (191, 5)	0.7371334516195437
  (191, 7)	0.6757471971850552
  (192, 1)	0.70710678

In [12]:
category_matrix = np.array(df['Category']).reshape(-1, 1)
content_matrix = np.hstack([description_matrix.toarray(), category_matrix])

In [14]:
content_similarity = cosine_similarity(content_matrix)

def recommend_content_based(asset_id, top_n=5):
    idx = df[df['AssetID'] == asset_id].index[0]
    similar_assets = list(enumerate(content_similarity[idx]))
    sorted_similar_assets = sorted(similar_assets, key=lambda x: x[1], reverse=True)[1:top_n+1]
    recommendations = [df.iloc[i[0]]['AssetID'] for i in sorted_similar_assets]
    return recommendations

In [20]:
user_item_matrix = df.pivot_table(index='UserID', columns='AssetID', values='User rating', fill_value=0)

# Matrix factorization using SVD
svd = TruncatedSVD(n_components=20)
user_factors = svd.fit_transform(user_item_matrix)
item_factors = svd.components_

# Compute user-asset recommendations
user_item_pred = np.dot(user_factors, item_factors)

def recommend_collaborative(user_id, top_n=5):
    user_idx = user_item_matrix.index.get_loc(user_id)
    user_ratings = user_item_pred[user_idx]
    top_items = np.argsort(user_ratings)[::-1][:top_n]
    recommendations = user_item_matrix.columns[top_items]
    return recommendations.tolist()

In [21]:
def recommend_hybrid(user_id, asset_id, top_n=5):
    # Combine content-based and collaborative recommendations
    content_recommendations = recommend_content_based(asset_id, top_n)
    collaborative_recommendations = recommend_collaborative(user_id, top_n)
    combined_recommendations = list(set(content_recommendations + collaborative_recommendations))
    return combined_recommendations[:top_n]

In [24]:
print("Content-based Recommendations:")
content_based = recommend_content_based(asset_id='A1000')

# Collaborative recommendation for a user
print("Collaborative Recommendations:")
collaborative_based = recommend_collaborative(user_id='U27')

# Hybrid recommendations
print("Hybrid Recommendations:")
hybrid_based = recommend_hybrid(user_id='U27', asset_id='A1000')


Content-based Recommendations:
Collaborative Recommendations:
Hybrid Recommendations:


In [26]:
def recommed(based):    
    results_list = [] 

    for i in based:
        results = df[df['AssetID'] == i]

        results_list.append(results)

    final_results = pd.concat(results_list, ignore_index=True)

    final_results_sorted = final_results.sort_values(by='User rating',ascending=False)

    final_results_sorted['Category'] = label_cat.inverse_transform(final_results_sorted['Category'])

    print(final_results_sorted)

In [28]:
recommed(content_based)
recommed(collaborative_based)
recommed(hybrid_based)

  AssetID UserID Asset Name Category     Description  Interaction type  \
0   A1074    U19   AlPXRtuG    Stock      Rare Piece                 1   
1   A1129    U12   RUkPle2n    Stock      Rare Piece                 1   
3   A1194    U29   lZmOrSl6      NFT      Rare Piece                 2   
2   A1017    U21   enHCCJZJ      NFT      Rare Piece                 2   
4   A1154    U31   1gEoG2tA    Stock  Creative Piece                 1   

   User rating    Price  
0            4  2176.11  
1            3  3541.68  
3            2   916.24  
2            1  1878.26  
4            1  3579.29  
  AssetID UserID Asset Name Category      Description  Interaction type  \
0   A1109    U27   ZAglKwjx    Stock      Unique Song                 1   
1   A1086    U27   3zEesMMX      NFT      Popular Art                 1   
2   A1040    U27   SknoLSyj    Music   Creative Track                 0   
3   A1131    U27   VtcW8hiI    Stock   Innovative Art                 3   
4   A1029    U27   K6jOF

In [36]:
def recommend_content_based_by_category(category_name, top_n=5):
    # Check if category exists
    if category_name not in label_cat.classes_:
        return f"Category '{category_name}' not found in the dataset."

    # Filter data by category
    category_id = label_cat.transform([category_name])[0]
    category_indices = df[df['Category'] == category_id].index

    # Compute similarity for assets in this category
    category_similarity = content_similarity[category_indices][:, category_indices]

    # Recommend top-N similar items for each item in the category
    recommendations = []
    for idx in category_indices:
        similar_assets = list(enumerate(content_similarity[idx]))
        sorted_similar_assets = sorted(similar_assets, key=lambda x: x[1], reverse=True)[1:top_n + 1]

        # Append unique recommendations
        for sim in sorted_similar_assets:
            asset_id = df.iloc[sim[0]]['AssetID']
            if asset_id not in recommendations:
                recommendations.append(asset_id)

    return recommendations[:top_n]

In [35]:
def recommend_collaborative_by_category(category_name, top_n=5):
    # Check if category exists
    if category_name not in label_cat.classes_:
        return f"Category '{category_name}' not found in the dataset."

    # Filter items in the category
    category_id = label_cat.transform([category_name])[0]
    category_items = df[df['Category'] == category_id]['AssetID']

    # Predict user-item ratings only for category items
    category_item_indices = [user_item_matrix.columns.get_loc(asset_id) for asset_id in category_items if asset_id in user_item_matrix.columns]

    # Aggregate predictions for all users
    avg_ratings = np.mean(user_item_pred[:, category_item_indices], axis=0)

    # Recommend top-N items based on average predicted ratings
    top_items = np.argsort(avg_ratings)[::-1][:top_n]
    recommendations = user_item_matrix.columns[category_item_indices].values[top_items]

    return recommendations.tolist()

In [37]:
def recommend_hybrid_by_category(category_name, top_n=5):
    # Get content-based and collaborative recommendations
    content_recommendations = recommend_content_based_by_category(category_name, top_n)
    collaborative_recommendations = recommend_collaborative_by_category(category_name, top_n)

    # Combine and deduplicate recommendations
    combined_recommendations = list(set(content_recommendations + collaborative_recommendations))
    return combined_recommendations[:top_n]

In [38]:
hybrid = recommend_hybrid_by_category("Artwork", top_n=5)

In [39]:
hybrid

['A1163', 'A1048', 'A1050', 'A1152', 'A1157']

In [40]:
recommed(hybrid)

  AssetID UserID Asset Name Category          Description  Interaction type  \
4   A1157    U22   6QtLRu0Y  Artwork   Popular Collection                 1   
2   A1050    U21   RbqrcJgK  Artwork  Creative Investment                 0   
3   A1152    U16   I67F65e8    Music   Popular Investment                 2   
0   A1163    U42   9oQkOdfn    Music   Popular Investment                 3   
1   A1048     U4   eqFYo0pL  Artwork  Creative Investment                 3   

   User rating    Price  
4            5  3639.74  
2            3  2504.49  
3            2   397.44  
0            1  2990.46  
1            1  3676.85  
