In [17]:
import pandas as pd
from google.colab import drive
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [18]:
# Mount Google Drive so the CSV files under /content/drive/MyDrive can be read below.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
# Read the three raw review exports from Drive and stack them into one frame.
# NOTE(review): the recorded cell output shows a warning attributed to the
# third read; its message is not visible here -- confirm whether an explicit
# `dtype=` (or `low_memory=False`) is needed for that file.
df_1, df_2, df_3 = map(
    pd.read_csv,
    (
        '/content/drive/MyDrive/amazon_reviews.csv',
        '/content/drive/MyDrive/amazon_reviews_2.csv',
        '/content/drive/MyDrive/amazon_reviews_3.csv',
    ),
)
# ignore_index=True gives the combined frame a fresh 0..n-1 index.
raw_df = pd.concat([df_1, df_2, df_3], ignore_index=True)

  df_3 = pd.read_csv('/content/drive/MyDrive/amazon_reviews_3.csv')


In [20]:
# Split the comma-separated `asins` field so each row carries exactly one
# ASIN, then trim surrounding whitespace from every ASIN.
# Note: astype(str) turns a missing value into the literal string 'nan'.
raw_df = (
    raw_df
    .assign(asins=lambda frame: frame['asins'].astype(str).str.split(','))
    .explode('asins')
    .assign(asins=lambda frame: frame['asins'].str.strip())
)

In [21]:
# Clean product names
def clean_name(value):
    """Reduce a raw product-name field to a short display name.

    The result is the text of the first line, up to (not including) the first
    comma, with surrounding whitespace removed.

    Parameters
    ----------
    value : object
        Raw name value; anything that is not missing is converted with ``str``.

    Returns
    -------
    str or None
        The cleaned name, or ``None`` when ``value`` is missing (per
        ``pd.isna``), is an empty string, or the cleaned text is 5 characters
        or shorter.
    """
    if pd.isna(value):
        return None
    # "".splitlines() is an empty list, so indexing [0] directly would raise
    # IndexError on an empty string; fall back to "" instead.
    first_line = next(iter(str(value).splitlines()), "")
    cleaned = first_line.split(",")[0].strip()
    return cleaned if len(cleaned) > 5 else None

In [22]:
# Derive a cleaned `product_name` from `name` and keep only the rows for
# which clean_name produced a usable value.
raw_df = (
    raw_df
    .assign(product_name=raw_df['name'].map(clean_name))
    .dropna(subset=['product_name'])
)

In [23]:
# Build the ASIN -> product name lookup from the distinct (asin, name) pairs.
# When one ASIN has several distinct names, the last pair encountered wins.
asin_to_name = dict(
    raw_df[['asins', 'product_name']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

In [24]:
# Read the node embeddings and the cleaned user/product interactions.
embedding_df, interactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/amazon_node2vec_embeddings_n.csv',
        '/content/drive/MyDrive/amazon_reviews_cleaned_n.csv',
    )
)

In [25]:
# Preview the first five embedding rows (one row per graph node).
embedding_df.head(n=5)

Unnamed: 0,node,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,dim_9,...,dim_55,dim_56,dim_57,dim_58,dim_59,dim_60,dim_61,dim_62,dim_63,dim_64
0,Amazon Customer,0.149419,0.135554,-0.031328,0.4842,-0.246845,-1.081997,0.113892,-0.187654,-0.724183,...,0.052541,0.595386,-0.234726,-0.879301,-0.049558,0.739805,-0.059146,0.185869,-0.432719,1.093811
1,Allen,-0.020417,-0.228187,-0.009102,-0.050839,-0.213515,-0.049315,0.309904,0.032056,-0.32602,...,-0.084227,0.011122,0.21425,-0.287767,-0.044464,-0.157193,-0.232426,-0.085146,0.214837,0.308005
2,Anonymous,0.310913,-0.180166,0.126514,-0.110365,0.230864,-0.006202,-0.115267,-0.421536,-0.213562,...,0.252555,-0.146363,-0.010406,-0.373925,-0.058373,-0.103317,0.173819,0.019791,-0.330754,-0.208411
3,Craig,0.182625,0.173018,0.061141,0.100062,0.077548,-0.140291,0.233293,0.054349,-0.26464,...,0.111093,0.019107,0.029314,0.119548,-0.336597,0.166981,-0.00179,-0.033471,0.189402,0.127752
4,Buyer,-0.077068,-0.140756,0.152001,0.408007,-0.148691,-0.401761,0.198531,0.152832,-0.308062,...,0.049827,0.203451,0.055694,0.10005,0.008072,-0.004097,0.160931,-0.041942,0.193982,0.119548


In [26]:
# Preview the first five interaction rows.
interactions.head(n=5)

Unnamed: 0,user_id,product_id,rating
0,Amazon Customer,B006GWO5NE,5.0
1,Allen,B00L9EPT8O,5.0
2,Allen,B01E6AO69U,5.0
3,Anonymous,B00L9EPT8O,5.0
4,Anonymous,B01E6AO69U,5.0


In [27]:
# Split the node embeddings into user rows and product rows, both indexed by
# node label. A node is a user (product) when its label occurs in the
# interactions' `user_id` (`product_id`) column.
# NOTE(review): every column other than `node` ends up in the vectors --
# confirm the embeddings CSV carries no leftover index column.
user_embeddings = (
    embedding_df
    .set_index('node')
    .loc[lambda frame: frame.index.isin(interactions['user_id'].unique())]
)
product_embeddings = (
    embedding_df
    .set_index('node')
    .loc[lambda frame: frame.index.isin(interactions['product_id'].unique())]
)

In [28]:
# Recommendation function
def recommend_products(user_id, N=5):
    """Recommend products whose embeddings are most similar to a user's.

    Reads the module-level ``user_embeddings``, ``product_embeddings``,
    ``interactions`` and ``asin_to_name`` objects.

    Parameters
    ----------
    user_id : hashable
        Label of the user in ``user_embeddings.index``.
    N : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or list
        A frame with columns ``product_id``, ``similarity`` and ``name``,
        sorted by descending cosine similarity and limited to ``N`` rows.
        Products the user already has an interaction with, and products with
        no entry in ``asin_to_name``, are excluded. When ``user_id`` is not in
        ``user_embeddings``, prints a message and returns an empty list.
    """
    if user_id not in user_embeddings.index:
        print("User not found.")
        return []

    # .loc[[label]] always yields a 2-D frame; keep only the first row so a
    # duplicated node label cannot produce a wrongly shaped query vector.
    user_vector = user_embeddings.loc[[user_id]].iloc[:1].to_numpy()
    similarities = cosine_similarity(user_vector, product_embeddings.to_numpy())[0]

    sim_df = pd.DataFrame({
        'product_id': product_embeddings.index,
        'similarity': similarities,
    })
    # Normalise ids BEFORE filtering so padded ids cannot slip past the
    # "already rated" check; done on the fresh frame to avoid assigning to a
    # filtered slice (SettingWithCopyWarning).
    sim_df['product_id'] = sim_df['product_id'].str.strip()

    rated = set(
        interactions.loc[interactions['user_id'] == user_id, 'product_id'].str.strip()
    )

    ranked = (
        sim_df[~sim_df['product_id'].isin(rated)]
        .assign(name=lambda frame: frame['product_id'].map(asin_to_name))
        .dropna(subset=['name'])
        # Sort first so de-duplication keeps the highest-similarity row.
        .sort_values(by='similarity', ascending=False)
        .drop_duplicates(subset='product_id')
    )
    return ranked.head(N)

In [29]:
# Demo: recommend for the user of the first interaction row and print the result.
user_to_recommend = interactions['user_id'].iat[0]
recommendations = recommend_products(user_to_recommend, N=5)
print(
    f"Top 5 recommendations for {user_to_recommend}:\n",
    recommendations,
    sep="\n",
)

Top 5 recommendations for Amazon Customer:

    product_id  similarity                                               name
32  B06XD5YCKX    0.351449  All-New Kindle Oasis E-reader - 7 High-Resolut...
15  B00IOYAM4I    0.301572  Brand New Amazon Kindle Fire 16gb 7 Ips Displa...
23  B0189XZ0KY    0.293359                                  Fire HD 10 Tablet
24  B01J94SWWU    0.290231  Amazon Fire HD 8 with Alexa (8" HD Display Tab...
21  B0189XZRTI    0.265159                                  Fire HD 10 Tablet
