In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; later cells read from and write to
# paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from collections import Counter

In [None]:
# Load the reviews CSV from Drive into a DataFrame.
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# looks like an index that was written out when the CSV was saved; consider
# passing index_col=0 if nothing relies on that column — confirm first.
df = pd.read_csv('/content/drive/MyDrive/amazon_reviews_cleaned_n.csv')

In [None]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,user_id,product_id,rating
0,ByMG,B00QWO9P0O,4.0
1,ByMG,B00LH3DMUO,4.0
2,ByAmazon Customer,B00QWO9P0O,5.0
3,ByAmazon Customer,B00LH3DMUO,5.0
4,ByAdam,B00QWO9P0O,4.0


In [None]:
# Create an empty undirected NetworkX graph; user/product nodes and rating
# edges are added to it in the following cells.
G = nx.Graph()

In [None]:
# Collect the distinct user IDs and product IDs; each one becomes a graph node.
users, products = (df[column].unique() for column in ('user_id', 'product_id'))

In [None]:
# Users and products share a single node namespace in G, so an ID that appears
# in both columns would collapse into one node whose 'bipartite' label is
# overwritten by the second add_nodes_from call. Fail loudly instead of
# silently building a graph that is no longer correctly labelled.
shared_ids = set(users) & set(products)
if shared_ids:
    raise ValueError(
        f"{len(shared_ids)} ID(s) appear as both a user and a product, "
        f"e.g. {sorted(map(str, shared_ids))[:5]}"
    )

# Tag every node with the side of the bipartite graph it belongs to.
G.add_nodes_from(users, bipartite='users')
G.add_nodes_from(products, bipartite='products')

In [None]:
# Add one edge per review, using the rating as the edge weight.
# add_weighted_edges_from consumes plain (user, product, rating) tuples, which
# avoids DataFrame.iterrows() constructing a Series object for every row.
# G is a simple nx.Graph, so a repeated (user, product) pair does not create a
# second edge: the rating from the last matching row is the one that is kept.
review_edges = df[['user_id', 'product_id', 'rating']].itertuples(index=False, name=None)
G.add_weighted_edges_from(review_edges)

In [None]:
# Report the size of the assembled graph, one figure per line.
print(
    f"Total nodes: {G.number_of_nodes()}",
    f"Total edges: {G.number_of_edges()}",
    sep="\n",
)

Total nodes: 887
Total edges: 6112


In [None]:
# Tally how many nodes have each degree (degree -> number of nodes) to get a
# picture of how connected the graph is.
degrees = Counter(node_degree for _node, node_degree in G.degree())

In [None]:
# Show the degree distribution as {degree: number of nodes with that degree}.
print(degrees)

Counter({4: 169, 6: 160, 5: 115, 7: 90, 8: 59, 9: 50, 10: 36, 11: 21, 13: 19, 12: 18, 14: 18, 3: 16, 15: 15, 16: 10, 1: 9, 2: 8, 19: 8, 18: 8, 21: 5, 17: 5, 20: 4, 26: 4, 134: 3, 27: 2, 449: 2, 93: 2, 254: 2, 36: 2, 47: 2, 30: 1, 25: 1, 22: 1, 137: 1, 42: 1, 83: 1, 379: 1, 402: 1, 38: 1, 187: 1, 60: 1, 112: 1, 32: 1, 147: 1, 188: 1, 242: 1, 40: 1, 48: 1, 349: 1, 87: 1, 35: 1, 502: 1, 266: 1, 55: 1, 305: 1})


In [None]:
import pickle

In [None]:
# Write the pickled graph to Drive.
graph_pickle_path = '/content/drive/MyDrive/amazon_user_product_graph_n.pkl'
with open(graph_pickle_path, 'wb') as pickle_file:
    pickle.dump(G, pickle_file)