In [None]:
import os
import urllib.request

import networkx as nx
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the dataset (JSON Lines: one review object per line, hence lines=True)
data = pd.read_json('http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz', lines=True)

# Select relevant columns and drop duplicates.
# drop_duplicates() is chained instead of being called with inplace=True:
# mutating a column subset in place can raise SettingWithCopyWarning.
data = data[['reviewerID', 'asin', 'overall']].drop_duplicates()


In [None]:
# Create a directed graph from user interactions
def create_product_graph(data):
    """
    Builds a directed, weighted reviewer -> product graph from review rows.

    Args:
    data (pd.DataFrame): Review table with 'reviewerID', 'asin' and 'overall' columns.

    Returns:
    nx.DiGraph: Graph with one edge per (reviewerID, asin) pair, pointing from the
    reviewer to the product, with the rating stored in the 'weight' edge attribute.
    If a pair occurs in several rows, the rating of the last row is kept.
    """
    G = nx.DiGraph()
    # Insert all edges in one call from the column iterators. DataFrame.iterrows()
    # builds a temporary Series for every row, which is very slow on large tables.
    G.add_weighted_edges_from(
        zip(data['reviewerID'], data['asin'], data['overall']),
        weight='weight',
    )
    return G

# Calculate PageRank scores
def calculate_pagerank(G):
    """
    Computes weighted PageRank scores for the nodes of a graph.

    Args:
    G: networkx graph; the 'weight' edge attribute is used as the edge weight.

    Returns:
    The result of nx.pagerank (a mapping of node -> PageRank score).
    """
    scores = nx.pagerank(G, weight='weight')
    return scores

# Build the graph and calculate PageRank
product_graph = create_product_graph(data)
pagerank_scores = calculate_pagerank(product_graph)

# Convert PageRank scores to DataFrame for easy access.
# The graph contains reviewer nodes as well as product nodes, so only rows whose
# node ID is a product ID are kept; otherwise reviewer IDs would end up in the
# 'asin' index.
pagerank_df = pd.DataFrame(list(pagerank_scores.items()), columns=['asin', 'PageRank'])
pagerank_df = pagerank_df[pagerank_df['asin'].isin(data['asin'])].set_index('asin')

In [None]:
import gzip
import json
from typing import Any

def decompress_gzip_file(file_path: str) -> Any:
    """
    Decompresses a gzipped JSON file and returns its content.

    Both a single JSON document and JSON Lines (one JSON value per line) are
    supported. The content is first parsed as one document; if that fails, it
    is parsed line by line, skipping blank lines.

    Args:
    file_path (str): The path to the gzipped JSON file.

    Returns:
    Any: The parsed document, or a list with one parsed value per non-blank
    line when the file is in JSON Lines format.

    Raises:
    json.JSONDecodeError: If the content is neither a valid JSON document nor
    valid JSON Lines.
    """
    with gzip.open(file_path, 'rt', encoding='utf-8') as f:
        try:
            return json.load(f)
        except json.JSONDecodeError:
            # Several top-level values trigger "Extra data" in json.load;
            # rewind and treat every non-blank line as its own JSON value.
            f.seek(0)
            return [json.loads(line) for line in f if line.strip()]

# Example usage
# gzip.open() only reads local files, so the archive is downloaded once and the
# local copy is reused on later runs.
file_url = 'http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz'
file_path = os.path.basename(file_url)
if not os.path.exists(file_path):
    urllib.request.urlretrieve(file_url, file_path)

# Stored under its own name so that the `data` DataFrame used by the other
# cells is not replaced by the parsed JSON content.
raw_reviews = decompress_gzip_file(file_path)

In [None]:
# Create user profiles based on average ratings for each product:
# one row per reviewer, one column per product, 0 where there is no rating.
mean_ratings = data.groupby(['reviewerID', 'asin'])['overall'].mean()
user_profiles = mean_ratings.unstack(fill_value=0)

# Standardize the user profiles for cosine similarity calculation
scaler = StandardScaler()
user_profiles_scaled = scaler.fit_transform(user_profiles)

# Calculate cosine similarity matrix between users.
# NOTE(review): the profile matrix (reviewers x products) and the similarity
# matrix (reviewers x reviewers) are dense; confirm they fit in memory for the
# full dataset.
user_similarity = cosine_similarity(user_profiles_scaled)
reviewer_ids = user_profiles.index
user_similarity_df = pd.DataFrame(user_similarity, index=reviewer_ids, columns=reviewer_ids)
