In [2]:
import pandas as pd
import numpy as np
import networkx as nx
from web3 import Web3
import json

# Read the Last.fm CSV and rename the two columns used downstream
df = pd.read_csv(r"Last.fm_data.csv").rename(
    columns={"Username": "user_id", "Artist": "item_id"}
)

# Number of rows recorded for every (user_id, item_id) pair
pair_sizes = df.groupby(['user_id', 'item_id']).size()
interaction_counts = pair_sizes.reset_index(name='count')

In [3]:
# Map an interaction count onto an integer rating between 1 and 5
def count_to_rating(count):
    """Return ``int(log10(1 + count) * 5)`` clamped to the inclusive range 1-5."""
    scaled = int(np.log1p(count) / np.log(10) * 5)
    if scaled < 1:
        return 1
    if scaled > 5:
        return 5
    return scaled

# Derive the 1-5 rating of every (user, item) pair from its interaction count
interaction_counts['rating'] = interaction_counts['count'].apply(count_to_rating)
# Stamp every row with the same POSIX time, taken once when this cell runs.
# The Timestamp is created timezone-aware (UTC): pandas evaluates
# ``.timestamp()`` on a naive Timestamp as if it were UTC, so a naive local
# ``now()`` would be shifted by the machine's UTC offset.
interaction_counts['timestamp'] = pd.Timestamp.now(tz="UTC").timestamp()

In [4]:
# Connect to local Ethereum node
w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:8545"))
# Fail early with a clear message instead of an opaque transport error later on
if not w3.is_connected():
    raise ConnectionError("Could not reach an Ethereum node at http://127.0.0.1:8545")

# The first account exposed by the node is used as the `from` address of later transactions
node_accounts = w3.eth.accounts
if not node_accounts:
    raise RuntimeError("The connected Ethereum node exposes no accounts to send transactions from")
owner_account = node_accounts[0]

# Contract interface description; replace with the ABI of your own deployed contract
abi = [
    # Event: RatingSubmitted(address indexed user, bytes32 ratingHash, uint256 timestamp)
    {
        "anonymous": False,
        "inputs": [
            {"indexed": True, "internalType": "address", "name": "user", "type": "address"},
            {"indexed": False, "internalType": "bytes32", "name": "ratingHash", "type": "bytes32"},
            {"indexed": False, "internalType": "uint256", "name": "timestamp", "type": "uint256"},
        ],
        "name": "RatingSubmitted",
        "type": "event",
    },
    # Function: submitRating(bytes32 ratingHash), non-payable, no outputs
    {
        "inputs": [
            {"internalType": "bytes32", "name": "ratingHash", "type": "bytes32"},
        ],
        "name": "submitRating",
        "outputs": [],
        "stateMutability": "nonpayable",
        "type": "function",
    },
]


In [5]:
# Bind the ABI to the deployed contract; the address string is a placeholder to be filled in
contract_address = "put your contract address here"
contract = w3.eth.contract(abi=abi, address=contract_address)

# Hash every interaction row so that only the digest needs to be sent on-chain
def hash_rating(row):
    """Return the keccak digest of the row rendered as ``user_id|item_id|rating|timestamp``."""
    user, item = row['user_id'], row['item_id']
    rating, stamp = row['rating'], row['timestamp']
    return w3.keccak(text=f"{user}|{item}|{rating}|{stamp}")

# One digest per row, stored alongside the data it was computed from
rating_hashes = interaction_counts.apply(hash_rating, axis=1)
interaction_counts['rating_hash'] = rating_hashes

In [6]:
# Send one submitRating transaction per interaction hash and wait for it to be mined.
# Only the hash column is needed, so iterate over it directly instead of
# building a full row object for every record with iterrows().
for rating_hash in interaction_counts['rating_hash']:
    tx_hash = contract.functions.submitRating(rating_hash).transact({'from': owner_account})
    receipt = w3.eth.wait_for_transaction_receipt(tx_hash)
    # A mined transaction may still have reverted; a receipt status other than 1
    # means the call did not execute successfully, so report it separately.
    if receipt.get('status', 1) != 1:
        print(f"FAILED tx: {tx_hash.hex()} | Block: {receipt.blockNumber}")
        continue
    print(f"Submitted tx: {tx_hash.hex()} | Block: {receipt.blockNumber}")


Submitted tx: f62f6da5881e943ae3eeb643163c9da0aabb05df735dfb0a0257d77d0d66c4df | Block: 113812
Submitted tx: 192550f4f60abb87a36de8604de1e8642a04f55e2489d330195d2cb4247dbae4 | Block: 113813
Submitted tx: 13787fe6fc46838f5a03080fab2b2e54bbc39b8f862c26684302b136366c59a7 | Block: 113814
Submitted tx: d1db7ef4d3a514ce049cd5761d2326295586768512e7c2480fe88bd0a437b480 | Block: 113815
Submitted tx: 4ebaf9aebf0869218e86a8e61a5f287ef8e584a477a25012117421c328b46af5 | Block: 113816
Submitted tx: 7ecbf39445402d0b9377d11f8692da65d6e7fe2cc2fa4b88c2f2eb259d234be7 | Block: 113817
Submitted tx: 5788829b86b8c03ab0f5cddefcfcab6708a4238d509ef010adbac5e3ce3b8c34 | Block: 113818
Submitted tx: 47a6cf451a25e44dfa4c8e6b6e6d4d1ed18d898012f73232e48d5dac3a07ba50 | Block: 113819
Submitted tx: d6ca401041e0db3159c1c2bfd080cdadebae98c19d0e50c0de1af12824fde456 | Block: 113820
Submitted tx: 22dcaa4e839c2f43d3bb4406427df86d4e2296fcd5dd351adc9240d86cffcb3d | Block: 113821
Submitted tx: 4fa5cee4a9ae2233be15396f7b364a271eaa

In [7]:
# Retrieve verified hashes from blockchain events
def get_verified_hashes():
    """Return the set of rating hashes emitted in RatingSubmitted events.

    Each hash is rendered as a hex string without a ``0x`` prefix. The value is
    passed through ``bytes(...)`` first so the rendering does not depend on
    whether the library returns plain ``bytes`` or a ``HexBytes`` object
    (``HexBytes.hex()`` has included a ``0x`` prefix in some hexbytes releases).
    """
    event_filter = contract.events.RatingSubmitted.create_filter(from_block='earliest')
    events = event_filter.get_all_entries()
    return {bytes(e['args']['ratingHash']).hex() for e in events}

verified_hashes = get_verified_hashes()
# Render the local hashes with the same bytes -> hex conversion as the on-chain
# ones, so the membership test below compares like with like.
interaction_counts['rating_hash_hex'] = interaction_counts['rating_hash'].apply(lambda x: bytes(x).hex())
# Keep only interactions whose hash was found in the contract's events
verified_df = interaction_counts[interaction_counts['rating_hash_hex'].isin(verified_hashes)].copy()

In [8]:
# Build bipartite graph: users on one side, items on the other, rating as edge weight
G = nx.Graph()
# NOTE(review): users and items share one node namespace, so a user name equal to
# an item name would collapse into a single node tagged 'item' — confirm this
# cannot occur in the data.
G.add_nodes_from(verified_df['user_id'], bipartite='user')
G.add_nodes_from(verified_df['item_id'], bipartite='item')

# Add every (user, item, rating) edge in one bulk call rather than a per-row
# iterrows() loop; the third tuple element is stored under the 'weight' key.
G.add_weighted_edges_from(
    zip(verified_df['user_id'], verified_df['item_id'], verified_df['rating']),
    weight='weight',
)

In [9]:
# Recommend using Personalized PageRank
def graph_recommend(user_id, top_n=10):
    """Rank items the user is not yet linked to by personalized PageRank score.

    Returns up to ``top_n`` ``(item, score)`` pairs in descending score order,
    or an empty list when ``user_id`` is not a node of ``G``.
    """
    if user_id in G:
        # Random walk restarted at this user only (alpha is the damping factor)
        scores = nx.pagerank(G, personalization={user_id: 1}, alpha=0.6)
        neighbours = set(G[user_id])
        candidates = {}
        for node, score in scores.items():
            # Skip anything the user is already connected to
            if node in neighbours:
                continue
            # Only nodes tagged as items are eligible recommendations
            if G.nodes[node].get('bipartite') != 'item':
                continue
            candidates[node] = score
        ranked = sorted(candidates.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:top_n]
    return []

In [10]:
# Demo: recommendations for the user of the first verified interaction
example_user = verified_df['user_id'].iloc[0]
print(f"Recommendations for {example_user}:")
example_recommendations = graph_recommend(example_user)
print(example_recommendations)

Recommendations for Babs_05:
[('The Longest Johns', 4.2746461154849286e-05), ('Hans Zimmer', 4.20768640106887e-05), ('Tim Hecker', 3.782883838658928e-05), ('Aaron Frazer', 3.663126751446139e-05), ('Notwist', 2.748002234142936e-05), ('Bibio', 2.7311116170574465e-05), ('Sacred Paws', 2.6923175312528678e-05), ('Helios', 2.5561646369843552e-05), ('Seether', 2.4681127502122876e-05), ('Twin Shadow', 2.4444934072716993e-05)]


In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: hold out 20% of the (user, item) interactions for evaluation
train_df, test_df = train_test_split(interaction_counts, random_state=42, test_size=0.2)

# Step 2: user x item rating matrix from the training split; missing pairs become 0
train_pivot = train_df.pivot(index='user_id', columns='item_id', values='rating')
train_matrix = train_pivot.fillna(0)

# Step 3: item-item cosine similarity, labelled by item on both axes
item_labels = train_matrix.columns
item_similarity = cosine_similarity(train_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity, index=item_labels, columns=item_labels)


In [22]:
def recommend_items(user_id, top_n=5):
    """Return up to ``top_n`` item ids for ``user_id`` from item-item similarity.

    Every item is scored by the dot product of its similarity row with the
    user's training ratings; items the user rated in training are excluded.
    Returns an empty list when the user has no row in ``train_matrix``.
    """
    if user_id in train_matrix.index:
        ratings = train_matrix.loc[user_id]
        # Items with a positive training rating must not be recommended again
        seen = ratings[ratings > 0].index
        scores = item_similarity_df.dot(ratings).drop(seen, errors='ignore')
        best = scores.sort_values(ascending=False).head(top_n)
        return best.index.tolist()
    return []

def precision_recall_hit(user_id, top_n=5):
    """Score the top-``top_n`` recommendations for one user against ``test_df``.

    Returns ``(precision, recall, f1, hit)``, or ``None`` when the user has no
    test items or no recommendations could be produced.
    """
    held_out = test_df.loc[test_df['user_id'] == user_id, 'item_id'].unique()
    if len(held_out) == 0:
        return None

    predicted = recommend_items(user_id, top_n)
    if not predicted:
        return None

    hits = len(set(predicted).intersection(held_out))
    # Precision is measured against the requested list length, recall against the held-out items
    precision = hits / top_n
    recall = hits / len(held_out)
    hit = int(hits > 0)
    denominator = precision + recall
    f1 = 2 * (precision * recall) / denominator if denominator else 0

    return precision, recall, f1, hit


In [23]:
# Evaluate every user that appears in the test split; users without metrics are skipped
per_user_metrics = (
    precision_recall_hit(user, top_n=10) for user in test_df['user_id'].unique()
)
results = [metrics for metrics in per_user_metrics if metrics]

# One row per evaluated user, then the column-wise mean
metric_names = ["precision", "recall", "f1", "hit_rate"]
results_df = pd.DataFrame(results, columns=metric_names)
print("Average metrics across users:")
print(results_df.mean())


Average metrics across users:
precision    0.890909
recall       0.039487
f1           0.072239
hit_rate     1.000000
dtype: float64
