In [None]:
# # Re-run the full blockchain-integrated federated graph recommender using the re-uploaded file

# import numpy as np
# import pandas as pd
# import networkx as nx
# from sklearn.metrics import precision_score, recall_score, f1_score
# import random

# # Parameters
# NUM_CLIENTS = 5
# TOP_K = 5

# # Load dataset
# df = pd.read_csv("/mnt/data/Last.fm_data.csv")
# df = df.rename(columns={'Username': 'user_id', 'Artist': 'item_id'})
# df = df[['user_id', 'item_id']]
# df['count'] = 1
# df = df.groupby(['user_id', 'item_id']).count().reset_index()
# df['rating'] = pd.qcut(df['count'], q=5, labels=False) + 1

# # Partition users for simulation
# users = df['user_id'].unique()
# random.shuffle(users)
# client_user_splits = np.array_split(users, NUM_CLIENTS)

# client_datasets = []
# for users_subset in client_user_splits:
#     client_df = df[df['user_id'].isin(users_subset)]
#     client_datasets.append(client_df)

# # Graph-based recommender functions
# def build_graph(df):
#     G = nx.Graph()
#     for _, group in df.groupby("user_id"):
#         items = group['item_id'].tolist()
#         for i in range(len(items)):
#             for j in range(i + 1, len(items)):
#                 G.add_edge(items[i], items[j], weight=G.get_edge_data(items[i], items[j], {}).get('weight', 0) + 1)
#     return G

# def recommend(graph, user_df, top_k=TOP_K):
#     user_items = set(user_df['item_id'])
#     scores = {}
#     for item in user_items:
#         for neighbor in graph.neighbors(item):
#             if neighbor not in user_items:
#                 scores[neighbor] = scores.get(neighbor, 0) + graph[item][neighbor]['weight']
#     return sorted(scores, key=scores.get, reverse=True)[:top_k]

# # Evaluation metrics
# def evaluate(client_df, train_graph, top_k=TOP_K):
#     users = client_df['user_id'].unique()
#     precisions, recalls, f1s, hits = [], [], [], []
#     for user in users:
#         user_df = client_df[client_df['user_id'] == user]
#         items = user_df['item_id'].tolist()
#         if len(items) < 2:
#             continue
#         random.shuffle(items)
#         split = int(len(items) * 0.5)
#         train_items = items[:split]
#         test_items = items[split:]

#         user_partial_df = pd.DataFrame({'user_id': [user]*len(train_items), 'item_id': train_items})
#         recs = recommend(train_graph, user_partial_df, top_k)

#         y_true = [1 if item in test_items else 0 for item in recs]
#         y_pred = [1]*len(y_true)

#         if sum(y_true) > 0:
#             precisions.append(precision_score(y_true, y_pred, zero_division=0))
#             recalls.append(recall_score(y_true, y_pred, zero_division=0))
#             f1s.append(f1_score(y_true, y_pred, zero_division=0))
#             hits.append(1 if any(y_true) else 0)

#     return {
#         'precision': np.mean(precisions) if precisions else 0,
#         'recall': np.mean(recalls) if recalls else 0,
#         'f1': np.mean(f1s) if f1s else 0,
#         'hit_rate': np.mean(hits) if hits else 0
#     }

# # Token reward system
# def reward_clients(metrics_list):
#     f1_scores = np.array([metrics['f1'] for metrics in metrics_list])
#     f1_scores = f1_scores / f1_scores.sum() if f1_scores.sum() > 0 else np.ones_like(f1_scores) / len(f1_scores)
#     base_tokens = 1000
#     return {f"client_{i}": round(score * base_tokens, 2) for i, score in enumerate(f1_scores)}

# # Run simulation
# all_metrics = []
# global_graph = build_graph(df)
# for client_df in client_datasets:
#     metrics = evaluate(client_df, global_graph)
#     all_metrics.append(metrics)

# token_rewards = reward_clients(all_metrics)
# metrics_df = pd.DataFrame(all_metrics)
# metrics_df['client'] = [f'client_{i}' for i in range(NUM_CLIENTS)]
# metrics_df['tokens'] = metrics_df['client'].map(token_rewards)
# metrics_df.set_index('client', inplace=True)
# metrics_df


In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from sklearn.metrics import precision_score, recall_score, f1_score
import random

# Simulate 5 clients
NUM_CLIENTS = 5
TOP_K = 5

In [2]:
# Load dataset
df = pd.read_csv(r"C:\Users\anuna\Downloads\Last.fm_data.csv\Last.fm_data.csv")
df = df.rename(columns={'Username': 'user_id', 'Artist': 'item_id'})
df = df[['user_id', 'item_id']]
df['count'] = 1
df = df.groupby(['user_id', 'item_id']).count().reset_index()
df['rating'] = pd.qcut(df['count'], q=5, labels=False, duplicates='drop') + 1


In [3]:
# Partition users for simulation
users = df['user_id'].unique()
random.shuffle(users)
client_user_splits = np.array_split(users, NUM_CLIENTS)

client_datasets = []
for users_subset in client_user_splits:
    client_df = df[df['user_id'].isin(users_subset)]
    client_datasets.append(client_df)

In [4]:
# Graph-based recommender functions
def build_graph(df):
    G = nx.Graph()
    for _, group in df.groupby("user_id"):
        items = group['item_id'].tolist()
        for i in range(len(items)):
            for j in range(i + 1, len(items)):
                G.add_edge(items[i], items[j], weight=G.get_edge_data(items[i], items[j], {}).get('weight', 0) + 1)
    return G

def recommend(graph, user_df, top_k=TOP_K):
    user_items = set(user_df['item_id'])
    scores = {}
    for item in user_items:
        for neighbor in graph.neighbors(item):
            if neighbor not in user_items:
                scores[neighbor] = scores.get(neighbor, 0) + graph[item][neighbor]['weight']
    return sorted(scores, key=scores.get, reverse=True)[:top_k]

In [5]:
# Evaluation metrics
def evaluate(client_df, train_graph, top_k=TOP_K):
    users = client_df['user_id'].unique()
    precisions, recalls, f1s, hits = [], [], [], []
    for user in users:
        user_df = client_df[client_df['user_id'] == user]
        items = user_df['item_id'].tolist()
        if len(items) < 2:
            continue
        random.shuffle(items)
        split = int(len(items) * 0.5)
        train_items = items[:split]
        test_items = items[split:]

        user_partial_df = pd.DataFrame({'user_id': [user]*len(train_items), 'item_id': train_items})
        recs = recommend(train_graph, user_partial_df, top_k)

        y_true = [1 if item in test_items else 0 for item in recs]
        y_pred = [1]*len(y_true)

        if sum(y_true) > 0:
            precisions.append(precision_score(y_true, y_pred, zero_division=0))
            recalls.append(recall_score(y_true, y_pred, zero_division=0))
            f1s.append(f1_score(y_true, y_pred, zero_division=0))
            hits.append(1 if any(y_true) else 0)

    return {
        'precision': np.mean(precisions) if precisions else 0,
        'recall': np.mean(recalls) if recalls else 0,
        'f1': np.mean(f1s) if f1s else 0,
        'hit_rate': np.mean(hits) if hits else 0
    }

In [6]:
# Token reward system
def reward_clients(metrics_list):
    f1_scores = np.array([metrics['f1'] for metrics in metrics_list])
    f1_scores = f1_scores / f1_scores.sum() if f1_scores.sum() > 0 else np.ones_like(f1_scores) / len(f1_scores)
    base_tokens = 1000
    return {f"client_{i}": round(score * base_tokens, 2) for i, score in enumerate(f1_scores)}

In [7]:
# Run simulation
all_metrics = []
global_graph = build_graph(df)
for client_df in client_datasets:
    metrics = evaluate(client_df, global_graph)
    all_metrics.append(metrics)

token_rewards = reward_clients(all_metrics)
metrics_df = pd.DataFrame(all_metrics)
metrics_df['client'] = [f'client_{i}' for i in range(NUM_CLIENTS)]
metrics_df['tokens'] = metrics_df['client'].map(token_rewards)
metrics_df.set_index('client', inplace=True)
metrics_df

Unnamed: 0_level_0,precision,recall,f1,hit_rate,tokens
client,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
client_0,1.0,1.0,1.0,1.0,200.0
client_1,1.0,1.0,1.0,1.0,200.0
client_2,1.0,1.0,1.0,1.0,200.0
client_3,1.0,1.0,1.0,1.0,200.0
client_4,1.0,1.0,1.0,1.0,200.0
