In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from web3 import Web3
import json

In [2]:
# Read the Last.fm CSV and normalise the two columns used downstream to the
# generic recommender names `user_id` / `item_id`.
# NOTE(review): the path below is an absolute, machine-specific location; the
# notebook will not run elsewhere until it points at a local copy of the file.
df = pd.read_csv(
    r"C:\Users\anuna\Downloads\Last.fm_data.csv\Last.fm_data.csv"
).rename(columns={"Username": "user_id", "Artist": "item_id"})

# Collapse the raw rows to one row per (user, item) pair; `count` is the number
# of CSV rows that shared that pair.
interaction_counts = (
    df.groupby(['user_id', 'item_id'])
    .size()
    .reset_index(name='count')
)

def count_to_rating(count):
    """Turn an interaction count into an integer rating between 1 and 5.

    The count is mapped to ``log10(1 + count) * 5`` and truncated to an int;
    anything below 1 is raised to 1 and anything above 5 is capped at 5.
    """
    scaled = int(np.log1p(count) / np.log(10) * 5)
    if scaled < 1:
        return 1
    if scaled > 5:
        return 5
    return scaled

# Derive the 1-5 rating for every (user, item) pair from its interaction count.
interaction_counts['rating'] = interaction_counts['count'].apply(count_to_rating)

# Stamp all rows with a single "now" value (epoch seconds, float).
# pandas' Timestamp.timestamp() interprets a timezone-naive Timestamp as UTC,
# so the naive local time returned by Timestamp.now() would yield an epoch that
# is shifted by the machine's UTC offset. Asking for the current time in UTC
# gives the true POSIX timestamp.
interaction_counts['timestamp'] = pd.Timestamp.now(tz="UTC").timestamp()

In [3]:
# Minimal ABI for the deployed contract: only the pieces this notebook uses.
#   * submitRating(bytes32 ratingHash)   - state-changing function, no return value
#   * RatingSubmitted(address indexed user, bytes32 ratingHash, uint256 timestamp) - event
abi = [
    {
        "type": "function",
        "name": "submitRating",
        "stateMutability": "nonpayable",
        "inputs": [
            {"name": "ratingHash", "type": "bytes32", "internalType": "bytes32"},
        ],
        "outputs": [],
    },
    {
        "type": "event",
        "name": "RatingSubmitted",
        "anonymous": False,
        "inputs": [
            {"name": "user", "type": "address", "internalType": "address", "indexed": True},
            {"name": "ratingHash", "type": "bytes32", "internalType": "bytes32", "indexed": False},
            {"name": "timestamp", "type": "uint256", "internalType": "uint256", "indexed": False},
        ],
    },
]

# Address the contract is expected to live at on the node below.
contract_address = "0xe7f1725E7734CE288F8367e1Bb143E90bb3F0512"

# JSON-RPC connection to a node listening on localhost:8545.
w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:8545"))

contract = w3.eth.contract(abi=abi, address=contract_address)

# All transactions in this notebook are sent from the first account the node exposes.
owner_account = w3.eth.accounts[0]

In [4]:
def hash_rating(row):
    """Return the Keccak digest that identifies one rating row.

    The digest is computed over the text ``user_id|item_id|rating|timestamp``
    built from the corresponding fields of ``row``.
    """
    user, item, rating, stamp = (
        row[field] for field in ('user_id', 'item_id', 'rating', 'timestamp')
    )
    payload = f"{user}|{item}|{rating}|{stamp}"
    return w3.keccak(text=payload)

# Hash every rating row once and keep the digest next to the data it describes.
interaction_counts['rating_hash'] = interaction_counts.apply(hash_rating, axis=1)

# Send one submitRating transaction per digest and block until it is mined
# before moving on to the next one.
# For a quicker trial run, iterate over
# interaction_counts['rating_hash'].head(10000) instead of the full column.
for rating_hash_bytes in interaction_counts['rating_hash']:
    tx = contract.functions.submitRating(rating_hash_bytes).transact(
        {'from': owner_account}
    )
    w3.eth.wait_for_transaction_receipt(tx)
    print("Submitted hash:", rating_hash_bytes.hex())

Submitted hash: bf95c3373f9d1fa7a5fd69b62aeadaa9bea0c44f55d02632ddf5f7b380f637ac
Submitted hash: 6a065e556ff3b09bbaa0c4703e879f1e2e4d08f7e141f1fdad15c46af851a6e7
Submitted hash: cb2e530c2a158690349a59b0c8ac6c3d268a0d385a7d5fa4e05751dd5834c373
Submitted hash: 69cc12c58e21ed79addb1bceed264196862b63b990f8270fb2431adee365f104
Submitted hash: c163290527c7ef194d738a6543cdc1317908aafdb470e84236924cac4077aab2
Submitted hash: bd0d256628af652c4902a027e25c9202b4e1231fa5c0074888f2ab80fbb73c5e
Submitted hash: 0acde6be80cd108166740a495899b6b68d586378e60b44c6f702e5c0f66c65d0
Submitted hash: 477dd8dba21480f0fca63b5b7bf06b6704ade28113f03b74073de4b541dc66c9
Submitted hash: 1152955390c25a8f7a79450a36f1e41a9334c3419dced2be45224129233c12f7
Submitted hash: 7def8a42c4cd70b0771508c735fe75ab5b0a7000ff8e6b225e95b4b8bc321945
Submitted hash: 426d0e611b6ec7230fa2ff5710035be8ed9291489bac428a2cd7c640da45c2f6
Submitted hash: 97b04c0171b76b7e090b76fcce05202210d387d4e6a74e20b1d400de714ec39a
Submitted hash: f17938d6bf43

In [5]:
def get_verified_hashes():
    """Collect the rating hashes recorded in RatingSubmitted events.

    Scans the contract's ``RatingSubmitted`` logs starting at block 0 and
    returns the set of ``ratingHash`` values as hex strings.
    """
    log_filter = contract.events.RatingSubmitted.create_filter(from_block=0)
    return {
        entry['args']['ratingHash'].hex()
        for entry in log_filter.get_all_entries()
    }

# Hashes the contract has emitted events for.
verified_hashes = get_verified_hashes()

# Hex form of each local digest, so it can be compared with the event data.
interaction_counts['rating_hash_hex'] = interaction_counts['rating_hash'].map(
    lambda digest: digest.hex()
)

# Keep only the ratings whose hash also appears on-chain; copy so later edits
# do not touch `interaction_counts`.
verified_df = interaction_counts.loc[
    interaction_counts['rating_hash_hex'].isin(verified_hashes)
].copy()
print("Verified rows:", len(verified_df))

Verified rows: 51790


In [6]:
# User x item matrix of verified ratings; pairs without a rating become 0.
ratings_matrix = verified_df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

if not ratings_matrix.empty:
    # Item-item cosine similarity: transpose so that each row fed to
    # cosine_similarity is one item's vector of user ratings.
    item_similarity = cosine_similarity(ratings_matrix.T)
    item_similarity_df = pd.DataFrame(item_similarity, index=ratings_matrix.columns, columns=ratings_matrix.columns)

    def recommend_items(user_id, top_n=5):
        """Score unseen items for ``user_id`` and return the best ``top_n``.

        Each item's score is the dot product of its similarity row with the
        user's rating vector. Items the user already rated (rating > 0) are
        removed before ranking.

        Returns an empty list when ``user_id`` is not in ``ratings_matrix``;
        otherwise a Series of scores indexed by item, sorted descending.
        """
        if user_id not in ratings_matrix.index:
            return []
        user_ratings = ratings_matrix.loc[user_id]
        already_rated = user_ratings[user_ratings > 0].index
        scores = item_similarity_df.dot(user_ratings)
        scores = scores.drop(already_rated, errors='ignore')
        return scores.sort_values(ascending=False).head(top_n)

    sample_user = ratings_matrix.index[0]
    # Use a real newline escape here: the previous "\\n" printed the two
    # literal characters backslash + n in front of the heading.
    print(f"\n=== Recommendations for User '{sample_user}' ===")
    print(recommend_items(sample_user))
else:
    print("No verified ratings available to generate recommendations.")

\n=== Recommendations for User 'Babs_05' ===
item_id
Audioslave          7965.193495
Ludwig Goransson    7883.848930
ScHoolboy Q         7828.657347
MARBLE EMPIRE       7767.567730
SpaceGhostPurrp     7767.567730
dtype: float64


In [7]:
# ratings_matrix = verified_df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

# if not ratings_matrix.empty:
#     item_similarity = cosine_similarity(ratings_matrix.T)
#     item_similarity_df = pd.DataFrame(item_similarity, index=ratings_matrix.columns, columns=ratings_matrix.columns)

#     def recommend_items(user_id, top_n=5):
#         if user_id not in ratings_matrix.index:
#             return []
#         user_ratings = ratings_matrix.loc[user_id]
#         scores = item_similarity_df.dot(user_ratings).div(item_similarity_df.sum(axis=1))
#         return scores.sort_values(ascending=False).head(top_n)

#     sample_user = ratings_matrix.index[0]
#     print(f"\\n=== Recommendations for User '{sample_user}' ===")
#     print(recommend_items(sample_user))
# else:
#     print("No verified ratings available to generate recommendations.")

In [8]:
# Sanity check on the sample user: how many items they rated and how many
# candidates are left once those are excluded from the score vector.
user_ratings = ratings_matrix.loc[sample_user]
already_rated = user_ratings.loc[user_ratings.gt(0)].index
print("Rated items:", len(already_rated))

scores = item_similarity_df.dot(user_ratings).drop(already_rated, errors='ignore')
print("Remaining candidate items:", scores.shape[0])


Rated items: 9325
Remaining candidate items: 13498


In [9]:
# ratings_matrix = interaction_counts.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

# from sklearn.metrics.pairwise import cosine_similarity

# item_similarity = cosine_similarity(ratings_matrix.T)
# item_similarity_df = pd.DataFrame(item_similarity, index=ratings_matrix.columns, columns=ratings_matrix.columns)

# if not ratings_matrix.empty:
#     item_similarity = cosine_similarity(ratings_matrix.T)
#     item_similarity_df = pd.DataFrame(item_similarity, index=ratings_matrix.columns, columns=ratings_matrix.columns)

#     def recommend_items(user_id, top_n=5):
#         if user_id not in ratings_matrix.index:
#             return []

#         user_ratings = ratings_matrix.loc[user_id]
#         already_rated = user_ratings[user_ratings > 0].index
#         scores = item_similarity_df.dot(user_ratings)
#         scores = scores.drop(already_rated, errors='ignore')

#         if scores.empty:
#             item_popularity = ratings_matrix.sum().drop(already_rated).sort_values(ascending=False)
#             return item_popularity.head(top_n)

#         return scores.sort_values(ascending=False).head(top_n)

#     sample_user = ratings_matrix.index[1]
#     print(f"\\n=== Recommendations for User '{sample_user}' ===")
#     print(recommend_items(sample_user))
# else:
#     print("No verified ratings available to generate recommendations.")

In [10]:
# Quick look at the matrix dimensions and at how the first user's ratings are
# distributed across the rating values (0 = no rating).
print(f"ratings_matrix shape: {ratings_matrix.shape}")
sample_user = ratings_matrix.index[0]
print(f"Sample user ID: {sample_user}")
print(ratings_matrix.loc[sample_user, :].value_counts())


ratings_matrix shape: (11, 22823)
Sample user ID: Babs_05
Babs_05
0.0    13498
1.0     5512
2.0     1381
3.0     1183
5.0      848
4.0      401
Name: count, dtype: int64


In [15]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the verified ratings for evaluation. The seed is fixed so the
# split - and therefore every metric computed from it - is the same on each run;
# without it the reported numbers change every time the cell is executed.
train_df, test_df = train_test_split(verified_df, test_size=0.2, random_state=42)

# User x item matrix built from the training rows only; missing pairs become 0.
train_matrix = train_df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

In [16]:
# Recompute the item-item cosine similarities from the training matrix only.
# This rebinds `item_similarity` / `item_similarity_df`, so anything that uses
# those names afterwards sees the training-based version.
item_similarity = cosine_similarity(train_matrix.T)
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    columns=train_matrix.columns,
    index=train_matrix.columns,
)

In [17]:
def recommend_items(user_id, top_n=5):
    """Return the ids of the ``top_n`` best-scoring unseen items for a user.

    Scores come from multiplying the item-item similarity matrix with the
    user's training ratings; items with a training rating above 0 are excluded.
    Users that do not appear in ``train_matrix`` get an empty list.
    """
    if user_id in train_matrix.index:
        user_ratings = train_matrix.loc[user_id]
        seen_items = user_ratings[user_ratings > 0].index
        ranked = (
            item_similarity_df.dot(user_ratings)
            .drop(seen_items, errors='ignore')
            .sort_values(ascending=False)
        )
        return ranked.head(top_n).index.tolist()

    return []

def precision_recall_hit(user_id, top_n=5):
    """Evaluate the recommendations for one user against the held-out ratings.

    Returns ``(precision, recall, f1, hit)`` where precision is hits / top_n,
    recall is hits / number of distinct held-out items, and ``hit`` is 1 when
    at least one recommended item is in the held-out set, else 0.
    Returns ``None`` when the user has no held-out items or when no
    recommendations could be produced.
    """
    held_out = test_df.loc[test_df['user_id'] == user_id, 'item_id'].unique()
    if len(held_out) == 0:
        return None

    recommended = recommend_items(user_id, top_n)
    if not recommended:
        return None

    hits = len(set(recommended).intersection(held_out))
    precision = hits / top_n
    recall = hits / len(held_out)
    hit = int(hits > 0)

    # Guard against 0/0 when nothing was hit.
    denominator = precision + recall
    f1 = 2 * (precision * recall) / denominator if denominator else 0

    return precision, recall, f1, hit

In [18]:
# Score every user that has held-out ratings with top-10 recommendations and
# keep the users for which metrics could be computed (the helper returns None
# otherwise).
results = [
    metrics
    for metrics in (
        precision_recall_hit(user, top_n=10)
        for user in test_df['user_id'].unique()
    )
    if metrics
]

# One row per evaluated user; the column means are the reported averages.
results_df = pd.DataFrame(results, columns=["precision", "recall", "f1", "hit_rate"])
print("Average metrics across users:")
print(results_df.mean())

Average metrics across users:
precision    0.854545
recall       0.037768
f1           0.069156
hit_rate     1.000000
dtype: float64
