In [6]:
import ast
import re

import pandas as pd

### BPRMF, Grocery and Gourmet Food

In [13]:
# Location of the BPRMF recommendation dump for the Grocery test split.
# NOTE(review): the run-folder segment embeds what looks like an absolute
# local path (/Users/ASUS/...); confirm the folder is really named this way.
prediction_path = (
    "log/BPRMF/"
    "BPRMF__/Users/ASUS/Downloads/lab_ML/big_homework/ori_ReChorus/data/Grocery_and_Gourmet_Food__0__lr=0/"
    "rec-BPRMF-test.csv"
)

def parse_array(column_value):
    """Parse one CSV cell holding a list literal into a Python object.

    Cells may contain NumPy scalar reprs such as ``np.int64(5)`` or
    ``np.float32(0.1)``. The ``np.<type>`` prefix of every such call-style
    wrapper is removed (not only ``int64``/``float32``), leaving ``(5)``,
    which ``ast.literal_eval`` evaluates to the plain number.

    Args:
        column_value: Raw cell text as passed by a ``pd.read_csv`` converter.

    Returns:
        The evaluated literal, or ``[]`` when the cell cannot be parsed.
    """
    try:
        # Drop any "np.<name>" that is immediately followed by "(" so that
        # np.float64(...), np.int32(...), etc. are handled as well.
        clean_value = re.sub(r"\bnp\.\w+(?=\()", "", column_value)
        return ast.literal_eval(clean_value)
    except Exception as e:
        # Best effort: report the offending cell and keep loading the file.
        print(f"Error parsing: {column_value}\n{e}")
        return []

# Read the tab-separated recommendation file; both list-valued columns are
# decoded with the same parser while the file is being read.
predictions = pd.read_csv(
    prediction_path,
    sep='\t',
    converters=dict.fromkeys(['rec_items', 'rec_predictions'], parse_array),
)

# Preview a few random rows.
predictions.sample(5)

Unnamed: 0,user_id,rec_items,rec_predictions
11825,11826,"[8684, 5998, 5381, 4813, 5151, 3814, 3043, 233...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
12559,12560,"[5801, 1126, 8289, 5881, 20, 965, 4720, 4737, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1900,1901,"[8665, 6283, 2177, 8612, 3418, 8705, 3842, 566...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
10691,10692,"[8086, 3402, 6027, 2685, 21, 3649, 2217, 1120,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1260,1261,"[8038, 792, 2220, 8638, 2476, 3445, 3830, 578,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [16]:
# Tab-separated test split; `neg_items` is stored as a list literal and is
# decoded into a Python list while reading.
test_path = "data/Grocery_and_Gourmet_Food/test.csv"

test = pd.read_csv(
    test_path,
    sep='\t',
    converters={'neg_items': ast.literal_eval},
)
test.sample(5)

Unnamed: 0,user_id,item_id,time,neg_items
1212,7979,2511,1339718400,"[933, 3319, 4074, 6130, 1269, 1303, 3256, 4177..."
295,11530,2164,1280707200,"[307, 3356, 7106, 3696, 3318, 1253, 1849, 3718..."
5122,13626,6657,1383696000,"[896, 7303, 5502, 2645, 2011, 8420, 992, 3867,..."
13720,11151,8513,1404518400,"[5468, 715, 6083, 1301, 6787, 4555, 2060, 825,..."
342,1034,3128,1290816000,"[6782, 170, 1653, 8647, 6074, 5223, 5716, 4796..."


In [17]:
# Pair every recommendation row with the test rows of the same user;
# users missing from either frame are dropped (inner join).
merged = pd.merge(predictions, test, how="inner", on="user_id")

In [18]:
# Start all three confusion tallies at zero.
true_positives = false_positives = false_negatives = 0

In [19]:
# Reset the tallies in this cell too, so that re-running it on its own
# cannot double-count rows on top of a previous pass.
true_positives = 0
false_positives = 0
false_negatives = 0

# Only the two columns that are actually read are iterated; each pair is one
# merged row's recommendation list and that row's `item_id`.
for predicted_items, positive_item in zip(merged['rec_items'], merged['item_id']):
    if positive_item in predicted_items:
        true_positives += 1
    else:
        false_negatives += 1

    # Every listed item other than the row's `item_id` counts as a false positive.
    false_positives += sum(1 for item in predicted_items if item != positive_item)

In [20]:
# Each ratio falls back to 0 when its denominator is 0.
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Share of merged rows whose `item_id` appears in the recommendation list.
# Guarded like the other metrics so an empty merge yields 0 instead of a
# ZeroDivisionError. Because every merged row adds exactly one to either
# true_positives or false_negatives, this value always equals `recall`.
accuracy = true_positives / len(merged) if len(merged) > 0 else 0

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

Accuracy: 1.0000
Precision: 0.0100
Recall: 1.0000
F1 Score: 0.0198


### BPRMF, MovieLens 1M TopK

In [21]:
# Location of the BPRMF recommendation dump for the MovieLens 1M TopK test split.
# NOTE(review): the run-folder segment embeds what looks like an absolute
# local path (/Users/ASUS/...); confirm the folder is really named this way.
prediction_path = (
    "log/BPRMF/"
    "BPRMF__/Users/ASUS/Downloads/lab_ML/big_homework/ori_ReChorus/data/MovieLens_1M/ML_1MTOPK__0__lr=0/"
    "rec-BPRMF-test.csv"
)

def parse_array(column_value):
    """Parse one CSV cell holding a list literal into a Python object.

    Cells may contain NumPy scalar reprs such as ``np.int64(5)`` or
    ``np.float32(0.1)``. The ``np.<type>`` prefix of every such call-style
    wrapper is removed (not only ``int64``/``float32``), leaving ``(5)``,
    which ``ast.literal_eval`` evaluates to the plain number.

    Args:
        column_value: Raw cell text as passed by a ``pd.read_csv`` converter.

    Returns:
        The evaluated literal, or ``[]`` when the cell cannot be parsed.
    """
    try:
        # Drop any "np.<name>" that is immediately followed by "(" so that
        # np.float64(...), np.int32(...), etc. are handled as well.
        clean_value = re.sub(r"\bnp\.\w+(?=\()", "", column_value)
        return ast.literal_eval(clean_value)
    except Exception as e:
        # Best effort: report the offending cell and keep loading the file.
        print(f"Error parsing: {column_value}\n{e}")
        return []

# Read the tab-separated recommendation file; both list-valued columns are
# decoded with the same parser while the file is being read.
predictions = pd.read_csv(
    prediction_path,
    sep='\t',
    converters=dict.fromkeys(['rec_items', 'rec_predictions'], parse_array),
)

# Preview a few random rows.
predictions.sample(5)

Unnamed: 0,user_id,rec_items,rec_predictions
522,1091,"[1394, 2246, 1805, 2124, 2208, 2799, 1719, 398...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1746,4155,"[3001, 898, 1782, 523, 2721, 1837, 264, 1613, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2429,5166,"[2091, 1768, 1365, 697, 3097, 2414, 3024, 1590...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2671,5437,"[1224, 2354, 1929, 2180, 879, 1659, 1437, 1829...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
34,25,"[3061, 2311, 2068, 1073, 96, 332, 737, 1406, 8...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [22]:
# Tab-separated test split; `neg_items` is stored as a list literal and is
# decoded into a Python list while reading.
test_path = "data/MovieLens_1M/ML_1MTOPK/test.csv"

test = pd.read_csv(
    test_path,
    sep='\t',
    converters={'neg_items': ast.literal_eval},
)
test.sample(5)

Unnamed: 0,user_id,item_id,time,c_hour_c,c_weekday_c,c_period_c,c_day_f,neg_items
234,1627,234,1038192000.0,10,0,1,943,"[2498, 1788, 862, 839, 2123, 2654, 132, 1789, ..."
2416,3544,1952,1044901000.0,2,1,8,1021,"[31, 1517, 2719, 3098, 1146, 827, 1077, 266, 1..."
1465,4613,968,1042074000.0,8,3,0,988,"[2551, 785, 768, 1526, 750, 3078, 1640, 2739, ..."
2284,3310,82,1044567000.0,5,4,0,1017,"[2915, 108, 644, 466, 610, 2251, 2737, 1595, 7..."
2788,2658,40,1046188000.0,23,1,7,1035,"[1897, 1748, 2645, 1170, 2903, 2078, 1726, 571..."


In [23]:
# Pair every recommendation row with the test rows of the same user;
# users missing from either frame are dropped (inner join).
merged = predictions.merge(test, on="user_id", how="inner")

# Confusion tallies, reset on every run of this cell.
true_positives = 0
false_positives = 0
false_negatives = 0

# Only the two columns that are actually read are iterated; each pair is one
# merged row's recommendation list and that row's `item_id`.
for predicted_items, positive_item in zip(merged['rec_items'], merged['item_id']):
    if positive_item in predicted_items:
        true_positives += 1
    else:
        false_negatives += 1

    # Every listed item other than the row's `item_id` counts as a false positive.
    false_positives += sum(1 for item in predicted_items if item != positive_item)

# Each ratio falls back to 0 when its denominator is 0.
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Share of merged rows whose `item_id` appears in the recommendation list.
# Guarded like the other metrics so an empty merge yields 0 instead of a
# ZeroDivisionError. Because every merged row adds exactly one to either
# true_positives or false_negatives, this value always equals `recall`.
accuracy = true_positives / len(merged) if len(merged) > 0 else 0

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

Accuracy: 0.0171
Precision: 0.0002
Recall: 0.0171
F1 Score: 0.0003
