In [18]:
import pandas as pd
import ast
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

### GFormer, Grocery

In [19]:
prediction_path = "/public/home/zhemodedc/tianchi/GFormer/GFormer-main/log_grocery/predict.csv"

def parse_array(column_value):
    try:
        # Replace non-standard types with Python-native ones
        clean_value = column_value.replace("np.int64", "").replace("np.float32", "")
        return ast.literal_eval(clean_value)
    except Exception as e:
        print(f"Error parsing: {column_value}\n{e}")
        return []

predictions = pd.read_csv(
    prediction_path,
    sep='\t',  # Tab-separated
    converters={
        'rec_items': parse_array,
        'rec_predictions': parse_array,
    }
)

predictions.sample(5)

Unnamed: 0,user_id,item_id,neg_items
12722,12722,8648,"[1274, 3474, 6933, 8471, 1048, 7076, 6565, 247..."
7445,7445,7623,"[8327, 6541, 8082, 1047, 6682, 7066, 1830, 815..."
13598,13598,8656,"[4620, 3855, 2199, 2333, 8109, 8118, 3264, 454..."
146,146,8480,"[4742, 7564, 5133, 8214, 1306, 5276, 8480, 311..."
12538,12538,7190,"[3712, 513, 6916, 6537, 8465, 5906, 6549, 7190..."


In [20]:
test_path = "/public/home/zhemodedc/ReChorus-master/data/Grocery_and_Gourmet_Food/test.csv"

test = pd.read_csv(test_path, sep='\t', converters={'neg_items': ast.literal_eval})
test.sample(5)

Unnamed: 0,user_id,item_id,time,neg_items
11867,2029,6279,1402012800,"[6302, 4988, 975, 7284, 8478, 2568, 4373, 3690..."
4272,14613,5123,1378598400,"[8607, 2860, 5693, 5346, 6059, 242, 6648, 4411..."
12973,1913,8062,1403654400,"[8575, 6931, 1407, 6893, 4728, 5702, 6943, 441..."
8078,9766,2165,1393718400,"[946, 6693, 7390, 1630, 4633, 8527, 2014, 4253..."
7880,5816,8581,1393286400,"[8389, 489, 3089, 3433, 4633, 428, 6508, 4582,..."


In [21]:
def evaluate_predictions(predictions, ground_truths):
    """
    Evaluate model predictions using accuracy, precision, recall, and F1 score.
    
    :param predictions: DataFrame with 'user_id', 'item_id', columns
    :param ground_truths: Dictionary with 'user_id' as keys and a list of relevant items as values
    :return: A dictionary with accuracy, precision, recall, and f1_score
    """
    all_preds = []
    all_true = []
    
    for _, row in predictions.iterrows():
        user_id = row['user_id']
        predicted_item = row['item_id']  # Single predicted item
        
        if user_id in ground_truths:
            true_items = ground_truths[user_id]  # Actual relevant items for the user
            
            # Check if the predicted item is in the list of true items
            if predicted_item in true_items:
                all_preds.append(1)  # 1 for correct prediction (true positive)
                all_true.append(1)   # 1 for true relevant item
            else:
                all_preds.append(0)  # 0 for incorrect prediction (false positive)
                all_true.append(0)   # 0 for false relevant item

    # Accuracy: Proportion of correct predictions
    accuracy = accuracy_score(all_true, all_preds)
    
    # Precision, Recall, F1 Score
    precision = precision_score(all_true, all_preds)
    recall = recall_score(all_true, all_preds)
    f1 = f1_score(all_true, all_preds)
    
    # Return evaluation metrics
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }

In [22]:
metrics = evaluate_predictions(predictions, test)

print(metrics)

{'accuracy': nan, 'precision': 0.0, 'recall': 0.0, 'f1_score': 0.0}


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### GFormer, Movie Lens

In [23]:
prediction_path = "/public/home/zhemodedc/tianchi/GFormer/GFormer-main/log_movie_lens/predict.csv"

def parse_array(column_value):
    try:
        # Replace non-standard types with Python-native ones
        clean_value = column_value.replace("np.int64", "").replace("np.float32", "")
        return ast.literal_eval(clean_value)
    except Exception as e:
        print(f"Error parsing: {column_value}\n{e}")
        return []

predictions = pd.read_csv(
    prediction_path,
    sep='\t',  # Tab-separated
    converters={
        'rec_items': parse_array,
        'rec_predictions': parse_array,
    }
)

predictions.sample(5)

Unnamed: 0,user_id,item_id,neg_items
185,185,1051,"[2560, 770, 2829, 1810, 1051, 2206, 928, 3112,..."
12,12,1557,"[1408, 2441, 2187, 2323, 1557, 2081, 1067, 183..."
86,86,2221,"[1665, 647, 2187, 2961, 2964, 2715, 1575, 2221..."
180,180,2690,"[2690, 2308, 1670, 3083, 2474, 430, 2490, 445,..."
87,87,1823,"[1796, 261, 2055, 3081, 2316, 398, 2321, 2706,..."


In [24]:
test_path = "/public/home/zhemodedc/ReChorus-master/data/MovieLens_1M/test.csv"

test = pd.read_csv(test_path, sep='\t', converters={'neg_items': ast.literal_eval})
test.sample(5)

Unnamed: 0,user_id,item_id,time,c_hour_c,c_weekday_c,c_period_c,c_day_f,neg_items
2308,3310,102,1044568000.0,5,4,0,1017,"[963, 1674, 1214, 434, 1298, 1864, 2117, 3112,..."
799,3933,2953,1039930000.0,13,6,3,963,"[2545, 927, 906, 1465, 1375, 1628, 1568, 1910,..."
243,1627,2190,1038193000.0,11,0,2,943,"[2064, 2192, 2100, 2309, 1104, 2447, 1963, 91,..."
378,229,902,1038759000.0,0,0,8,950,"[1094, 2523, 1816, 1526, 938, 3036, 183, 2505,..."
1683,368,1360,1042489000.0,4,1,8,993,"[854, 2439, 2413, 1484, 315, 2661, 2468, 1768,..."


In [25]:
metrics = evaluate_predictions(predictions, test)

print(metrics)

{'accuracy': nan, 'precision': 0.0, 'recall': 0.0, 'f1_score': 0.0}


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
