Importaciones y abrir OpernDS.csv (Tabla necesaria del dataset para estos métodos)

In [82]:
import pandas as pd
import random 


random.seed(42) 

record_ds = pd.read_csv('recordDS.csv')  
opern_data = pd.read_csv('OpernDS.csv')  

Método Random

In [83]:
def random_sheet_music(data):
    # Se Selecciona una partitura aleatoria y se retorna un objeto con los atributos de la partitura
    random_row = data.sample(1).iloc[0]
    return {
        "opern_id": random_row['opern_id'],
        "book_id": random_row['book_id'],
        "exe_num": random_row['exe_num'],
        "avg_score": random_row['avg_score']
    }

# Se selecciona una partitura al azar
random_sheet = random_sheet_music(opern_data)
print("Partitura seleccionada al azar:")
for key, value in random_sheet.items():
    print(f"{key}: {value}")


Partitura seleccionada al azar:
opern_id: 255
book_id: 0
exe_num: 466
avg_score: 73.65836909871246


Método Most Popular

In [84]:
def most_popular_sheets(data, top_n=5):
    # Obtener las partituras más populares en función de la cantidad de veces que ha sido practicada
    # que se obtiene de la columna 'exe_num'    
    popular_sheets = data.sort_values(by='exe_num', ascending=False).head(top_n)
    return popular_sheets[['opern_id', 'book_id', 'exe_num', 'avg_score']]


popular_sheets = most_popular_sheets(opern_data, top_n=5)

print("Partituras más populares:")
print(popular_sheets)



Partituras más populares:
    opern_id  book_id  exe_num  avg_score
0        250        0     1747  67.701145
22       272        1     1543  70.250162
3        253        0     1225  73.169224
23       273        1     1220  71.026066
1        251        0      916  72.238210


In [85]:
record_ds['interacted'] = record_ds['qa_array'].apply(lambda x: any(q[1] == 1 for q in eval(x)))  # Convertir qa_array de string a lista

# Filtrar las filas donde el estudiante ha interactuado
ground_truth = record_ds[record_ds['interacted']][['user_id', 'opern_id']].drop_duplicates()

print("Ground Truth:")
print(ground_truth.head())

Ground Truth:
   user_id  opern_id
1      467       272
3      467       276
5      467       283
6      472       279
7      472       274


In [86]:
# Crear recomendaciones aleatorias (se seleccionan 10 partituras aleatorias)
random_recommendations = opern_data.sample(n=10, random_state=42)
random_recommendations['predicted'] = 1

# Crear recomendaciones más populares (top 10 partituras con más ejecuciones)
most_popular_recommendations = opern_data.nlargest(10, 'exe_num')
most_popular_recommendations['predicted'] = 1

print("Recomendaciones Aleatorias:")
print(random_recommendations[['opern_id', 'predicted']])

print("Recomendaciones Más Populares:")
print(most_popular_recommendations[['opern_id', 'predicted']])


Recomendaciones Aleatorias:
    opern_id  predicted
37       289          1
24       274          1
25       275          1
36       288          1
34       286          1
39       291          1
4        254          1
12       262          1
8        258          1
3        253          1
Recomendaciones Más Populares:
    opern_id  predicted
0        250          1
22       272          1
3        253          1
23       273          1
1        251          1
2        252          1
4        254          1
24       274          1
7        257          1
6        256          1


In [87]:
interaction_dict = record_ds.apply(lambda row: (row['user_id'], row['opern_id'], any(q[1] == 1 for q in eval(row['qa_array']))), axis=1)
interaction_dict = dict(((user_id, opern_id), interacted) for user_id, opern_id, interacted in interaction_dict)

ground_truth['interacted'] = ground_truth.apply(
    lambda row: 1 if interaction_dict.get((row['user_id'], row['opern_id']), 0) else 0, axis=1
)

In [88]:
def calculate_metrics(ground_truth, recommendations):
    merged = pd.merge(ground_truth, recommendations, on='opern_id', how='left')
    
    # Se asigna 0 para no interactuado y no recomendado
    y_true = merged['interacted'].fillna(0)  
    y_pred = merged['predicted'].fillna(0)  
    
    # Cálculo de Precision, Recall, F1-Score
    TP = sum((y_true == 1) & (y_pred == 1))  # Verdaderos positivos
    FP = sum((y_true == 0) & (y_pred == 1))  # Falsos positivos
    FN = sum((y_true == 1) & (y_pred == 0))  # Falsos negativos
    TN = sum((y_true == 0) & (y_pred == 0))  # Verdaderos negativos
    
    # Precision
    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    
    # Recall
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    
    # F1-Score
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
    
    # AUC
    sorted_indices = sorted(range(len(y_pred)), key=lambda i: y_pred[i], reverse=True)
    sorted_y_true = [y_true[i] for i in sorted_indices]
    sorted_y_pred = [y_pred[i] for i in sorted_indices]
    
    # Se inicializan los valores de la tasa de verdaderos positivos (TPR) y falsos positivos (FPR)
    tpr = [0]  # True positive rate
    fpr = [0]  # False positive rate
    for i in range(1, len(sorted_y_true)):
        tp = sum(1 for j in range(i) if sorted_y_true[j] == 1)
        fp = sum(1 for j in range(i) if sorted_y_true[j] == 0)
        tpr.append(tp / (sum(1 for j in range(len(sorted_y_true)) if sorted_y_true[j] == 1)))
        fpr.append(fp / (sum(1 for j in range(len(sorted_y_true)) if sorted_y_true[j] == 0)))
    
    # Cálculo de AUC 
    auc = 0
    for i in range(1, len(tpr)):
        auc += (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]) / 2
    
    return {
        "Recall": recall,
        "Precision": precision,
        "F1 Score": f1,
        "AUC": auc
    }

# Calculo de las métricas
random_metrics = calculate_metrics(ground_truth, random_recommendations)
popular_metrics = calculate_metrics(ground_truth, most_popular_recommendations)

print("Métricas Random:", random_metrics)
print("Métricas Most Popular:", popular_metrics)


Métricas Random: {'Recall': 0.24622665643387057, 'Precision': 0.940400586223742, 'F1 Score': 0.39026862645717186, 'AUC': 0.4976005197988459}
Métricas Most Popular: {'Recall': 0.44499872090048603, 'Precision': 0.927486003732338, 'F1 Score': 0.60143486904659, 'AUC': 0.4810329939682597}
