In [1]:
import pandas as pd
import ast


### Prompt y preprocesamiento inicial

In [8]:

df = pd.read_csv('../data/codeforces_processed_data.csv')

tags = df['tags'].apply(ast.literal_eval)
all_tags = set([tag for sublist in tags for tag in sublist])

df=df.head(100)

def prompt(description):
    all_tags_str = ', '.join(all_tags)
    return f'Give this set of {all_tags_str} tags and this problem ${description}, give me the set of problem tags in the following format: greedy, implementation, dp'
    
df['prompt'] = df['description'].apply(prompt)

df.to_csv('../data/codeforce_chatgpt.csv', index=False)


### Procesar las respuestas de chatgpt

In [9]:
df = pd.read_csv('../data/codeforce_chatgpt.csv')

tags = df['tags'].apply(ast.literal_eval)
all_tags = set([tag for sublist in tags for tag in sublist])

def analice(ans):
    ans=ans.split(', ')
    
    return str([i for i in ans if i in all_tags])

df['chatgpt_tags'] = df['chatgpt_tags'].apply(analice)

df.to_csv('../data/codeforce_chatgpt.csv', index=False)



### Evaluación de los resultados 

In [6]:
df = pd.read_csv('../data/codeforce_chatgpt.csv')

df['tags'].apply(ast.literal_eval)
df['chatgpt_tags'].apply(ast.literal_eval)

0     [interactive, implementation, greedy]
1         [greedy, implementation, strings]
2             [math, number theory, graphs]
3             [math, number theory, graphs]
4        [greedy, implementation, sortings]
                      ...                  
95             [greedy, implementation, dp]
96                 [greedy, implementation]
97             [greedy, implementation, dp]
98      [dp, trees, greedy, implementation]
99             [greedy, implementation, dp]
Name: chatgpt_tags, Length: 100, dtype: object

In [7]:
from sklearn.metrics import accuracy_score, f1_score

# Etiquetas verdaderas y predichas (ejemplo)
y_true = df['tags'].tolist()  # Etiquetas verdaderas
y_pred = df['chatgpt_tags'].tolist()  # Etiquetas predichas por ChatGPT

# Tag Accuracy
accuracy = accuracy_score(y_true, y_pred)

# F1 Score (macro)
f1_macro = f1_score(y_true, y_pred, average='macro')

# F1 Score (micro)
f1_micro = f1_score(y_true, y_pred, average='micro')

# F1 Score (weighted)
f1_weighted = f1_score(y_true, y_pred, average='weighted')

f1_samples = f1_score(y_true, y_pred, average=None)
f1_samples_avg = f1_samples.mean()

print("Tag Accuracy:", accuracy)
print("F1 Score (macro):", f1_macro)
print("F1 Score (micro):", f1_micro)
print("F1 Score (weighted):", f1_weighted)
print("F1 Score (samples):", f1_samples_avg)

Tag Accuracy: 0.02
F1 Score (macro): 0.0024305555555555556
F1 Score (micro): 0.02
F1 Score (weighted): 0.017555555555555557
F1 Score (samples): 0.0024305555555555556
