In [None]:
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW

import json

from statistics import mean
import pickle

In [None]:
import utils_generic as generic
import st_dep as st
import model_confs as confs


import train_datamaps as train

from train import eval_func

In [None]:
# Select the compute device: CUDA when PyTorch reports it available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global RNG so that a fresh "Restart & Run All" reproduces the
# same weight initialisation and DataLoader shuffling. The repeated runs per
# task still differ from each other because the RNG state advances between them.
# NOTE(review): this seeds PyTorch only; seed `random`/`numpy` too if the
# project modules draw from them.
SEED = 0
torch.manual_seed(SEED)

In [None]:
# Experiment identity: the model name and encoding type are interpolated into
# the vocabulary file name and into every checkpoint path below.
model_name, encoding_type = 'distilbert', ''
# Model configuration object taken from the project's model_confs module.
model_conf = confs.distilbert_conf

# Datos ConvAI2

In [None]:
# Load the complete ConvAI2 dataset from JSON.
# Forward slashes work on Windows and POSIX alike; the previous backslash
# literal relied on invalid escape sequences (`\C`, `\c`), which Python
# deprecates. The explicit encoding matches the MD-gender load and avoids
# depending on the platform default.
with open('Datasets/ConvAI2/convai2_complete.json', 'r', encoding="utf8") as f:
    data = json.load(f)

In [None]:
# Pull the train and validation partitions out of the loaded ConvAI2 payload.
convai_train, convai_val = data['train'], data['validation']

# Load the pickled vocabulary object for the selected encoding type.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(f'vocab_{encoding_type}.pkl', 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)

# Tokenize both partitions with the same task list, vocabulary and model config
# (train first, then validation; each call gets its own task list).
convai_train_token, convai_val_token = (
    generic.tokenize_dataset_with_dependencies(split, ['about', 'to', 'as'], vocab, model_conf)
    for split in (convai_train, convai_val)
)

In [None]:
# One single-task training dataset per task, keyed by task name.
convai_train_dataset = {
    task: st.DatasetSingleTaskDep(convai_train_token, task, eval=False)
    for task in ('to', 'as', 'about')
}

In [None]:
# One single-task validation dataset per task, keyed by task name.
# NOTE(review): built with eval=False exactly like the training datasets —
# confirm that is the intended mode for validation data.
convai_val_dataset = {
    task: st.DatasetSingleTaskDep(convai_val_token, task, eval=False)
    for task in ('to', 'as', 'about')
}

# Datos md_gender

In [None]:
# Load the complete MD-gender dataset from JSON (UTF-8 encoded).
# Forward slashes work on Windows and POSIX alike; the previous backslash
# literal relied on the invalid escape sequence `\m`, which Python deprecates.
with open('Datasets/md_gender/md_complete.json', 'r', encoding="utf8") as f:
    md_data = json.load(f)

In [None]:
# Tokenize the MD-gender data with the same task list, vocabulary and model
# configuration used for ConvAI2.
md_tokenized = generic.tokenize_dataset_with_dependencies(
    md_data, ['about', 'to', 'as'], vocab, model_conf
)

# One evaluation-mode (eval=True) single-task dataset per task.
md_dataset = {
    task: st.DatasetSingleTaskDep(md_tokenized, task, eval=True)
    for task in ('to', 'as', 'about')
}

# Creación dataloaders

In [None]:
# Shuffled training loaders (batches of 128), one per task.
dl_train = {
    task: DataLoader(
        convai_train_dataset[task],
        batch_size=128,
        shuffle=True,
        collate_fn=st.collate_fn,
    )
    for task in ('to', 'as', 'about')
}

In [None]:
# Validation loaders (batches of 128), one per task.
# shuffle=False: validation is only scored, never trained on, so a fixed batch
# order keeps the validation pass deterministic from epoch to epoch and avoids
# a pointless reshuffle.
dl_val = {
    task: DataLoader(
        convai_val_dataset[task],
        batch_size=128,
        shuffle=False,
        collate_fn=st.collate_fn,
    )
    for task in ('to', 'as', 'about')
}

In [None]:
# Evaluation loaders over the MD-gender datasets (batches of 128, fixed order),
# one per task.
dl_eval = {
    task: DataLoader(
        md_dataset[task],
        batch_size=128,
        shuffle=False,
        collate_fn=st.collate_fn,
    )
    for task in ('to', 'as', 'about')
}

# ABOUT

In [None]:
# Hyper-parameters and training task for the ABOUT section.
learning_rate = 1e-6
num_epochs = 100
task_name = 'about'

# Accumulator for evaluation results: for each evaluation task it keeps, per
# score, one list per group plus a flat list of accuracies. Every list is a
# distinct object and receives one entry per evaluated run.
global_metrics = {
    eval_task: {
        **{
            score: {group: [] for group in ('weighted_avg', 'average', 'female', 'male')}
            for score in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for eval_task in ('about', 'to', 'as')
}

In [None]:
# Run 1: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_1'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

In [None]:
# Train run 1 with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)


In [None]:
# Run 2: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_2'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)


In [None]:
# Run 3: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_3'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)


## Resultados finales (modelos entrenados en ABOUT)

In [None]:
# Print, for each evaluation task, every score averaged over the runs that
# were accumulated in global_metrics.
# Note: statistics.mean raises StatisticsError on an empty list, so the
# evaluation cells must have been executed before this one.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for task in ['about', 'to', 'as']:
        scores = global_metrics[task][metric]
        print(task.upper())
        print(f'Resultado global {metric}:', mean(scores['average']))
        # Label fix: a space was missing between "ponderado" and the metric name.
        print(f'Resultado global ponderado {metric}:', mean(scores['weighted_avg']))
        print(f'{metric} etiqueta male:', mean(scores['male']))
        print(f'{metric} etiqueta female: ', mean(scores['female']))
        print('\n')

# Accuracy is stored as a flat list per task, so it is averaged separately.
print('Accuracy medio de los 3 modelos: \n')
for task in ['about', 'to', 'as']:
    print('\n', task.upper())
    print('Resultado global accuracy:', mean(global_metrics[task]['acc']))

# TO

In [None]:
# Hyper-parameters and training task for the TO section.
learning_rate = 1e-6
num_epochs = 100
task_name = 'to'

# Fresh accumulator for evaluation results: for each evaluation task it keeps,
# per score, one list per group plus a flat list of accuracies. Every list is
# a distinct object and receives one entry per evaluated run.
global_metrics = {
    eval_task: {
        **{
            score: {group: [] for group in ('weighted_avg', 'average', 'female', 'male')}
            for score in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for eval_task in ('about', 'to', 'as')
}

In [None]:
# Run 1: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_1'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 2: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_2'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True. The second call was missing only in
# this cell; it is added so this run produces the same pair of plots as every
# other run in the notebook.
train.get_datamap_complete_graph(p, correctness_vector=c, num_epochs=epochs)
train.get_datamap_complete_graph(p, correctness_vector=c, num_epochs=epochs, show_samples=True)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 3: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_3'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

## Resultados finales (modelos entrenados en TO)

In [None]:
# Print, for each evaluation task, every score averaged over the runs that
# were accumulated in global_metrics.
# Note: statistics.mean raises StatisticsError on an empty list, so the
# evaluation cells must have been executed before this one.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for task in ['about', 'to', 'as']:
        scores = global_metrics[task][metric]
        print(task.upper())
        print(f'Resultado global {metric}:', mean(scores['average']))
        # Label fix: a space was missing between "ponderado" and the metric name.
        print(f'Resultado global ponderado {metric}:', mean(scores['weighted_avg']))
        print(f'{metric} etiqueta male:', mean(scores['male']))
        print(f'{metric} etiqueta female: ', mean(scores['female']))
        print('\n')

# Accuracy is stored as a flat list per task, so it is averaged separately.
print('Accuracy medio de los 3 modelos: \n')
for task in ['about', 'to', 'as']:
    print('\n', task.upper())
    print('Resultado global accuracy:', mean(global_metrics[task]['acc']))

# AS

In [None]:
# Hyper-parameters and training task for the AS section.
learning_rate = 1e-6
num_epochs = 100
task_name = 'as'

# Fresh accumulator for evaluation results: for each evaluation task it keeps,
# per score, one list per group plus a flat list of accuracies. Every list is
# a distinct object and receives one entry per evaluated run.
global_metrics = {
    eval_task: {
        **{
            score: {group: [] for group in ('weighted_avg', 'average', 'female', 'male')}
            for score in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for eval_task in ('about', 'to', 'as')
}

In [None]:
# Run 1: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_1'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 2: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_2'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 3: checkpoint path and a newly constructed two-label model on `device`.
save_path = '{}_{}_{}_3'.format(model_name, encoding_type, task_name)
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)

# Train with AdamW (weight decay 0.1) on the loaders of the current task.
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# save_path is handed to train_function; the evaluation cell later reloads a
# state dict from that same path (presumably written by train_function — confirm).
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist the two returned training-dynamics objects next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Call the datamap graph helper twice for this run: once with its default
# options and once with show_samples=True.
datamap_kwargs = {'correctness_vector': c, 'num_epochs': epochs}
train.get_datamap_complete_graph(p, **datamap_kwargs)
train.get_datamap_complete_graph(p, show_samples=True, **datamap_kwargs)

In [None]:
# Rebuild the model and restore the weights stored at save_path.
# map_location=device remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model = st.SingleTaskDep(model_conf, vocab, num_labels=2).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Score the restored model on every evaluation loader and append the results
# to global_metrics so they can be averaged across runs.
for task in ['about', 'to', 'as']:
    print('\n')
    # Message fix: the original repeated the words "el conjunto".
    print(f"Evaluación en el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is stored as a single value per evaluation.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric is a mapping of group name -> value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

## Resultados finales (modelos entrenados en AS)

In [None]:
# Print, for each evaluation task, every score averaged over the runs that
# were accumulated in global_metrics.
# Note: statistics.mean raises StatisticsError on an empty list, so the
# evaluation cells must have been executed before this one.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for task in ['about', 'to', 'as']:
        scores = global_metrics[task][metric]
        print(task.upper())
        print(f'Resultado global {metric}:', mean(scores['average']))
        # Label fix: a space was missing between "ponderado" and the metric name.
        print(f'Resultado global ponderado {metric}:', mean(scores['weighted_avg']))
        print(f'{metric} etiqueta male:', mean(scores['male']))
        print(f'{metric} etiqueta female: ', mean(scores['female']))
        print('\n')

# Accuracy is stored as a flat list per task, so it is averaged separately.
print('Accuracy medio de los 3 modelos: \n')
for task in ['about', 'to', 'as']:
    print('\n', task.upper())
    print('Resultado global accuracy:', mean(global_metrics[task]['acc']))