In [None]:
import json
import os
from statistics import mean

import numpy as np
import torch
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader

In [None]:
import utils_generic as generic
import rrnn_modelo as rrnn



import train_datamaps as train
from train import eval_func

from gensim.models import KeyedVectors

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Location of the pretrained word2vec vectors. The WORD2VEC_PATH environment variable
# overrides the machine-specific default, so the notebook can run on other machines
# without editing this cell.
word2vec_path = os.environ.get(
    "WORD2VEC_PATH",
    r"C:\Users\kuina\gensim-data\word2vec-google-news-300\word2vec-google-news-300.gz",
)
word2vec_model = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)

In [None]:
# Short tag for the embedding type; it is interpolated into every checkpoint file name.
model_name = 'w2v'

# Datos ConvAI2

In [None]:
# Load the complete ConvAI2 JSON file.
# Forward slashes replace the original backslashes: "\C" and "\c" are invalid escape
# sequences in a normal (non-raw) string literal, and "/" also works outside Windows.
# The encoding is given explicitly, matching how the md_gender file is opened.
with open('Datasets/ConvAI2/convai2_complete.json', 'r', encoding="utf8") as f:
    data = json.load(f)

In [None]:
# ConvAI2 splits used for training and validation.
convai_train, convai_val = data['train'], data['validation']

# Keep only the vocabulary that is actually needed (the training vocabulary intersected
# with the word2vec one), which shrinks the embedding matrix.
vocabulary = generic.create_w2v_vocab(convai_train, word2vec_model)

# Give every kept word a consecutive id and stack its word2vec vector in the same order.
word_to_index = {word: new_id for new_id, word in enumerate(vocabulary)}
emb_weights = np.array([word2vec_model[word] for word in vocabulary])
# NOTE(review): emb_weights is not referenced again in the visible cells — confirm whether
# the model is supposed to be initialised from it.



In [None]:
# Run generic.tokenize_dataset_rrnn over both ConvAI2 splits (train first, then validation)
# with the same task list and the same word_to_index mapping.
convai_train_token, convai_val_token = (
    generic.tokenize_dataset_rrnn(split, ['about', 'to', 'as'], word_to_index)
    for split in (convai_train, convai_val)
)

In [None]:
# One single-task dataset per task, built from the tokenized ConvAI2 training split.
# A comprehension is used so that no loop variable named `task` is left at notebook scope,
# where it would collide with the `task` setting read by the training cells.
convai_train_dataset = {
    name: rrnn.DatasetSingleTaskRRNN(convai_train_token, name, eval=False)
    for name in ['to', 'as', 'about']
}

In [None]:
# One single-task dataset per task, built from the tokenized ConvAI2 validation split.
# A comprehension is used so that no loop variable named `task` is left at notebook scope,
# where it would collide with the `task` setting read by the training cells.
convai_val_dataset = {
    name: rrnn.DatasetSingleTaskRRNN(convai_val_token, name, eval=False)
    for name in ['to', 'as', 'about']
}

# Datos md_gender

In [None]:
# Load the complete md_gender JSON file (used below as the evaluation data).
# Forward slashes replace the original backslashes: "\m" is an invalid escape sequence in a
# normal (non-raw) string literal, and "/" also works outside Windows.
with open('Datasets/md_gender/md_complete.json', 'r', encoding="utf8") as f:
    md_data = json.load(f)

In [None]:
# Tokenize md_gender with the same task list and word_to_index mapping used for ConvAI2.
md_tokenized = generic.tokenize_dataset_rrnn(md_data, ['about', 'to', 'as'], word_to_index)

# One evaluation dataset (eval=True) per task.
# A comprehension is used so that no loop variable named `task` is left at notebook scope,
# where it would collide with the `task` setting read by the training cells.
md_dataset = {
    name: rrnn.DatasetSingleTaskRRNN(md_tokenized, name, eval=True)
    for name in ['to', 'as', 'about']
}

# Creación dataloaders

In [None]:
# Training loaders, one per task: batches of 128, reshuffled on every pass.
# A comprehension is used so that no loop variable named `task` is left at notebook scope,
# where it would collide with the `task` setting read by the training cells.
# NOTE(review): shuffle=True changes the example order every epoch — confirm that the
# per-example outputs of train.train_function (used for the data maps) stay aligned.
dl_train = {
    name: DataLoader(
        convai_train_dataset[name],
        batch_size=128,
        shuffle=True,
        collate_fn=rrnn.collate_fn,
    )
    for name in ['to', 'as', 'about']
}

In [None]:
# Validation loaders, one per task: batches of 128, shuffled (as in the original cell).
# A comprehension is used so that no loop variable named `task` is left at notebook scope,
# where it would collide with the `task` setting read by the training cells.
dl_val = {
    name: DataLoader(
        convai_val_dataset[name],
        batch_size=128,
        shuffle=True,
        collate_fn=rrnn.collate_fn,
    )
    for name in ['to', 'as', 'about']
}

In [None]:
# Evaluation loaders over md_gender, one per task: batches of 128, fixed order.
# A comprehension is used so that no loop variable named `task` is left at notebook scope,
# where it would collide with the `task` setting read by the training cells.
dl_eval = {
    name: DataLoader(
        md_dataset[name],
        batch_size=128,
        shuffle=False,
        collate_fn=rrnn.collate_fn,
    )
    for name in ['to', 'as', 'about']
}

# Hiperparámetros

In [None]:
# Hyperparameters passed to every rrnn.SingleTaskRRNN built in this notebook.
vocab_size = len(vocabulary)  # size of the reduced (train ∩ word2vec) vocabulary
lstm_hidden_dim = 128
# Number of output labels; not referenced again in the visible cells.
num_labels = 2

# ABOUT

In [None]:
# Settings for the 'about' experiments.
task = 'about'
num_epochs = 100
learning_rate = 5e-5

# Fresh accumulator for this section: for each evaluation task, one list per
# recall/precision/f1 aggregate ('average', 'female', 'male') plus one list of accuracies.
# Every list is a distinct object, so the evaluation cells can append to them independently.
global_metrics = {
    eval_name: {
        **{
            metric_name: {'average': [], 'female': [], 'male': []}
            for metric_name in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for eval_name in ('about', 'to', 'as')
}

In [None]:
# Run 1, trained on the 'about' loaders: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_1'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# `save_path` is forwarded to the trainer; the call returns the tensors used for the data map.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train['about'], optimizer,
    early_stop=10, dl_val=dl_val['about'], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Run 2, trained on the 'about' loaders: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_2'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# `save_path` is forwarded to the trainer; the call returns the tensors used for the data map.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train['about'], optimizer,
    early_stop=10, dl_val=dl_val['about'], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Run 3, trained on the 'about' loaders: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_3'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# `save_path` is forwarded to the trainer; the call returns the tensors used for the data map.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train['about'], optimizer,
    early_stop=10, dl_val=dl_val['about'], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Average each metric over the runs accumulated in global_metrics, per evaluation task.
# `summary_task` (rather than `task`) keeps the notebook-level `task` setting untouched, so
# re-running a training cell after this summary still uses the intended task.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for summary_task in ['about', 'to', 'as']:
        scores = global_metrics[summary_task][metric]
        print(summary_task.upper())
        print(f'Resultado global {metric}:', mean(scores['average']))
        print(f'{metric} etiqueta male:', mean(scores['male']))
        print(f'{metric} etiqueta female: ', mean(scores['female']))
        print('\n')

# Plain string: the original f-string had no placeholders.
print('Accuracy medio de los 3 modelos: \n')
for summary_task in ['about', 'to', 'as']:
    print('\n', summary_task.upper())
    print('Resultado global accuracy:', mean(global_metrics[summary_task]['acc']))

# TO

In [None]:
# Settings for the 'to' experiments.
task = 'to'
num_epochs = 100
learning_rate = 1e-6

# Fresh accumulator for this section: for each evaluation task, one list per
# recall/precision/f1 aggregate ('average', 'female', 'male') plus one list of accuracies.
# Every list is a distinct object, so the evaluation cells can append to them independently.
global_metrics = {
    eval_name: {
        **{
            metric_name: {'average': [], 'female': [], 'male': []}
            for metric_name in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for eval_name in ('about', 'to', 'as')
}

In [None]:
# Run 1 for the current `task`: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_1'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# The train/validation loaders are selected by `task`; `save_path` is forwarded to the trainer.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train[task], optimizer,
    early_stop=10, dl_val=dl_val[task], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Run 2 for the current `task`: new model, AdamW optimizer, then training.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
save_path = f'm1_{model_name}_simple_{task}_2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# The train/validation loaders are selected by `task`; `save_path` is forwarded to the trainer.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train[task], optimizer,
    early_stop=10, dl_val=dl_val[task], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
# Fix: the *_corr.pt file used to receive `p` again, so the correctness tensor was never saved.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Run 3 for the current `task`: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_3'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# The train/validation loaders are selected by `task`; `save_path` is forwarded to the trainer.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train[task], optimizer,
    early_stop=10, dl_val=dl_val[task], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Average each metric over the runs accumulated in global_metrics, per evaluation task.
# `summary_task` (rather than `task`) keeps the notebook-level `task` setting untouched, so
# re-running a training cell after this summary still uses the intended task.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for summary_task in ['about', 'to', 'as']:
        scores = global_metrics[summary_task][metric]
        print(summary_task.upper())
        print(f'Resultado global {metric}:', mean(scores['average']))
        print(f'{metric} etiqueta male:', mean(scores['male']))
        print(f'{metric} etiqueta female: ', mean(scores['female']))
        print('\n')

# Plain string: the original f-string had no placeholders.
print('Accuracy medio de los 3 modelos: \n')
for summary_task in ['about', 'to', 'as']:
    print('\n', summary_task.upper())
    print('Resultado global accuracy:', mean(global_metrics[summary_task]['acc']))

# AS

In [None]:
# del model
# torch.cuda.empty_cache()

In [None]:
# Settings for the 'as' experiments.
task = 'as'
num_epochs = 100
learning_rate = 1e-6

# Fresh accumulator for this section: for each evaluation task, one list per
# recall/precision/f1 aggregate ('average', 'female', 'male') plus one list of accuracies.
# Every list is a distinct object, so the evaluation cells can append to them independently.
global_metrics = {
    eval_name: {
        **{
            metric_name: {'average': [], 'female': [], 'male': []}
            for metric_name in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for eval_name in ('about', 'to', 'as')
}

In [None]:
# Run 1 for the current `task`: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_1'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# The train/validation loaders are selected by `task`; `save_path` is forwarded to the trainer.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train[task], optimizer,
    early_stop=10, dl_val=dl_val[task], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Run 2 for the current `task`: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_2'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# The train/validation loaders are selected by `task`; `save_path` is forwarded to the trainer.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train[task], optimizer,
    early_stop=10, dl_val=dl_val[task], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Run 3 for the current `task`: new model, AdamW optimizer, then training.
save_path = f'm1_{model_name}_simple_{task}_3'
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

# The train/validation loaders are selected by `task`; `save_path` is forwarded to the trainer.
p, c, epochs = train.train_function(
    model, num_epochs, dl_train[task], optimizer,
    early_stop=10, dl_val=dl_val[task], save_path=save_path, es_threshold=0,
)

# Store both training outputs under the run name: `p` as *_probs.pt and `c` as *_corr.pt.
torch.save(p, save_path + '_probs' + '.pt')
torch.save(c, save_path + '_corr' + '.pt')

In [None]:
# Forward the outputs of the latest training run to train.get_datamap_complete_graph
# (presumably renders the data map for that run — see train_datamaps).
train.get_datamap_complete_graph(
    p,
    correctness_vector=c,
    num_epochs=epochs,
)

In [None]:
# Rebuild the architecture and load the state dict stored at `save_path`
# (presumably the checkpoint written by train.train_function). map_location keeps the
# load working when the checkpoint device differs from the current `device`.
model = rrnn.SingleTaskRRNN(vocab_size=vocab_size, lstm_hidden_dim=lstm_hidden_dim).to(device)
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the md_gender loader of every task and accumulate the scores in global_metrics.
# The loop variable is deliberately NOT called `task`: at notebook scope that would overwrite
# the `task` setting that the training cells read to build `save_path` and pick dataloaders.
for eval_task in ['about', 'to', 'as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {eval_task}")
    metric_result = eval_func(model, dl_eval[eval_task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is appended as-is under the 'acc' key.
            global_metrics[eval_task]['acc'].append(value)
        else:
            # Every other metric is a mapping; each entry goes to the list with the same key.
            for g, v in value.items():
                global_metrics[eval_task][metric][g].append(v)

        print(metric, metric_result[metric])

In [None]:
# Average each metric over the runs accumulated in global_metrics, per evaluation task.
# `summary_task` (rather than `task`) keeps the notebook-level `task` setting untouched, so
# re-running a training cell after this summary still uses the intended task.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for summary_task in ['about', 'to', 'as']:
        scores = global_metrics[summary_task][metric]
        print(summary_task.upper())
        print(f'Resultado global {metric}:', mean(scores['average']))
        print(f'{metric} etiqueta male:', mean(scores['male']))
        print(f'{metric} etiqueta female: ', mean(scores['female']))
        print('\n')

# Plain string: the original f-string had no placeholders.
print('Accuracy medio de los 3 modelos: \n')
for summary_task in ['about', 'to', 'as']:
    print('\n', summary_task.upper())
    print('Resultado global accuracy:', mean(global_metrics[summary_task]['acc']))