In [None]:
import numpy as np
import torch


import json
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch import nn

from statistics import mean
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
import utils_generic as generic
import rrnn_modelo as rrnn

import train_datamaps as train

from train import eval_func

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# Experiment-wide constants.
# model_name and encoding_type are interpolated into the checkpoint file names;
# encoding_type is also the per-sentence key whose value is split into dependency tags.
model_name, encoding_type = 'lr', 'absolute'
num_labels = 2


# Datos ConvAI2

In [None]:
# Load the complete ConvAI2 corpus (its 'train' and 'validation' splits are used below).
# Forward slashes avoid the invalid "\C" / "\c" escape sequences of the old literal and
# resolve on Windows, Linux and macOS alike; the encoding is explicit so decoding does
# not depend on the platform default (same as the md_gender load).
with open('Datasets/ConvAI2/convai2_complete.json', 'r', encoding="utf8") as f:
    data = json.load(f)

In [None]:
convai_train = data['train']
convai_val = data['validation']

# Append the dependency tags (taken from the `encoding_type` field) to the tokenized
# text of every training sentence, separated by single spaces.
for entry in convai_train.values():
    text = entry['tokenized']
    dep_tags = generic.split_sentence_dep_tags(entry[encoding_type])
    entry['sentence_with_deps'] = text + ' ' + ' '.join(dep_tags)

# Learn the bag-of-words vocabulary on the training sentences only; the document-term
# matrix itself is not needed here, so only the fit is performed.
sentences = [entry['sentence_with_deps'] for entry in convai_train.values()]
vectorizer = CountVectorizer()
vectorizer.fit(sentences)

# NOTE: the keyword is called `tfidf`, but the object passed is the CountVectorizer above.
convai_train_token = generic.tokenize_dataset_with_dependencies_rrnn(
    convai_train,
    ['about', 'to', 'as'],
    tfidf=vectorizer,
)


In [None]:
# Same sentence construction as for the training split: tokenized text followed by
# the dependency tags of the chosen encoding.
for entry in convai_val.values():
    text = entry['tokenized']
    dep_tags = generic.split_sentence_dep_tags(entry[encoding_type])
    entry['sentence_with_deps'] = text + ' ' + ' '.join(dep_tags)

# The vectorizer fitted on the training sentences is reused; it is not refitted here.
convai_val_token = generic.tokenize_dataset_with_dependencies_rrnn(
    convai_val,
    ['about', 'to', 'as'],
    tfidf=vectorizer,
)

In [None]:
vocab_size = len(vectorizer.vocabulary_) # Vocabulary size, dependency tags included (they are part of the fitted sentences)

In [None]:
# One single-task training dataset per task.
# deps=True is not passed because the dependency tags are already part of the sentence text.
convai_train_dataset = {
    task: rrnn.DatasetSingleTaskRRNN(convai_train_token, task, eval=False)
    for task in ['to', 'as', 'about']
}

In [None]:
# One single-task validation dataset per task, built like the training ones (eval=False).
convai_val_dataset = {
    task: rrnn.DatasetSingleTaskRRNN(convai_val_token, task, eval=False)
    for task in ['to', 'as', 'about']
}

# Datos md_gender

In [None]:
# Load the md_gender corpus used as the evaluation set.
# Forward slashes avoid the invalid "\m" escape sequences of the old literal and
# resolve on Windows, Linux and macOS alike.
with open('Datasets/md_gender/md_complete.json', 'r', encoding="utf8") as f:
    md_data = json.load(f)

In [None]:
# Build "tokenized text + dependency tags" for every md_gender sentence.
for entry in md_data.values():
    text = entry['tokenized']
    dep_tags = generic.split_sentence_dep_tags(entry[encoding_type])
    entry['sentence_with_deps'] = text + ' ' + ' '.join(dep_tags)

# Tokenize with the vectorizer that was fitted on the ConvAI2 training sentences.
md_tokenized = generic.tokenize_dataset_with_dependencies_rrnn(
    md_data,
    ['about', 'to', 'as'],
    tfidf=vectorizer,
)

# One evaluation dataset per task (note eval=True, unlike the ConvAI2 datasets).
md_dataset = {
    task: rrnn.DatasetSingleTaskRRNN(md_tokenized, task, eval=True)
    for task in ['to', 'as', 'about']
}

# Creación dataloaders

In [None]:
# Shuffled training loaders, one per task, in batches of 128.
dl_train = {
    task: DataLoader(
        convai_train_dataset[task],
        batch_size=128,
        shuffle=True,
        collate_fn=rrnn.collate_fn_no_pad_deps,
    )
    for task in ['to', 'as', 'about']
}

In [None]:
# Validation loaders, one per task, in batches of 128.
# NOTE(review): these are shuffled as well (shuffle=True) — confirm this is intended.
dl_val = {
    task: DataLoader(
        convai_val_dataset[task],
        batch_size=128,
        shuffle=True,
        collate_fn=rrnn.collate_fn_no_pad_deps,
    )
    for task in ['to', 'as', 'about']
}

In [None]:
# Evaluation loaders over md_gender, one per task, kept in dataset order (shuffle=False).
dl_eval = {
    task: DataLoader(
        md_dataset[task],
        batch_size=128,
        shuffle=False,
        collate_fn=rrnn.collate_fn_no_pad_deps,
    )
    for task in ['to', 'as', 'about']
}

## ABOUT

In [None]:
# Hyper-parameters for the models trained on the 'about' task.
learning_rate = 5e-5
num_epochs = 100
task_name = 'about'

# Fresh accumulator for this section: for every evaluation set it keeps one list per
# (metric, label) pair plus a flat list of accuracies; each evaluated run appends one value.
global_metrics = {
    target: {
        **{
            score: {label: [] for label in ('average', 'female', 'male')}
            for score in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for target in ('about', 'to', 'as')
}

In [None]:
# Run 1 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_1_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint (presumably probabilities and
# correctness used for the data map — see the file suffixes).
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 2 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_2_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 3 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_3_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Average every metric over the runs recorded in global_metrics, per evaluation set.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for task in ['about', 'to', 'as']:
        print(task.upper())
        per_label = global_metrics[task][metric]
        print(f'Resultado global {metric}:', mean(per_label['average']))
        print(f'{metric} etiqueta male:', mean(per_label['male']))
        print(f'{metric} etiqueta female: ', mean(per_label['female']))
        print('\n')

# Accuracy has no per-label breakdown, only one value per run.
print('Accuracy medio de los 3 modelos: \n')
for task in ['about', 'to', 'as']:
    print('\n', task.upper())
    accuracies = global_metrics[task]['acc']
    print('Resultado global accuracy:', mean(accuracies))

## TO

In [None]:
# Hyper-parameters for the models trained on the 'to' task.
learning_rate = 5e-5
num_epochs = 100
task_name = 'to'

# Fresh accumulator for this section: for every evaluation set it keeps one list per
# (metric, label) pair plus a flat list of accuracies; each evaluated run appends one value.
global_metrics = {
    target: {
        **{
            score: {label: [] for label in ('average', 'female', 'male')}
            for score in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for target in ('about', 'to', 'as')
}

In [None]:
# Run 1 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_1_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 2 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_2_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 3 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_3_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Average every metric over the runs recorded in global_metrics, per evaluation set.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for task in ['about', 'to', 'as']:
        print(task.upper())
        per_label = global_metrics[task][metric]
        print(f'Resultado global {metric}:', mean(per_label['average']))
        print(f'{metric} etiqueta male:', mean(per_label['male']))
        print(f'{metric} etiqueta female: ', mean(per_label['female']))
        print('\n')

# Accuracy has no per-label breakdown, only one value per run.
print('Accuracy medio de los 3 modelos: \n')
for task in ['about', 'to', 'as']:
    print('\n', task.upper())
    accuracies = global_metrics[task]['acc']
    print('Resultado global accuracy:', mean(accuracies))

## AS

In [None]:
# Hyper-parameters for the models trained on the 'as' task.
learning_rate = 5e-5
num_epochs = 100
task_name = 'as'

# Fresh accumulator for this section: for every evaluation set it keeps one list per
# (metric, label) pair plus a flat list of accuracies; each evaluated run appends one value.
global_metrics = {
    target: {
        **{
            score: {label: [] for label in ('average', 'female', 'male')}
            for score in ('recall', 'precision', 'f1')
        },
        'acc': [],
    }
    for target in ('about', 'to', 'as')
}

In [None]:
# Run 1 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_1_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 2 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_2_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Run 3 on `task_name`: train a freshly initialised logistic-regression classifier.
# The label count comes from the `num_labels` constant instead of a repeated literal.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
save_path = f'm1_{model_name}_{encoding_type}_{task_name}_3_v2'

optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)
# train_function receives save_path (presumably where it writes the checkpoint that is
# reloaded for evaluation — confirm in train_datamaps) and returns p, c and the epoch count.
p, c, epochs = train.train_function(
    model,
    num_epochs,
    dl_train[task_name],
    optimizer,
    early_stop=10,
    dl_val=dl_val[task_name],
    save_path=save_path,
    es_threshold=0,
)
# Persist both returned tensors next to the checkpoint.
torch.save(p, f'{save_path}_probs.pt')
torch.save(c, f'{save_path}_corr.pt')

In [None]:
# Data-map graph for the run just trained, from its p / c outputs and epoch count
# (presumably rendered inline — confirm in train_datamaps).
train.get_datamap_complete_graph(p,correctness_vector=c,num_epochs=epochs)

In [None]:
# Rebuild the architecture and restore the weights stored at `save_path`.
model = rrnn.LogisticRegression(vocab_size=vocab_size, num_labels=num_labels).to(device)
# map_location lets a checkpoint saved on another device (e.g. CUDA) load on the current one.
model.load_state_dict(torch.load(save_path, map_location=device))

# Evaluate on the three md_gender sets and record every metric in global_metrics.
# NOTE: results are appended, so re-running this cell records the same run twice.
for task in ['about','to','as']:
    print('\n')
    print(f"Evaluación en el conjunto el conjunto {task}")
    metric_result = eval_func(model, dl_eval[task])
    for metric, value in metric_result.items():
        if metric == 'accuracy':
            # Accuracy is a single value per evaluation set.
            global_metrics[task]['acc'].append(value)
        else:
            # Every other metric maps a label name to its value.
            for g, v in value.items():
                global_metrics[task][metric][g].append(v)

        print(metric, value)

In [None]:
# Average every metric over the runs recorded in global_metrics, per evaluation set.
for metric in ['f1', 'recall', 'precision']:
    print(f'{metric} medio de los 3 modelos: \n')
    for task in ['about', 'to', 'as']:
        print(task.upper())
        per_label = global_metrics[task][metric]
        print(f'Resultado global {metric}:', mean(per_label['average']))
        print(f'{metric} etiqueta male:', mean(per_label['male']))
        print(f'{metric} etiqueta female: ', mean(per_label['female']))
        print('\n')

# Accuracy has no per-label breakdown, only one value per run.
print('Accuracy medio de los 3 modelos: \n')
for task in ['about', 'to', 'as']:
    print('\n', task.upper())
    accuracies = global_metrics[task]['acc']
    print('Resultado global accuracy:', mean(accuracies))