In [1]:
from matplotlib import pyplot as plt
import numpy as np
import torch as tr
from torch import nn
from torch.nn.functional import cross_entropy
import pandas as pd
# from torch.utils.tensorboard import SummaryWriter
import csv
import os 

# metrics

In [2]:
from sklearn.metrics import f1_score
# Open question: is F1 the usual metric whenever the target is a matrix?
def get_f1(ref_batch,predict_batch,L,th=0.5):
    """Mean F1 score over a batch of square target / prediction matrices.

    Args:
        ref_batch: iterable of reference tensors; entries equal to -1 are
            discarded before scoring (presumably padding -- confirm against
            the dataset).
        predict_batch: iterable of raw prediction tensors (logits), aligned
            with ``ref_batch``.
        L: iterable with one side length per sample; the entries kept from
            each sample are reshaped to ``(l, l)``.
        th: probability threshold applied after the sigmoid.

    Returns:
        0-d tensor holding the average of the per-sample F1 scores.
    """
    scores = []
    for target, logits, length in zip(ref_batch, predict_batch, L):
        # Keep only the positions where the reference is not -1 and fold
        # them back into a square matrix.
        valid = tr.where(target != -1)
        target_sq = target[valid].view(length, length)
        logits_sq = logits[valid].view(length, length)

        # The network emits logits: the sigmoid maps them to (0, 1) and the
        # threshold turns them into a boolean decision per cell.
        decided = tr.sigmoid(logits_sq) > th

        # Score only the strict upper triangle (offset=1 skips the diagonal).
        rows, cols = tr.triu_indices(target_sq.shape[0], target_sq.shape[1], offset=1)
        y_true = target_sq[rows, cols].numpy().ravel()
        y_pred = decided[rows, cols].numpy().ravel()
        scores.append(f1_score(y_true, y_pred, zero_division=1))
    return tr.tensor(scores).mean()
    

# Model


In [3]:
class ignacioFold(nn.Module):
    """1-D CNN that turns a sequence into a symmetric n x n score matrix.

    The convolutional trunk produces per-position features; two output
    convolutions project them to ``rank`` channels each, and the score matrix
    is the product of those two projections, symmetrised.

    Args:
        embedding_dim: number of input channels per sequence position.
        device: device the module and the class weights are moved to.
        negative_weight: cross-entropy weight of class 0 (class 1 has weight 1).
        lr: learning rate of the Adam optimiser.
        logger: accepted for interface compatibility; not used by this class.
        pred_l1: coefficient of the L1 penalty on the predicted scores.
        max_len: stored as ``self.len``; not otherwise used by this class.
        use_scheduler: accepted for interface compatibility; not used.
        **kwargs: forwarded to ``build_graph`` (``kernel``, ``filters``, ``rank``).
    """
    def __init__(self, embedding_dim=4, device="cpu", negative_weight=.1, lr=1e-3,
                 logger=None, pred_l1=.01, max_len=200, use_scheduler=False, **kwargs):
        super().__init__()
        self.device = device
        self.len = max_len
        # Previously ignored (the loss hard-coded 0.01); the default is the same value.
        self.pred_l1 = pred_l1
        self.class_weight = tr.tensor([negative_weight, 1.]).float().to(device)
        self.build_graph(embedding_dim, **kwargs)
        self.optim = tr.optim.Adam(self.parameters(), lr=lr)

        self.to(device)

    def build_graph(self, emb_dim, kernel=9, filters=50, rank=32):
        """Create the convolutional trunk and the two rank-``rank`` output heads.

        Args:
            emb_dim: input channels of the first convolution.
            kernel: kernel size shared by every convolution. The padding used
                here keeps the sequence length only for odd kernel sizes.
            filters: channels of the hidden convolutions (the trunk ends with
                ``filters // 2`` channels).
            rank: output channels of each head, i.e. the inner dimension of
                the product computed in ``forward``.
        """
        pad = (kernel-1)//2
        dilation = 4
        # Remembered so that forward() does not have to assume rank == 32.
        self.rank = rank
        self.cnn = nn.Sequential(nn.Conv1d(in_channels=emb_dim, out_channels=filters,
                                        kernel_size=kernel, padding = pad, stride=1),
                                nn.ReLU(),
                                nn.BatchNorm1d(filters),
                                nn.Conv1d(in_channels=filters, out_channels=filters,
                                        kernel_size=kernel, dilation = dilation,
                                        padding=dilation*pad, stride=1),
                                nn.ReLU(),
                                nn.BatchNorm1d(filters),
                                nn.Conv1d(in_channels=filters, out_channels=filters,
                                        kernel_size=kernel, dilation = dilation,
                                        padding=dilation*pad, stride=1),
                                nn.ReLU(),
                                nn.BatchNorm1d(filters),
                                nn.Conv1d(in_channels=filters, out_channels=filters//2,
                                        kernel_size=kernel, padding = pad, stride=1),
                                nn.Sigmoid(),)
        self.convsal1 = nn.Conv1d(in_channels=filters//2, out_channels=rank, kernel_size=kernel,
                                  padding=pad, stride=1)
        self.convsal2 = nn.Conv1d(in_channels=filters//2,  out_channels=rank, kernel_size=kernel,
                                  padding = pad, stride=1)

    def forward(self, x):
        """Compute the symmetric score matrix for a batch.

        Args:
            x: tensor of shape ``(batch, emb_dim, n)``.

        Returns:
            Tensor of shape ``(batch, n, n)`` with raw scores (logits); it is
            symmetric because it is averaged with its own transpose.
        """
        n = x.shape[2]
        y = self.cnn(x)
        # Use the configured rank instead of a literal 32, so that models
        # built with rank != 32 no longer fail in these reshapes.
        ya = self.convsal1(y).view(y.shape[0], self.rank, n)
        yb = self.convsal2(y).view(y.shape[0], self.rank, n)

        # (batch, n, rank) @ (batch, rank, n) -> (batch, n, n)
        ya = tr.transpose(ya, -1, -2)
        y = ya @ yb
        yt = tr.transpose(y, -1, -2)

        y = (y+yt)/2

        y = y.view(-1, n, n)
        return y

    # loss and optimisation
    def loss_func(self, yt, y):
        """Weighted two-class cross entropy plus an L1 penalty on the scores.

        Args:
            yt: predicted scores, shape ``(batch, n, n)``.
            y: integer targets of the same shape; positions equal to -1 are
                excluded from both terms.

        Returns:
            Scalar loss tensor.
        """
        y = y.view(y.shape[0], -1)
        yt = yt.view(yt.shape[0], -1)

        # L1 penalty on the scores at non-ignored positions.
        pred_l1_loss = tr.mean(tr.abs(yt[y!=-1])) * self.pred_l1

        # Turn the single score z into two class logits (-z, z), so that
        # cross_entropy can apply the per-class weights and ignore_index.
        yt = yt.unsqueeze(1)
        yt = tr.cat((-yt, yt), dim=1)
        error = cross_entropy(yt, y, ignore_index=-1, weight=self.class_weight)
        loss = error + pred_l1_loss
        return loss

    def _run_epoch(self, data_loader, train):
        """Run one pass over ``data_loader``; update the weights when ``train`` is true.

        Each batch is indexed as ``batch[0]`` (inputs), ``batch[1]`` (targets)
        and ``batch[2]`` (per-sample lengths handed to ``get_f1``).

        Returns:
            ``(avg_loss, avg_f1)`` averaged over the batches actually seen.
        """
        avg_loss = 0
        avg_f1 = 0
        n_batches = 0
        for batch in data_loader:
            x = batch[0].to(self.device)
            y = batch[1].to(self.device)
            if train:
                self.optim.zero_grad()
            y_pred = self(x)
            loss = self.loss_func(y_pred, y)
            if train:
                loss.backward()
                self.optim.step()
            # Accumulate a detached copy: summing the live loss tensors would
            # keep autograd history attached to the running total.
            avg_loss += loss.detach()
            # The metric runs on CPU copies (it converts to numpy).
            avg_f1 += get_f1(y.cpu(), y_pred.detach().cpu(), batch[2])
            n_batches += 1

        # Average over the batches processed; max() avoids a division by zero
        # when the loader yields nothing.
        n_batches = max(n_batches, 1)
        return avg_loss / n_batches, avg_f1 / n_batches

    def tr(self, data_loader):
        """Train for one epoch and return ``(avg_loss, avg_f1)``."""
        self.train()
        return self._run_epoch(data_loader, train=True)

    def tst(self, data_loader):
        """Evaluate without gradient tracking and return ``(avg_loss, avg_f1)``."""
        self.eval()
        with tr.no_grad():
            return self._run_epoch(data_loader, train=False)


# main

In [4]:
import torch as tr
from torch.utils.data import DataLoader, random_split
from dataset import DatasetSeq


# Train ignacioFold on cluster17 with early stopping on validation F1, then
# evaluate the best checkpoint on the held-out test split.
out_path = "results/"
# tr.save does not create missing directories, so make sure it exists.
os.makedirs(out_path, exist_ok=True)
checkpoint_path = f"{out_path}model_cluster17.pmt"
batch_size = 8
# Fall back to the CPU when no GPU is available instead of failing at model creation.
DEVICE = "cuda" if tr.cuda.is_available() else "cpu"
model = ignacioFold(kernel=11, device=DEVICE,max_len=512)
dataset=DatasetSeq('data/cluster17.csv',max_len=512)

# Use the same partitions every run so results stay comparable on this dataset.

n_train = int(0.8 * len(dataset))
n_test = int(0.1 * len(dataset))
n_val = len(dataset) - n_train - n_test


# The fixed generator seed makes random_split produce the same split each run.
train_data, val_data, test_data = random_split(dataset, [n_train, n_val, n_test], generator=tr.Generator().manual_seed(42))
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

print("train batches", len(train_loader))
print("val batches", len(val_loader))
print("test batches", len(test_loader))
# Start below any reachable F1 so the first epoch always writes a checkpoint.
# With best_f1 = 0 and a strict '>', a run whose validation F1 never left 0
# saved nothing, left best_epoch undefined and then loaded a missing (or
# stale) checkpoint file.
best_f1, best_epoch, patience_counter = -1.0, -1, 0
for epoch in range(250):
    train_loss,train_f1 = model.tr(train_loader)
    print(f'train loop epoch {epoch}')
    val_loss,val_f1 = model.tst(val_loader)
    if val_f1 > best_f1:
        best_f1 = val_f1
        best_epoch = epoch
        tr.save(model.state_dict(), checkpoint_path)
        patience_counter = 0
    else:
        patience_counter += 1
        # Stop after more than 50 consecutive epochs without improvement.
        if patience_counter>50:
            break
    print(f"epoch {epoch}: train loss {train_loss:.2f} val loss {val_loss:.2f} train f1 {train_f1:.2f} val f1 {val_f1:.2f}")
print("Start test")
# map_location lets a checkpoint written on one device be restored on another.
model.load_state_dict(tr.load(checkpoint_path, map_location=DEVICE))
test_loss, test_f1 = model.tst(test_loader)
print(f"best epoch {best_epoch} best_valf1 {best_f1:.3f} test_loss {test_loss:.3f} test_f1 {test_f1:.3f}")


train batches 47
val batches 6
test batches 6
train loop epoch 0
epoch 0: train loss 0.12 val loss 0.09 train f1 0.00 val f1 0.00
train loop epoch 1
epoch 1: train loss 0.09 val loss 0.09 train f1 0.00 val f1 0.00
train loop epoch 2
epoch 2: train loss 0.09 val loss 0.09 train f1 0.00 val f1 0.00
train loop epoch 3
epoch 3: train loss 0.09 val loss 0.09 train f1 0.01 val f1 0.06
train loop epoch 4
epoch 4: train loss 0.09 val loss 0.08 train f1 0.01 val f1 0.00
train loop epoch 5
epoch 5: train loss 0.08 val loss 0.08 train f1 0.03 val f1 0.03
train loop epoch 6
epoch 6: train loss 0.08 val loss 0.08 train f1 0.04 val f1 0.07
train loop epoch 7
epoch 7: train loss 0.08 val loss 0.07 train f1 0.06 val f1 0.06
train loop epoch 8
epoch 8: train loss 0.07 val loss 0.07 train f1 0.08 val f1 0.11
train loop epoch 9
epoch 9: train loss 0.07 val loss 0.07 train f1 0.10 val f1 0.07
train loop epoch 10
epoch 10: train loss 0.07 val loss 0.07 train f1 0.13 val f1 0.18
train loop epoch 11
epoch 11