In [1]:
import numpy as np
import pickle
import gdown
import os
import torch
from sklearn.model_selection import train_test_split
from ipywidgets import IntProgress
from IPython.display import display
import torch.nn as nn
from torch.utils.data import TensorDataset



## Descarga de datos

In [2]:
# Fetch the raw dataset only when the pickle is not already on disk.
path = "./Datos ZTF/recurrent_dataset.pk"
dataset_missing = not os.path.isfile(path)
if dataset_missing:
    # Shared Google Drive folder; presumably it unpacks into "./Datos ZTF"
    # so that `path` exists afterwards — TODO confirm.
    url = 'https://drive.google.com/drive/folders/1vqfoxF-KyMNnLxABZZ_kb76DrUrixEud'
    gdown.download_folder(url=url, quiet=False)

## Exploración

In [3]:
# The raw pickle is only needed while the embedding cache (bases.npy) is absent.
path = "./bases.npy"
cache_missing = not os.path.isfile(path)
if cache_missing:
    # NOTE(review): pickle.load can execute arbitrary code; only open this
    # file if the download source is trusted.
    with open('./Datos ZTF/recurrent_dataset.pk', 'rb') as raw_file:
        data_rec = pickle.load(raw_file)
    # Show which arrays the dataset dictionary provides.
    print(data_rec.keys())

dict_keys(['template', 'science', 'difference', 'labels'])


In [4]:
# Print the shape of every array in the raw dataset (skipped once the cache exists).
path = "./bases.npy"
if not os.path.isfile(path):
    for key in ("template", "science", "difference", "labels"):
        print(np.shape(data_rec[key]))

(77146, 27, 27)
(77146, 3, 27, 27)
(77146, 3, 27, 27)
(77146,)


## Dataset de entrenamiento

In [6]:
# Stratified 90/10 split of the samples into train and validation subsets.
# The cropping cell only consumes `indices_train` / `indices_test`; the other
# outputs are kept so any code relying on them keeps working.
path = "./bases.npy"
SPLIT_SEED = 42  # fixed seed so the split is reproducible across kernel restarts
if not os.path.isfile(path):
    # Derive the sample count from the data instead of hard-coding 77146,
    # so the split stays valid if the dataset is refreshed.
    indices = np.arange(len(data_rec["labels"]))
    (
        data_train,
        data_test,
        labels_train,
        labels_test,
        indices_train,
        indices_test,
    ) = train_test_split(
        data_rec["template"],
        data_rec["labels"],
        indices,
        test_size=0.1,
        stratify=data_rec["labels"],
        random_state=SPLIT_SEED,
    )

### Recorte 

In [7]:
# Crop every image to rows/columns 3..23 (a 21x21 window) for both subsets.
path = "./bases.npy"
if not os.path.isfile(path):
    crop = slice(3, 24)

    # Training subset
    train_label = data_rec["labels"][indices_train]
    train_template = data_rec["template"][indices_train, crop, crop]
    train_science = data_rec["science"][indices_train, :, crop, crop]
    train_difference = data_rec["difference"][indices_train, :, crop, crop]
    print("train")
    for cropped in (train_template, train_science, train_difference):
        print(np.shape(cropped))

    # Validation subset
    print("val")
    val_label = data_rec["labels"][indices_test]
    val_template = data_rec["template"][indices_test, crop, crop]
    val_science = data_rec["science"][indices_test, :, crop, crop]
    val_difference = data_rec["difference"][indices_test, :, crop, crop]
    for cropped in (val_template, val_science, val_difference):
        print(np.shape(cropped))


train
(69431, 21, 21)
(69431, 3, 21, 21)
(69431, 3, 21, 21)
val
(7715, 21, 21)
(7715, 3, 21, 21)
(7715, 3, 21, 21)


### Codificación del autoencoder


In [8]:
class AutoEncoderV3(nn.Module):
    """Convolutional autoencoder for 3-channel 21x21 images.

    ``encoder`` compresses a ``[3, 21, 21]`` input into a ``[128, 1, 1]`` code
    and ``decoder`` expands that code back to ``[3, 21, 21]``.  ``net`` chains
    both, so ``forward`` returns the reconstruction.

    The layer order (and therefore the ``state_dict`` keys) must not change,
    otherwise previously saved checkpoints stop loading.
    """

    def __init__(
        self,
    ):
        super().__init__()

        def conv_relu(in_ch, out_ch):
            # 3x3 convolution that preserves the spatial size, followed by ReLU.
            return [nn.Conv2d(in_ch, out_ch, kernel_size=3, padding="same"), nn.ReLU()]

        def deconv_relu(in_ch, out_ch, kernel, stride):
            # Transposed convolution followed by ReLU.
            return [nn.ConvTranspose2d(in_ch, out_ch, kernel, stride=stride), nn.ReLU()]

        encoder_layers = [
            # Block 1: [3, 21, 21] -> [16, 21, 21] -> [16, 10, 10]
            *conv_relu(3, 16),
            *conv_relu(16, 16),
            nn.MaxPool2d(kernel_size=2),
            # Block 2: -> [32, 10, 10] -> [32, 5, 5]
            *conv_relu(16, 32),
            *conv_relu(32, 32),
            nn.MaxPool2d(kernel_size=2),
            # Block 3: -> [64, 5, 5] -> [64, 2, 2]
            *conv_relu(32, 64),
            *conv_relu(64, 64),
            nn.MaxPool2d(kernel_size=2),
            # Block 4: -> [128, 2, 2] -> [128, 1, 1]
            *conv_relu(64, 128),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            # Block 1: [128, 1, 1] -> [64, 2, 2]
            *deconv_relu(128, 64, 2, 1),
            # Block 2: -> [64, 5, 5] -> [32, 5, 5]
            *deconv_relu(64, 64, 3, 2),
            *deconv_relu(64, 32, 1, 1),
            # Block 3: -> [32, 10, 10] -> [32, 10, 10]
            *deconv_relu(32, 32, 2, 2),
            *deconv_relu(32, 32, 1, 1),
            # Block 4: -> [16, 21, 21] -> [16, 21, 21]
            *deconv_relu(32, 16, 3, 2),
            *deconv_relu(16, 16, 1, 1),
            # Block 5: -> [3, 21, 21]
            *deconv_relu(16, 3, 1, 1),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

        # Full pipeline; shares its modules with `encoder` and `decoder`.
        self.net = nn.Sequential(self.encoder, self.decoder)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        reconstruction = self.net(x)
        return reconstruction
# Instantiate the autoencoder and restore its trained weights.
autoencoder = AutoEncoderV3()
# map_location="cpu" lets a checkpoint saved from a GPU session load on a
# CPU-only machine; the training cell moves the encoder to the GPU when needed.
autoencoder.load_state_dict(torch.load('modelo_entrenado128.pth', map_location="cpu"))

<All keys matched successfully>

### Creación del dataset de entrenamiento

In [9]:
# Build the training sequences and encode them with the autoencoder
# (skipped when the embedding cache already exists).
path = "./bases.npy"
if not os.path.isfile(path):
    largo_train=len(train_label)
    # train[n, t, c] is a 21x21 image: for each of the 3 sequence steps t the
    # channels c are (template, science[t], difference[t]).
    train=np.zeros([largo_train,3,3,21,21])
    print("train")
    # Vectorised fill (replaces the per-sample Python loop); the single
    # template is broadcast over the 3 sequence steps.
    train[:, :, 0] = train_template[:, None]
    train[:, :, 1] = train_science
    train[:, :, 2] = train_difference
    print(np.shape(train))
    print(np.shape(train_label))

    largo=len(train)
    dim=128  # size of the encoder output per image

    data_train=np.zeros([largo,3,dim])
    progress_bar = IntProgress(min=0, max=largo,description='Loading:',
        bar_style='', # 'success', 'info', 'warning', 'danger' or ''
        style={'bar_color': 'maroon'},
        orientation='horizontal')
    display(progress_bar) # display the bar

    # Inference only: no_grad avoids building an autograd graph per sample.
    with torch.no_grad():
        for idx, data in enumerate(train):
            progress_bar.value += 1
            # Encoder output [3, dim, 1, 1] -> [3, dim]
            medicion=torch.squeeze(autoencoder.encoder(torch.Tensor(data))).numpy()
            data_train[idx]=medicion
    progress_bar.description='Ready:'
    progress_bar.style={'bar_color': 'green'}

train
(69431, 3, 3, 21, 21)
(69431,)


IntProgress(value=0, description='Loading:', max=69431, style=ProgressStyle(bar_color='maroon'))

### Creación del dataset de validación

In [10]:
# Build the validation sequences and encode them with the autoencoder
# (skipped when the embedding cache already exists).
path = "./bases.npy"
if not os.path.isfile(path):
    largo_test=len(val_label)
    # val[n, t, c] is a 21x21 image: for each of the 3 sequence steps t the
    # channels c are (template, science[t], difference[t]).
    val=np.zeros([largo_test,3,3,21,21])
    print("val")
    # Vectorised fill (replaces the per-sample Python loop); the single
    # template is broadcast over the 3 sequence steps.
    val[:, :, 0] = val_template[:, None]
    val[:, :, 1] = val_science
    val[:, :, 2] = val_difference

    largo=len(val)
    data_val=np.zeros([largo,3,128])
    progress_bar = IntProgress(min=0, max=largo,description='Loading:',
        bar_style='', # 'success', 'info', 'warning', 'danger' or ''
        style={'bar_color': 'maroon'},
        orientation='horizontal') # instantiate the bar
    display(progress_bar) # display the bar

    # Inference only: no_grad avoids building an autograd graph per sample.
    with torch.no_grad():
        for idx, data in enumerate(val):
            progress_bar.value += 1
            # Encoder output [3, 128, 1, 1] -> [3, 128]
            medicion=torch.squeeze(autoencoder.encoder(torch.Tensor(data))).numpy()
            data_val[idx]=medicion
    progress_bar.description='Ready:'
    progress_bar.style={'bar_color': 'green'}


val


IntProgress(value=0, description='Loading:', max=7715, style=ProgressStyle(bar_color='maroon'))

In [12]:
# Persist the embeddings and labels so the expensive cells can be skipped.
path = "./bases.npy"
if not os.path.isfile(path):
    # Write to a temporary file and rename it afterwards: an interrupted write
    # must not leave a truncated bases.npy, because every guarded cell would
    # then skip its work while the load cell fails.
    tmp_path = path + ".tmp"
    with open(tmp_path, 'wb') as f:
        # The order here must match the order of the np.load calls in the
        # cell that reads the cache.
        np.save(f, data_val)
        np.save(f, val_label)
        np.save(f, data_train)
        np.save(f, train_label)
    os.replace(tmp_path, path)



In [12]:
# Read back the four arrays in the order they were written:
# validation embeddings, validation labels, training embeddings, training labels.
with open('bases.npy', 'rb') as cache_file:
    data_val, val_label, data_train, train_label = (
        np.load(cache_file) for _ in range(4)
    )

### Tensor dataset

In [None]:

# Wrap the cached embeddings and their labels as (features, target) datasets.
train_features, train_targets = torch.Tensor(data_train), torch.Tensor(train_label)
val_features, val_targets = torch.Tensor(data_val), torch.Tensor(val_label)
my_datatrain = TensorDataset(train_features, train_targets)
my_dataval = TensorDataset(val_features, val_targets)


## Arquitectura de la red recurrente

In [None]:
class RNN(nn.Module):
    """GRU-based sequence classifier.

    A stacked GRU (``batch_first=True``) reads the input sequence; the output
    of the last time step is passed through a small fully connected head that
    produces ``num_classes`` logits.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
    """

    def __init__(self,input_size,hidden_size,num_layers,num_classes):
        super().__init__()
        self.num_layers=num_layers
        self.hidden_size=hidden_size

        self.rnn=nn.GRU(input_size,hidden_size,num_layers,batch_first=True)
        self.fc=nn.Sequential(nn.Linear(hidden_size,64),
                                nn.ReLU(),
                                nn.Linear(64,32),
                                nn.ReLU(),
                                nn.Linear(32,16),
                                nn.ReLU(),
                                nn.Linear(16,num_classes)
                            )

    def forward(self, x, gpu=None):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input of shape ``(batch, seq_len, input_size)``.
            gpu: Ignored; accepted only so existing ``model(x, use_gpu)`` calls
                keep working. The initial hidden state now follows ``x``'s own
                device and dtype, so it can never end up on a different device
                than the input.
        """
        h0=torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )

        out,_=self.rnn(x,h0)
        # Keep only the output of the last time step.
        out=out[:,-1,:]
        return self.fc(out)
 


In [None]:
# ---- Hyper-parameters -------------------------------------------------------
input_size=128      # feature size per time step (encoder output size)
hidden_size=200
num_classes=3
num_epoch=100
batch_size=32
# The original assigned 1e-4 and then overrode it with 1e-3 before building
# the optimizer; only the effective value is kept.
lr=1e-3
num_layers=4
use_gpu=False
TRAIN_SEED=42

# Seed weight initialisation and batch shuffling so runs are repeatable.
torch.manual_seed(TRAIN_SEED)

model=RNN(input_size,hidden_size,num_layers,num_classes)
criterion= nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=lr) # TODO: try SGD

# NOTE(review): these are not updated by the loop below; kept in case later
# cells reference them — TODO confirm and remove if unused.
current_loss=0
all_losses=[]
plot_step, print_step=1000,5000

train_loader = torch.utils.data.DataLoader(my_datatrain, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=use_gpu)
# The whole validation set is evaluated as a single batch.
val_loader = torch.utils.data.DataLoader(my_dataval, batch_size=len(my_dataval), shuffle=False, pin_memory=use_gpu)
num_total_steps=len(train_loader)

device = torch.device("cuda" if use_gpu else "cpu")
model.to(device)
if use_gpu:
    # Kept from the original cell; the encoder is not used in this loop.
    autoencoder.encoder.cuda()

for epoch in range(num_epoch):
    acumulado=0                     # sum of batch losses over the epoch
    train_acc_count=0               # number of training samples seen
    cumulative_train_corrects=0     # number of correct training predictions

    model.train()
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        # CrossEntropyLoss needs integer class indices; cast once and move
        # straight to the target device (no CPU round trip).
        y_batch = y_batch.long().to(device)

        outputs=model(x_batch,use_gpu)

        loss=criterion(outputs, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acumulado+=loss.item()

        class_prediction = torch.argmax(outputs, dim=1)
        train_acc_count += y_batch.shape[0]
        cumulative_train_corrects += (y_batch == class_prediction).sum().item()

    print(f"Epoch [{epoch+1}/{num_epoch}], Loss:{acumulado/num_total_steps:.4f}, acc train: {cumulative_train_corrects / train_acc_count:.4f}")

    model.eval()
    val_acc_count=0
    cumulative_val_corrects=0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.long().to(device)

            y_predicted = model(x_val,use_gpu)
            class_prediction = torch.argmax(y_predicted, dim=1)
            val_acc_count += y_val.shape[0]
            cumulative_val_corrects += (y_val == class_prediction).sum().item()

    print(f"Epoch [{epoch+1}/{num_epoch}], acc val: {cumulative_val_corrects / val_acc_count:.4f}")