In [19]:
import torch
import os
#from ch7.weather.model.model import TcnClassifier
#from ch7.weather.utils import sliding_window
import pandas as pd
import numpy as np
import random
import copy

# Published Google Sheet with the raw follow-up table. In this cell it is only
# used as the source of column names for the imputed frames below.
RAW_DATA_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQwC6jRtVUk-2dkk2W3BDJZTOdsS427LN8Ixo-rQF4Afs6ice0rof7qh_EbnAy5lYEGqX-TCSvjpPyr/pub?gid=1713335339&single=true&output=csv'
raw_df = pd.read_csv(RAW_DATA_URL, index_col=['codigo', 'Tiempo']).drop(['Fecha', 'Exposicion'], axis=1)

# Imputed train / validation tables, indexed by patient code ('codigo').
# NOTE(review): .abs() flips the sign of any negative imputed value - confirm
# that this is the intended way to handle them.
imputed_train_df = pd.read_csv('imputed_train_df.csv', index_col=['codigo']).drop('Unnamed: 0', axis=1).abs()
imputed_val_df = pd.read_csv('imputed_val_df.csv', index_col=['codigo']).drop('Unnamed: 0', axis=1).abs()

# Validation rows first, then training rows (row order is kept as loaded).
imputed_df = pd.concat([imputed_val_df, imputed_train_df])
codigos = imputed_df.index.get_level_values('codigo').unique().tolist()

# Column names are assigned positionally from the raw sheet.
# Assumes the imputed files keep raw_df's column order - TODO confirm.
imputed_df.columns = raw_df.columns.tolist()

# Binary label: 'Yes' when 'Alfa-fetoprot' is above 10, otherwise 'No'.
imputed_df['alfafeto_bad'] = np.where(imputed_df['Alfa-fetoprot'] > 10, 'Yes', 'No')

# Model inputs: every float column is continuous; the label is the only categorical one.
features_cont = imputed_df.select_dtypes(float).columns.tolist()
features_cat = ['alfafeto_bad']

In [20]:
# Independent copy of the imputed frame; everything below reads from `df`.
df = imputed_df.copy(deep=True)

from ch7.weather.utils import sliding_window

w = 3  # number of consecutive rows in each input window
X, Y = [], []
for codigo in codigos:
    df_l = df.loc[codigo]

    # One 1-D array per feature: continuous columns are linearly interpolated
    # (remaining gaps -> 0); categorical columns are encoded Yes/No -> 1/0.
    D = [df_l[name].interpolate('linear').fillna(0).values for name in features_cont]
    D += [df_l[name].map({'Yes': 1, 'No': 0}).fillna(0).values for name in features_cat]

    # Transpose to a time series: one row per time step, columns in D's order.
    TS = [[column[step] for column in D] for step in range(df_l.shape[0])]

    in_seq, out_seq = sliding_window(TS, w, 1)
    # Label = last column of the first target row, i.e. the last entry of
    # features_cat (assumes each out_seq item is a sequence of rows - TODO confirm).
    targets = [target_rows[0][-1] for target_rows in out_seq]
    X.extend(in_seq)
    Y.extend(targets)

# Train-Validation Split
# Each window goes to validation when its random draw exceeds 0.8 (about 20%).
# A dedicated seeded generator keeps the split identical across kernel
# restarts and leaves the global `random` state untouched.
# NOTE(review): windows from the same patient overlap, so a per-window split
# can place near-identical samples on both sides - consider splitting by patient.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

X_train, Y_train = [], []
X_val, Y_val = [], []
for i in range(len(X)):
    if split_rng.random() > .8:
        X_val.append(X[i])
        Y_val.append(Y[i])
    else:
        X_train.append(X[i])
        Y_train.append(Y[i])

# Swap the last two axes of the stacked windows before feeding the network
# (assumes each window is laid out as (time, features) - TODO confirm).
x_train = torch.tensor(X_train).float().transpose(1, 2)

# Hyper-parameters for the TCN classifier, kept together in one mapping.
params = dict(
    tcl_num=4,
    tcl_channel_size=25,
    kernel_size=4,
    dropout=0.1,
    slices=1,
    use_bias=True,
    lr=0.03388437885302203,
)

# Unpack into the individual names used by the cells that build the model.
tcl_num, tcl_channel_size = params['tcl_num'], params['tcl_channel_size']
# temporal causal layer channels: one entry per layer, all the same width
channel_sizes = [tcl_channel_size for _ in range(tcl_num)]
# convolution kernel size
kernel_size = params['kernel_size']
dropout, slices, use_bias = params['dropout'], params['slices'], params['use_bias']
lr = params['lr']
from ch7.weather.model.model import TcnClassifier

# Constructor arguments for the TCN. The input width counts every continuous
# feature plus every categorical one.
model_params = dict(
    num_inputs=len(features_cont) + len(features_cat),
    num_classes=2,
    num_channels=channel_sizes,
    kernel_size=kernel_size,
    dropout=dropout,
    slices=slices,
    act='relu',
    use_bias=use_bias,
)

model = TcnClassifier(**model_params)

# Smoke test: one forward pass over the training windows, printing the input
# and output shapes.
print(x_train.shape)

prediction = model(x_train)

print(prediction.shape)

torch.Size([87, 52, 3])
torch.Size([87, 2])


In [21]:
def sliding_window(ts, features, Y_idx, target_len=1):
    """Cut a 2-D series into consecutive (input window, target) pairs.

    Note: this definition rebinds the name `sliding_window` that an earlier
    cell imports from `ch7.weather.utils` and calls with a different
    argument list.

    Parameters
    ----------
    ts : 2-D array supporting `ts.shape` and `ts[a:b, c]` slicing; rows are
        consecutive steps.
    features : number of rows in each input window.
    Y_idx : column index read for the target.
    target_len : number of rows directly after the window used as target.

    Returns
    -------
    (X, Y) : two lists of equal length. `X[k]` is `ts[k:k + features, :]`
        and `Y[k]` is `ts[k + features:k + features + target_len, Y_idx]`.
        Both are empty when `ts` has fewer than `features + target_len` rows.
    """
    span = features + target_len
    window_starts = range(ts.shape[0] - span + 1)
    inputs = [ts[start:start + features, :] for start in window_starts]
    targets = [ts[start + features:start + span, Y_idx] for start in window_starts]
    return inputs, targets




# Try the window builder on one patient ('T15'), with 'alfafeto_bad' as target column.
target_col_idx = df.columns.tolist().index('alfafeto_bad')
X_seq, Y_seq = sliding_window(np.array(df.loc['T15']), w, target_col_idx)

# Shape of the stacked input windows for this patient.
np.array(X_seq).shape

(6, 3, 52)

In [22]:

def get_one_patient_windows(a_patient, target_var, window_length, target_length = 1, data = None):
    """Build sliding input windows and their targets for a single patient.

    Parameters
    ----------
    a_patient : index label selecting the patient's rows via `.loc`.
    target_var : name of the label column. It is excluded from the inputs and
        its values are mapped with `{'Yes': 1, 'No': 0}`.
    window_length : number of consecutive rows in each input window.
    target_length : number of rows directly after each window used as target.
    data : DataFrame to read from. Defaults to the notebook-level `df`.

    Returns
    -------
    patient_windows : tensor of shape (n_windows, window_length, n_features),
        where n_features is the number of columns other than `target_var`.
    patient_target : 1-D tensor holding n_windows * target_length labels.

    n_windows is `rows - window_length - target_length + 1`; when the patient
    has too few rows, empty tensors with the shapes above are returned.
    """
    if data is None:
        data = df  # notebook-level frame

    # A list selector always yields a DataFrame, even for a one-row patient.
    patient_followup = data.loc[[a_patient]]
    patient_target_var = patient_followup[target_var]
    patient_features = patient_followup.drop(columns=[target_var])

    feature_values = torch.tensor(patient_features.to_numpy())
    n_features = feature_values.shape[1]  # derived from the data, not hard-coded
    window_plus_target = window_length + target_length
    n_windows = max(patient_features.shape[0] - window_plus_target + 1, 0)

    if n_windows == 0:
        return (feature_values.new_empty((0, window_length, n_features)),
                torch.empty(0, dtype=torch.long))

    X = list()
    y = list()
    for window_start in range(n_windows):
        window_end = window_start + window_length
        X.append(feature_values[window_start:window_end])
        Y_target = patient_target_var.iloc[window_end:window_end + target_length].map({'Yes': 1, 'No': 0})
        y.append(torch.tensor(Y_target.to_list()))

    patient_windows = torch.stack(X)
    patient_target = torch.cat(y)
    return patient_windows, patient_target

# Example: 3-row windows for patient 'T15', with 'alfafeto_bad' as the label.
patient_windows, patient_target = get_one_patient_windows(
    a_patient='T15',
    target_var='alfafeto_bad',
    window_length=3,
)

In [50]:

from torch.utils.data import Dataset, DataLoader, RandomSampler

class patients_batch(Dataset):
    """Dataset with one item per patient code.

    Each item is the `(windows, targets)` pair returned by
    `get_one_patient_windows` for that patient.
    """

    def __init__(self, codigos, target_var, window_length):
        super().__init__()
        # Patient codes, label column name and window size used for every item.
        self.codigos, self.target_var, self.window_length = codigos, target_var, window_length

    def __len__(self):
        # One item per patient code.
        n_patients = len(self.codigos)
        return n_patients

    def __getitem__(self, index):
        patient_code = self.codigos[index]
        return get_one_patient_windows(patient_code, self.target_var, self.window_length)
    
    
# One dataset item per patient, grouped 3 patients per batch.
# The default collate stacks the per-patient tensors, so all patients in a
# batch are expected to yield the same number of windows.
batch_dataset = patients_batch(codigos, 'alfafeto_bad', 3)
batch_loader  = DataLoader(batch_dataset, batch_size=3)

# Pull a single batch to check the shapes.
X_batch, y_batch = next(iter(batch_loader))

# Merge the (patient, window) axes into one sample axis, then swap the last
# two axes so each sample is (features, window_length). The sizes come from
# the tensor itself instead of a hard-coded (3, 51).
X_batch_reshaped = X_batch.flatten(0, 1).transpose(1, 2).float()
y_batch_reshaped = y_batch.flatten().long()

In [45]:
# Hyper-parameters for the TCN classifier, kept together in one mapping.
params = dict(
    tcl_num=4,
    tcl_channel_size=25,
    kernel_size=4,
    dropout=0.1,
    slices=1,
    use_bias=True,
    lr=0.03388437885302203,
)

# Unpack into the individual names used when building the model and optimizer.
tcl_num, tcl_channel_size = params['tcl_num'], params['tcl_channel_size']
# temporal causal layer channels: one entry per layer, all the same width
channel_sizes = [tcl_channel_size for _ in range(tcl_num)]
# convolution kernel size
kernel_size = params['kernel_size']
dropout, slices, use_bias = params['dropout'], params['slices'], params['use_bias']
lr = params['lr']
# Constructor arguments for the TCN. Only the continuous features count as
# inputs here: get_one_patient_windows removes the label column from the
# windows it returns.
model_params = dict(
    num_inputs=len(features_cont),
    num_classes=2,
    num_channels=channel_sizes,
    kernel_size=kernel_size,
    dropout=dropout,
    slices=slices,
    act='relu',
    use_bias=use_bias,
)

# Display the resulting input width.
model_params['num_inputs']


51

In [56]:
# Fresh model, Adam optimizer and cross-entropy loss for the 2-class target.
model = TcnClassifier(**model_params)

optimizer = torch.optim.Adam(params = model.parameters(), lr = lr)
cl_loss = torch.nn.CrossEntropyLoss()


training_loss = []   # one loss value per optimisation step (i.e. per batch)
epochs        = 10
# A named loop variable is used because `_` is IPython's last-output variable.
for epoch in range(epochs):

    for X_batch, y_batch in batch_loader:

        # Merge the (patient, window) axes into one sample axis, then swap the
        # last two axes so each sample is (features, window_length). The sizes
        # come from the batch itself instead of a hard-coded (3, 51).
        X_batch_reshaped = X_batch.flatten(0, 1).transpose(1, 2).float()
        y_batch_reshaped = y_batch.flatten().long()

        prediction = model(X_batch_reshaped)
        loss       = cl_loss(prediction, y_batch_reshaped)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        training_loss.append(loss.item())
    
