# Libraries

In [57]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

In [58]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Import data

In [59]:
%run Data_preprocessing_v1.ipynb

************************************************************
There are 6 set of X
X_train_selected_t1, X_train_selected_t2, X_valid_selected_t1,X_valid_selected_t2,X_test_selected_t1,X_test_selected_t2
------------------------------------------------------------
Normalized version
X_train_selected_t1_norm, X_train_selected_t2_norm, X_valid_selected_t1_norm,X_valid_selected_t2_norm,X_test_selected_t1_norm,X_test_selected_t2_norm
------------------------------------------------------------
There are 4 set of Y
y_train_t1, y_train_t2, y_valid_t1, y_valid_t2
when training, please use: 'y_train_t1_value,y_train_t2_value,y_valid_t1_value,y_valid_t2_value'
************************************************************


In [60]:
print(X_train_selected_t2_norm.shape)
print(X_valid_selected_t2_norm.shape)
print(X_test_selected_t2_norm.shape)

(16760, 54)
(2394, 54)
(4790, 54)


# Hyperparameters

In [61]:
input_size = 54
sequence_length = 1
output_size = 1 # live or dead
num_layers = 3
hidden_size=30
learning_rate = 0.001
batch_size = 64
num_epochs = 500
l2_lamb = 0.0005

# Create JusNN

In [62]:
class JusNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(JusNN, self).__init__()
        
        self.convlayer = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=6, kernel_size=2, stride=1, padding=0),
            nn.AvgPool1d(kernel_size=4,stride=2),
            nn.Conv1d(in_channels=6, out_channels=1, kernel_size=2, stride=1, padding=0),
            nn.ReLU(),
        )
        
        self.fclayers = nn.Sequential(
            nn.Linear(input_size, 50),
            nn.ReLU(),
            nn.Linear(50, 24),
        )
        
        self.batchnorm1 = nn.BatchNorm1d(59)
        #self.batchnorm2 = nn.BatchNorm1d(30)
        #self.dropout = nn.Dropout(p=0.5)
        
        self.fc1 = nn.Linear(24, 1)
        
    def forward(self, x):
         x1 = F.leaky_relu(self.fclayers(x))
         #print(x1.shape)
         x2 = self.convlayer(x)
         #print(x2.shape)
            
         x3 = x1+x2
         x3 = self.fc1(x3)
         return x3

# Data transform

In [63]:
class CusDatasetLoader(Dataset):
    def __init__(self,x,y):
        self.len = y.shape[0]
        self.x_data = x
        self.y_data = y
  
    def __len__(self):
        return self.len

    def __getitem__(self, index):
        #return self.dataframe.iloc[index]
        return self.x_data[index], self.y_data[index]

In [64]:
# X_shape(X) : used for split the data into a right tensor size
#input: X in shape of 16760 rows × 7488 columns (df)
#output: X in shape of torch.Size([16760, 312, 24]) (tensor)
def X_shape(X):
    X_tensor = torch.from_numpy(np.array(X)).to(torch.float32)
    X_tensor = torch.split(X_tensor,1)
    X_tensor = torch.stack(X_tensor)
    print("X now in shape of",X_tensor.shape)
    return X_tensor

# y_tensor(y) : used for split the data into a right tensor size
#input: X in shape of 16760 rows × 7488 columns (df)
#output: X in shape of torch.Size([16760, 312, 24]) (tensor)
def y_tensor(y):
    y= torch.from_numpy(np.array(y)).to(torch.float32).reshape(len(y),1)
    print("y now in shape of",y.shape)
    return y

In [65]:
# X should be in smaples, 1, squence, rows
X1 = X_shape(X_train_selected_t2_norm)
y1 = y_tensor(y_train_t2.iloc[:,-1])

train_datasets = CusDatasetLoader(X1, y1)
train_loader = DataLoader(dataset=train_datasets, batch_size=batch_size, shuffle=True)

x,y = train_datasets[0]
print(x.shape)
print(y.shape)

X now in shape of torch.Size([16760, 1, 54])
y now in shape of torch.Size([16760, 1])
torch.Size([1, 54])
torch.Size([1])


# Model train

In [66]:
model = JusNN(input_size, hidden_size, num_layers, output_size).to(device)

In [67]:
criterion = nn.MSELoss()
optimizer = optim.Adadelta(model.parameters(), lr = learning_rate)

In [68]:
losses=[]
acc_list=[]
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.permute(0,1,2)
        #print(data.shape)
        data = data.to(device=device)
        targets = targets.to(device=device)
        targets = torch.squeeze(targets, 1)
        #print(targets)
        
        # forward
        scores = model(data)
        #print(scores.shape)
        loss = criterion(scores, targets)
        l2_lambda = l2_lamb
        l2_norm = sum(p.pow(2.0).sum()
                  for p in model.parameters())
 
        loss = loss + l2_lambda * l2_norm
    
        # backward
        optimizer.zero_grad()
        loss.backward()
        
        # gradient descent or adam step
        optimizer.step()
        
    if epoch%5 == 0:
        losses.append(loss.detach().numpy() )
        print(f'Epoch {epoch:03}: | Loss: {loss:.5f} ')
        

Epoch 000: | Loss: 16.02535 
Epoch 005: | Loss: 14.75311 
Epoch 010: | Loss: 14.91039 
Epoch 015: | Loss: 9.71114 
Epoch 020: | Loss: 8.00420 
Epoch 025: | Loss: 5.72879 
Epoch 030: | Loss: 4.38315 
Epoch 035: | Loss: 4.31457 
Epoch 040: | Loss: 6.67893 
Epoch 045: | Loss: 2.59293 
Epoch 050: | Loss: 6.78536 
Epoch 055: | Loss: 3.48445 
Epoch 060: | Loss: 4.61111 
Epoch 065: | Loss: 5.79332 
Epoch 070: | Loss: 3.96021 
Epoch 075: | Loss: 2.66718 
Epoch 080: | Loss: 5.07338 
Epoch 085: | Loss: 3.47440 
Epoch 090: | Loss: 3.99813 
Epoch 095: | Loss: 5.37459 
Epoch 100: | Loss: 4.45372 
Epoch 105: | Loss: 6.02709 
Epoch 110: | Loss: 3.82781 
Epoch 115: | Loss: 5.72869 
Epoch 120: | Loss: 11.19768 
Epoch 125: | Loss: 4.69570 
Epoch 130: | Loss: 5.38069 
Epoch 135: | Loss: 5.44462 
Epoch 140: | Loss: 4.32249 
Epoch 145: | Loss: 5.08893 
Epoch 150: | Loss: 3.73787 
Epoch 155: | Loss: 5.62320 
Epoch 160: | Loss: 11.95387 
Epoch 165: | Loss: 4.74573 
Epoch 170: | Loss: 4.16571 
Epoch 175: | Lo

# Model RMSE

In [73]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

def check_rmse(loader, model):

    # Set model to eval
    model.eval()

    with torch.no_grad():
        for data, targets in loader:
            data = data.permute(0,1,2)
            #print(data.shape)
            data = data.to(device=device)
            targets = targets.to(device=device)
            targets = torch.squeeze(targets, 1)

            scores = model(data)
            res = rmse(scores,targets)
            print(res)
            
    # Toggle model back to train
    model.train()
    
    

In [74]:
X2 = X_shape(X_valid_selected_t2_norm)
y2 = y_tensor(y_valid_t2.iloc[:,-1])

Valid_datasets = CusDatasetLoader(X2, y2)
Valid_loader = DataLoader(dataset=train_datasets, batch_size=batch_size, shuffle=True)
print(f"RMSE on test set: {check_rmse(Valid_loader, model)*100:.2f} %")

X now in shape of torch.Size([2394, 1, 54])
y now in shape of torch.Size([2394, 1])
tensor(2.6790)
tensor(1.9424)
tensor(1.9985)
tensor(1.5515)
tensor(1.5406)
tensor(2.2862)
tensor(1.8800)
tensor(2.0082)
tensor(2.2136)
tensor(2.0294)
tensor(2.1773)
tensor(2.1629)
tensor(2.1617)
tensor(2.2278)
tensor(2.1295)
tensor(1.8482)
tensor(1.9325)
tensor(1.5646)
tensor(1.9344)
tensor(1.7955)
tensor(2.1730)
tensor(2.1426)
tensor(1.8682)
tensor(1.8532)
tensor(1.8276)
tensor(2.0679)
tensor(1.8687)
tensor(2.0467)
tensor(2.3827)
tensor(2.0190)
tensor(1.9705)
tensor(1.9690)
tensor(1.7118)
tensor(2.1212)
tensor(2.2777)
tensor(1.6681)
tensor(1.9309)
tensor(2.6509)
tensor(2.1872)
tensor(2.0891)
tensor(2.0952)
tensor(2.1847)
tensor(2.0904)
tensor(1.9486)
tensor(1.5859)
tensor(2.4112)
tensor(1.9723)
tensor(2.3834)
tensor(1.6345)
tensor(1.8415)
tensor(1.7791)
tensor(1.7980)
tensor(1.9163)
tensor(2.0734)
tensor(1.6948)
tensor(2.0367)
tensor(1.6204)
tensor(2.3004)
tensor(1.8649)
tensor(2.0392)
tensor(2.1070)
t

TypeError: unsupported operand type(s) for *: 'NoneType' and 'int'

# Test

In [None]:
def test(loader, model):
    # Set model to eval
    model.eval()
    res = torch.tensor([], dtype=torch.int64)
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).permute(0,2,1)
            print(x.shape)
            y = torch.squeeze(y.to(device=device), 1)
            print(y.shape)

            scores = model(x)
            _, predictions = scores.max(1)
            res =  torch.cat((predictions, res), 0)  
    model.train()
   
    return res

In [None]:
X_test

In [None]:
X_test_final = X_shape(X_test)
y_final = torch.zeros([4790,1])

test_datasets = CusDatasetLoader(X_test_final, y_final)
test_loader = DataLoader(dataset=test_datasets, batch_size=batch_size, shuffle=True)

result = test(test_loader, model)
result

In [None]:

#df.index = ['Row_1', 'Row_2', 'Row_3', 'Row_4']
res1 = pd.DataFrame(result, columns=["Predicted"])
res1.index = X_test.index
res1.index.name = 'Id'
pd.DataFrame(res1).to_csv('out.csv')


#

#

#