# Libraries

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn import metrics

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Import data

In [None]:
# Execute the preprocessing notebook before anything else.
# NOTE(review): the cells below use np, pd, scaler and the X_*/y_* splits
# without defining them here — presumably they all come from this run; confirm.
%run Data_preprocessing.ipynb

In [None]:
# Show the dimensions of each normalised feature split (train, valid, test).
for _split in (X_train_norm, X_valid_norm, X_test_norm):
    print(_split.shape)

# Imbalanced Learning

In [None]:
# Append the last column of y_train_t1 to the features as one extra column.
Full = pd.DataFrame(
    np.concatenate(
        (X_train_norm, pd.DataFrame(y_train_t1.iloc[:, -1])),
        axis=1,
    )
)

# Column 61 is treated as the class label; split the rows by its value
# and report how many rows fall into each class.
label0 = Full.loc[Full[61] == 0]
label1 = Full.loc[Full[61] == 1]
print(label0.shape, label1.shape)

In [None]:
# Resample the training set with imblearn's RandomOverSampler, using the last
# column of y_train_t1 as the class label.
from imblearn.over_sampling import RandomOverSampler
# NOTE(review): no random_state is passed, so the resampled rows presumably
# differ from run to run — confirm whether reproducibility is needed.
oversample = RandomOverSampler()
x_over, y_over = oversample.fit_resample(X_train_norm, y_train_t1.iloc[:,-1])


# Wrap the resampled labels in a DataFrame so later cells can use .iloc[:, -1].
y_over = pd.DataFrame(y_over)
y_over.shape
x_over.shape

In [None]:
# Same check as before resampling: join features and labels side by side ...
Full = pd.DataFrame(np.concatenate((x_over, y_over), axis=1))

# ... then count the rows whose label column (61) is 0 and 1 respectively.
label0, label1 = (Full[Full[61] == value] for value in (0, 1))
print(label0.shape, label1.shape)

In [None]:
# From here on, train on the oversampled features and labels.
X_train_norm, y_train_t1 = x_over, y_over

# Hyperparameters

In [None]:
input_size = X_train_norm.shape[1] # number of feature columns; passed to NN as the first layer's input width
output_size = 2 # live or dead -- NOTE(review): not referenced in the visible code; NN ends in a single output unit
learning_rate = 0.01 # passed to the optimizer as lr
batch_size = 64 # samples per DataLoader batch
num_epochs = 200 # full passes over train_loader

# Create FullyNet

In [None]:
class NN(nn.Module):
    """Fully connected binary classifier: input_size -> 52 -> 26 -> 13 -> 1.

    The first two hidden layers are each followed by a leaky ReLU and then
    batch normalisation; the third by a leaky ReLU and dropout (p=0.1).
    The single output unit goes through a sigmoid, so the result is a value
    in (0, 1) per sample.
    """

    def __init__(self, input_size):
        super().__init__()

        # Dense layers, halving the width at every step.
        self.fc1 = nn.Linear(input_size, 52)
        self.fc2 = nn.Linear(52, 26)
        self.fc3 = nn.Linear(26, 13)
        self.fc4 = nn.Linear(13, 1)

        # Normalisation for the first two hidden activations.
        self.batchnorm1 = nn.BatchNorm1d(52)
        self.batchnorm2 = nn.BatchNorm1d(26)

        # Light dropout applied just before the output layer.
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, x):
        """Return the sigmoid output of shape (batch, 1) for input batch x."""
        hidden = self.batchnorm1(F.leaky_relu(self.fc1(x)))
        hidden = self.batchnorm2(F.leaky_relu(self.fc2(hidden)))
        hidden = self.dropout(F.leaky_relu(self.fc3(hidden)))
        return torch.sigmoid(self.fc4(hidden))

# Data transform

In [None]:
class CusDatasetLoader(Dataset):
    """Map-style dataset that pairs x[i] with y[i].

    The sample count is taken from the first dimension of y.
    """

    def __init__(self, x, y):
        # Number of samples, as reported by the label container.
        self.len = y.shape[0]
        # Keep references to the inputs; nothing is copied.
        self.x_data, self.y_data = x, y

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return the (features, label) pair stored at index."""
        sample = self.x_data[index]
        label = self.y_data[index]
        return sample, label

In [None]:
# X_tensor(X): convert the feature table into a float32 tensor
#input: X with n rows × m columns (df)
#output: X in shape of torch.Size([n, m]) (tensor)

def X_tensor(X):
    """Convert X to a float32 torch tensor and print the resulting shape.

    X is first turned into a NumPy array, so anything np.array accepts
    (DataFrame, ndarray, nested list) works. The shape is left as it is.
    """
    as_array = np.array(X)
    features = torch.from_numpy(as_array).to(torch.float32)
    print("X now in shape of", features.shape)
    return features

# y_tensor(y): convert the labels into a float32 column tensor
#input: y with n entries (e.g. one DataFrame column)
#output: y in shape of torch.Size([n, 1]) (tensor)

def y_tensor(y):
    """Convert y to a float32 column tensor of shape (len(y), 1) and print its shape."""
    n_rows = len(y)
    labels = torch.from_numpy(np.array(y)).to(torch.float32)
    labels = labels.reshape(n_rows, 1)
    print("y now in shape of", labels.shape)
    return labels

In [None]:
# Convert the training data to tensors: X1 keeps the 2-D shape of X_train_norm,
# y1 becomes an (n, 1) float column built from the last column of y_train_t1.
X1 = X_tensor(X_train_norm)
y1 = y_tensor(y_train_t1.iloc[:,-1].astype(float))

# Wrap the tensors in a dataset and serve them in shuffled mini-batches.
train_datasets = CusDatasetLoader(X1, y1)
train_loader = DataLoader(dataset=train_datasets, batch_size=batch_size, shuffle=True)

# Sanity check: shapes of a single (features, label) sample.
x,y = train_datasets[0]
print(x.shape)
print(y.shape)

# Model train

In [None]:
# Build the network for the current feature width and move it to `device`.
model = NN(input_size).to(device)

In [None]:
# Adadelta over all model parameters, with lr taken from the hyperparameters.
optimizer = optim.Adadelta(model.parameters(), lr = learning_rate)
# Binary cross-entropy on probabilities; NN.forward already applies the sigmoid.
criterion = nn.BCELoss()

In [None]:
# Train the model. Every 5th epoch the last batch's loss and the epoch's
# running accuracy are stored in `losses` / `acc_list` for plotting.
losses = []
acc_list = []
l2_lambda = 0.02  # weight of the explicit L2 penalty added to the loss

for epoch in range(num_epochs):
    # Reset the counters so `acc` describes this epoch only instead of
    # accumulating over every epoch seen so far.
    num_correct = 0
    num_samples = 0

    for data, targets in train_loader:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # l2 regularization over every model parameter
        l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
        loss = loss + l2_lambda * l2_norm

        # backward
        optimizer.zero_grad()
        loss.backward()

        # parameter update
        optimizer.step()

        # accuracy: threshold the probabilities from the forward pass above.
        # Re-running the model here would update the BatchNorm statistics a
        # second time, and Tensor.apply_ only works on CPU tensors.
        predictions = (scores.detach() >= 0.5).to(targets.dtype)
        num_correct += int((predictions == targets).sum())
        num_samples += predictions.size(0)
        acc = num_correct / num_samples

    if epoch % 5 == 0:
        # .item() yields a plain float and works for CPU and CUDA tensors.
        losses.append(loss.item())
        acc_list.append(acc)
    print(f'Epoch {epoch:03}: | Loss: {loss:.5f} | Acc: {acc:.3f}')
        
        

In [None]:
# Plot the recorded training loss.
# One value is stored whenever epoch % 5 == 0, i.e. for epochs 0, 5, 10, ...,
# so the x axis has to start at 0 rather than 5.
x_range = list(range(0, 5 * len(losses), 5))
plt.plot(x_range, losses)
plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('loss')
plt.show()

# Model Accuracy

In [None]:
# Accuracy of the trained model on the training loader.
num_correct = 0
num_samples = 0

# Set model to eval
model.eval()

with torch.no_grad():
    for x, y in train_loader:
        x = x.to(device=device)
        # .to() returns a new tensor, so the result has to be kept.
        y = y.to(device=device)

        scores = model(x)
        # Vectorised 0.5 threshold; works on CPU and CUDA tensors alike.
        predictions = (scores >= 0.5).to(y.dtype)
        num_correct += int((predictions == y).sum())
        num_samples += predictions.size(0)

# Toggle model back to train
model.train()
num_correct / num_samples

In [None]:
# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    """Return the fraction of samples in `loader` that `model` gets right.

    Args:
        loader: iterable of (x, y) batches; y holds 0/1 labels with one value
            per sample.
        model: module whose output is one probability per sample; outputs
            >= 0.5 count as class 1.

    Returns:
        Accuracy as a float in [0, 1], or 0.0 when `loader` yields no samples.

    The model is put into eval mode for the pass and switched back to train
    mode before returning.
    """
    num_correct = 0
    num_samples = 0

    # Evaluate on whichever device the model lives on, so inputs, labels and
    # predictions always end up on the same device.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Set model to eval
    model.eval()

    with torch.no_grad():
        # Iterate the loader that was passed in, not a global one.
        for x, y in loader:
            x = x.to(device=run_device)
            y = y.to(device=run_device)

            scores = model(x)
            predictions = (scores >= 0.5).to(y.dtype)
            # Align the label shape with the predictions so a 1-D label
            # vector cannot silently broadcast against an (n, 1) output.
            num_correct += int((predictions == y.reshape(predictions.shape)).sum())
            num_samples += predictions.size(0)

    # Toggle model back to train
    model.train()

    if num_samples == 0:
        return 0.0
    return num_correct / num_samples

In [None]:
# Report the training accuracy as a percentage with two decimals
# (".2f"; the previous "2f" was a minimum width of 2 with default precision).
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f} %")
#print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f} %")

# Model Performance on validation dataset

In [None]:
# Build the validation tensors from the normalised features and the last
# label column.
X2 = X_tensor(X_valid_norm)
y2 = y_tensor(y_valid_t1.iloc[:,-1])

Valid_datasets = CusDatasetLoader(X2, y2)
# Evaluation gains nothing from shuffling, and a fixed order keeps the
# loader's batches aligned with the row order of y_valid_t1.
Valid_loader = DataLoader(dataset=Valid_datasets, batch_size=batch_size, shuffle=False)
print(f"Accuracy on valid set: {check_accuracy(Valid_loader, model)*100:.2f} %")

In [None]:
# ROC curve and AUC of the model on the validation loader.
batch_labels = []
batch_probs = []

model.eval()
with torch.no_grad():
    for x, y in Valid_loader:
        x = x.to(device=device)

        # The model already ends in a sigmoid, so its output is the
        # probability; applying sigmoid again would squash it a second time.
        pred_y1 = model(x)
        # .cpu() before .numpy() so this also works when running on CUDA.
        batch_probs.append(pred_y1.squeeze(-1).cpu().numpy())
        # Take the labels from the same batch so every prediction stays
        # paired with its own label, whatever order the loader uses.
        batch_labels.append(y.reshape(-1).cpu().numpy())

y_true = np.concatenate(batch_labels).reshape(-1, 1)
all_y_pred = np.concatenate(batch_probs)

print(y_true.shape)
print(all_y_pred.reshape(-1,1).shape)
fpr, tpr, _ = metrics.roc_curve(y_true, all_y_pred)
roc_auc = metrics.roc_auc_score(y_true, all_y_pred)

model.train()

plt.figure(1)
plt.plot([0, 1], [0, 1], 'k-')
plt.plot(fpr, tpr, label='FN(area = {:.3f})'.format(roc_auc))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title("ROC curve")
plt.legend(loc="best")
plt.show()

# Test

In [None]:
def test(loader, model):
    """Run `model` over every batch of `loader` and return the stacked outputs.

    Args:
        loader: iterable of (x, y) batches; the y part is ignored.
        model: module applied to each x batch.

    Returns:
        A CPU tensor with the model outputs concatenated along dim 0 in the
        order the loader produced them, or an empty int64 tensor when the
        loader yields nothing.

    The model is put into eval mode for the pass and switched back to train
    mode before returning.
    """
    # Run on whichever device the model lives on.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Set model to eval
    model.eval()
    outputs = []
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(device=run_device)
            # Append (not prepend) so row i of the result belongs to sample i
            # of the loader; move to CPU so all pieces share one device.
            outputs.append(model(x).cpu())
    model.train()

    if not outputs:
        return torch.tensor([], dtype=torch.int64)
    return torch.cat(outputs, 0)

In [None]:
# Scale the test features and rebuild X_test_norm from the result.
# NOTE(review): despite the "valid" in their names, the temporaries below
# hold the test data.
X_test_mean
x_valid_mean=X_test_mean.values
# NOTE(review): fit_transform re-fits `scaler` on the test rows. If the intent
# is to reuse the scaling fitted earlier, this should presumably be
# scaler.transform — confirm against Data_preprocessing.ipynb.
x_valid_mean_scaled = scaler.fit_transform(x_valid_mean)
X_test_norm=pd.DataFrame(x_valid_mean_scaled)
X_test_norm

In [None]:
X_test_final = X_tensor(X_test_norm)
# Placeholder labels (the dataset yields (x, y) pairs); sized from the data
# instead of a hard-coded row count.
y_final = torch.zeros([X_test_final.shape[0], 1])

test_datasets = CusDatasetLoader(X_test_final, y_final)
# Do not shuffle: the predictions are later matched to X_test.index by
# position, so the loader must keep the original row order.
test_loader = DataLoader(dataset=test_datasets, batch_size=batch_size, shuffle=False)

result = test(test_loader, model)
result

In [None]:

# Put the model outputs into a one-column frame, label the rows with the
# index of X_test and write the result to out.csv.
#df.index = ['Row_1', 'Row_2', 'Row_3', 'Row_4']
res1 = pd.DataFrame(result.tolist(), columns=["Predicted"])
# NOTE(review): rows are matched to X_test.index purely by position, so
# `result` must be in the same order as X_test — confirm.
res1.index = X_test.index
res1.index.name = 'Id'
pd.DataFrame(res1).to_csv('out.csv')
#print(torch.count_nonzero(torch.from_numpy(np.array(res1)).to(torch.float32)))
res1