In [None]:
import csv

import h5py
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim

# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the 'features' dataset of the train and test HDF5 files.
# np.asarray copies each dataset into memory, so the files are closed as soon as
# the copy has been made (the original left both file handles open).
with h5py.File('X_train_new.h5', 'r') as f_train:
    X_train = np.asarray(f_train['features'])

with h5py.File('X_test_new.h5', 'r') as f_test:
    X_test = np.asarray(f_test['features'])

print('X train shape: ', X_train.shape)
print('X_test_shape', X_test.shape)

# One label slot per entry along the first axis of X_train (previously a
# hard-coded 946). Slots whose id never appears in the CSV keep the label 0.
# NOTE(review): assumes the CSV ids are 0-based positions into X_train — confirm.
y_train = [0] * X_train.shape[0]

with open('y_train_AvCsavx.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # the 'id' column selects the slot, the 'label' column is the class
        y_train[int(row['id'])] = int(row['label'])

In [None]:
# Merge the first two axes of X_train into a single example axis and insert a
# singleton channel axis: (shape[0] * shape[1], 1, shape[3], shape[2]).
# NOTE(review): np.reshape only reinterprets the memory layout, it does NOT swap
# axes 2 and 3. If a transpose was intended, np.transpose is needed first, and the
# reshape applied to X_test must be changed identically. Left as-is on purpose.
nr_segm_per_record = X_train.shape[1]
X_train_segm = np.reshape(X_train, [X_train.shape[0] * nr_segm_per_record, 1, X_train.shape[3], X_train.shape[2]])
X_train_segm = torch.from_numpy(X_train_segm)
nr_train_examples = X_train_segm.shape[0]
print("Number of training examples: ", str(nr_train_examples))


# train/val split: the last tenth of the examples is held out for validation.
# NOTE(review): the cut is made per example, so it is not guaranteed to fall on a
# multiple of nr_segm_per_record — one record may end up in both sets.
nr_val_examples = nr_train_examples // 10
X_train_splitted = X_train_segm[:-nr_val_examples]
X_val_splitted = X_train_segm[-nr_val_examples:]

print('X train after split: ', X_train_splitted.shape)
# fixed: this line used to print the training shape a second time
print('X val after split: ', X_val_splitted.shape)


# data normalization
# Statistics come from the training part only and are reduced over axes 0-2,
# i.e. one mean/std per index of the last axis. The same values are applied to
# the validation part.
mean = torch.mean(X_train_splitted, (0, 1, 2), keepdim=True)
std = torch.std(X_train_splitted, (0, 1, 2), keepdim=True)
X_train_splitted = (X_train_splitted - mean) / (std + 1e-8)  # 1e-8 guards against a zero std
X_train_splitted = X_train_splitted.to(device=device)

X_val_splitted = (X_val_splitted - mean) / (std + 1e-8)
X_val_splitted = X_val_splitted.to(device=device)


# extend y_train labels to every segment (instead of every patient)
# The repeat count follows X_train.shape[1] so labels stay aligned with the rows
# of X_train_segm (it used to be a hard-coded 40).
y_train_segm = np.repeat(y_train, nr_segm_per_record)
y_train_segm = torch.from_numpy(y_train_segm)

y_train_splitted = y_train_segm[:-nr_val_examples].to(device=device)
y_val_splitted = y_train_segm[-nr_val_examples:].to(device=device)

# Labels are 0/1, so their sum is the number of class-1 examples.
nr_cls_1_train = torch.sum(y_train_splitted).item()
nr_cls_0_train =  y_train_splitted.shape[0] - nr_cls_1_train

print("Number of examples of class 1 = ", nr_cls_1_train)
print("Number of examples of class 0 = ", nr_cls_0_train)

In [None]:
# Balance the training set by duplicating class-1 examples.
# The work is done in NumPy on the host, so both tensors are pulled back first.
X_train_splitted = X_train_splitted.cpu().numpy()
y_train_splitted = y_train_splitted.cpu().numpy()

# Labels are 0/1: the sum counts class 1, the remainder is class 0.
n_cls1 = np.sum(y_train_splitted)
n_cls0 = len(y_train_splitted) - n_cls1
diff_segm = n_cls0 - n_cls1


# Draw diff_segm class-1 positions with replacement and gather those examples.
indices_cls1_splitted = np.flatnonzero(y_train_splitted == 1)
indices_cls1_splitted_oversampled = np.random.choice(
    indices_cls1_splitted,
    size=diff_segm,
    replace=True,
)
X_train_cls1_splitted_oversampled = X_train_splitted[indices_cls1_splitted_oversampled]


# The duplicates are used unchanged (a Gaussian-noise variant,
# np.random.normal(0., 1.5, ...), was tried here and is disabled).
X_train_cls1_splitted_oversampled_noise = X_train_cls1_splitted_oversampled
y_train_cls1_splitted_oversampled = [1] * diff_segm


# Append the duplicates (and their labels) after the original examples.
X_train_concat = np.concatenate(
    (X_train_splitted, X_train_cls1_splitted_oversampled_noise),
    axis=0,
)
y_train_concat = np.concatenate(
    (y_train_splitted, np.asarray(y_train_cls1_splitted_oversampled))
)
print('X train after concat: ', X_train_concat.shape)
print('y train after concat: ', y_train_concat.shape)


# Back to torch: float32 inputs, labels keep their integer dtype.
X_train_concat = torch.from_numpy(X_train_concat).float().to(device = device)
y_train_concat = torch.from_numpy(y_train_concat).to(device = device)

In [None]:
class Net_CNN_1(nn.Module):
    """Three-stage 2-D CNN producing two class logits per example.

    Each stage is Conv2d -> ReLU -> MaxPool2d; the first two stages are followed
    by Dropout(0.25). The convolutions pad only the last axis, so that axis keeps
    its size while the other one shrinks. The final linear layer (no bias) expects
    100 * 2 * 7 flattened features, which is what e.g. a (N, 1, 500, 7) input
    yields.
    """

    def __init__(self):
        # Zero-argument super(): the original super(Net, self) referred to an
        # undefined name `Net` and raised NameError on instantiation.
        super().__init__()

        self.conv1 = nn.Conv2d(1, 100, (5, 3), padding=(0, 1))
        self.pool1 = nn.MaxPool2d((4, 1))
        self.drop1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(100, 100, (5, 3), padding=(0, 1))
        self.pool2 = nn.MaxPool2d((4, 1))
        self.drop2 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(100, 100, (9, 3), padding=(0, 1))
        self.pool3 = nn.MaxPool2d((10, 1))

        self.fc = nn.Linear(100 * 2 * 7, 2, bias = False)

    def forward(self, x):
        """Return raw (un-normalised) logits of shape (N, 2) for a batch `x`."""
        x = self.drop1(self.pool1(F.relu(self.conv1(x))))
        x = self.drop2(self.pool2(F.relu(self.conv2(x))))
        x = self.pool3(F.relu(self.conv3(x)))

        # keep the batch axis, flatten everything else for the linear layer
        x = torch.flatten(x, start_dim=1)

        x = self.fc(x)
        return x

In [None]:
class Net_CNN_2(nn.Module):
    """Three-stage 2-D CNN producing two class logits per example.

    Each stage is Conv2d -> ReLU -> MaxPool2d; only the first stage is followed
    by Dropout(0.25). The convolutions pad only the last axis, so that axis keeps
    its size while the other one shrinks. The final linear layer (with bias)
    expects 100 * 2 * 7 flattened features, which is what e.g. a (N, 1, 500, 7)
    input yields.
    """

    def __init__(self):
        # Zero-argument super(): the original super(Net, self) referred to an
        # undefined name `Net` and raised NameError on instantiation.
        super().__init__()

        self.conv1 = nn.Conv2d(1, 100, (5, 3), padding=(0, 1))
        self.pool1 = nn.MaxPool2d((4, 1))
        self.drop1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(100, 100, (5, 3), padding=(0, 1))
        self.pool2 = nn.MaxPool2d((4, 1))

        self.conv3 = nn.Conv2d(100, 100, (9, 3), padding=(0, 1))
        self.pool3 = nn.MaxPool2d((10, 1))

        self.fc = nn.Linear(100 * 2 * 7, 2, bias = True)

    def forward(self, x):
        """Return raw (un-normalised) logits of shape (N, 2) for a batch `x`."""
        x = self.drop1(self.pool1(F.relu(self.conv1(x))))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        # keep the batch axis, flatten everything else for the linear layer
        x = torch.flatten(x, start_dim=1)

        x = self.fc(x)
        return x

In [None]:
# Training loop adapted from
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
#
# Trains Net_CNN_2 with Adam on (X_train_concat, y_train_concat) and, after every
# epoch, reports accuracy and cross-entropy loss on (X_val_splitted, y_val_splitted).

net = Net_CNN_2()
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.0001)

batch_size = 64
batch_size_val = 16
print_every_n = 500

# Only full batches are processed; a trailing partial batch is skipped.
nr_batches_train = X_train_concat.shape[0] // batch_size
nr_batches_val = X_val_splitted.shape[0] // batch_size_val

for epoch in range(150):

    # Dropout must be active while training; it is switched off again for the
    # validation pass below (the original never toggled the mode).
    net.train()

    # Reshuffle inputs and labels with the same permutation each epoch.
    perm = np.random.permutation(X_train_concat.shape[0])
    perm = torch.from_numpy(perm).to(device=device)

    X_train_concat = X_train_concat[perm]
    y_train_concat = y_train_concat[perm]

    running_loss = 0.0
    total_acc = 0.0
    acc_print = 0.0
    batches_since_print = 0

    for batch_i in range(nr_batches_train):
        inputs = X_train_concat[batch_i * batch_size:(batch_i+1) * batch_size]
        labels = y_train_concat[batch_i * batch_size:(batch_i+1) * batch_size]

        optimizer.zero_grad()
        outputs = net(inputs)

        # CrossEntropyLoss already averages over the batch.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Fraction of correct predictions in this batch. No .cuda() call, so the
        # loop also runs when `device` is the CPU.
        pred = outputs.detach().argmax(dim=1)
        acc = (pred == labels).float().mean().item()
        total_acc += acc
        acc_print += acc

        # print statistics
        running_loss += loss.item()
        batches_since_print += 1
        if batch_i % print_every_n == 0:
            # Average over the batches actually accumulated since the last
            # report (the original always divided by print_every_n, which made
            # the first report of every epoch far too small).
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, batch_i + 1, running_loss / batches_since_print))

            print('[%d, %5d] acc: %.3f' %
                  (epoch + 1, batch_i + 1, acc_print / batches_since_print))

            running_loss = 0.0
            acc_print = 0.0
            batches_since_print = 0

    print("Accuracy train epoch ", epoch, ": ", total_acc / nr_batches_train)

    total_acc_val = 0.0
    total_loss_val = 0.0

    # Evaluation mode disables dropout so validation numbers are deterministic.
    net.eval()
    with torch.no_grad():
        for batch_i in range(nr_batches_val):

            # Cast to float32 like the training inputs so the dtype matches the
            # network weights (a no-op when the data is already float32).
            inputs_val = X_val_splitted[batch_i * batch_size_val:(batch_i+1) * batch_size_val].float()
            labels_val = y_val_splitted[batch_i * batch_size_val:(batch_i+1) * batch_size_val]
            logits_val = net(inputs_val)

            # The loss must see raw logits: CrossEntropyLoss applies log-softmax
            # itself, so feeding it softmax output (as before) distorts the value.
            loss_val = criterion(logits_val, labels_val).item()

            outputs_val = F.softmax(logits_val, dim = 1)

            # Disabled experiment: rescale the probabilities by the inverse
            # training-class frequencies and renormalise.
            # outputs_val[:, 0] = outputs_val[:, 0] * ((nr_cls_0_train * 1.0) + nr_cls_1_train) / (1.0 * nr_cls_0_train)
            # outputs_val[:, 1] = outputs_val[:, 1] * ((nr_cls_0_train * 1.0) + nr_cls_1_train) / (1.0 * nr_cls_1_train)
            # temp = outputs_val[:, 0]
            # outputs_val[:, 0] = temp / (temp + outputs_val[:, 1])
            # outputs_val[:, 1] = 1 - outputs_val[:, 0]

            if batch_i%100 == 0:
                print(outputs_val[:5])

            pred = outputs_val.argmax(dim=1)
            # .item() keeps the running totals plain Python floats
            acc_val = (pred == labels_val).float().mean().item()
            total_acc_val += acc_val
            total_loss_val += loss_val

        print("Accuracy val epoch: ", epoch, ": ", (total_acc_val / nr_batches_val))
        print("Loss val epoch ", epoch, ": ", total_loss_val / nr_batches_val)


print('Finished Training')

In [None]:
# Prepare the test set the same way as the training set: same reshape, then
# normalisation with the training-set `mean` / `std`.
# NOTE(review): np.reshape does not swap axes 2 and 3, it only reinterprets the
# layout. It is kept because it must stay identical to the training reshape.
X_test_segm = torch.from_numpy(np.reshape(X_test, [X_test.shape[0] * X_test.shape[1], 1, X_test.shape[3], X_test.shape[2]]))
X_test_segm = (X_test_segm - mean) / (std + 1e-8)
# float32 to match the network weights (a no-op when the data is already float32)
X_test_segm = X_test_segm.float().to(device)

# One batch holds all rows that came from one entry of X_test's first axis
# (X_test.shape[1] rows each; previously a hard-coded 40).
batch_size = X_test.shape[1]
segment_probs = []

# Evaluation mode: without it dropout stays active and predictions are noisy.
net.eval()
with torch.no_grad():
    for batch_i in range(X_test_segm.shape[0] // batch_size):
        inputs = X_test_segm[batch_i * batch_size:(batch_i+1) * batch_size]
        outputs_test = F.softmax(net(inputs), dim=1)

        # Disabled experiment: rescale the probabilities by the inverse
        # training-class frequencies and renormalise.
        # outputs_test[:, 0] = outputs_test[:, 0] * ((nr_cls_0_train * 1.0) + nr_cls_1_train) / (1.0 * nr_cls_0_train)
        # outputs_test[:, 1] = outputs_test[:, 1] * ((nr_cls_0_train * 1.0) + nr_cls_1_train) / (1.0 * nr_cls_1_train)
        # temp = outputs_test[:, 0]
        # outputs_test[:, 0] = temp / (temp + outputs_test[:, 1])
        # outputs_test[:, 1] = 1 - outputs_test[:, 0]

        # Keep the class-1 probability of every row, moved to the host. The
        # original `preds += tensor` produced a list of 0-d device tensors that
        # np.asarray cannot convert when they live on the GPU.
        segment_probs.append(outputs_test[:, 1].cpu())


if segment_probs:
    preds = torch.cat(segment_probs).numpy()
else:
    # no test rows at all: produce an empty prediction list
    preds = np.empty(0, dtype=np.float32)

# Average the class-1 probability over each group of `batch_size` rows.
preds = np.reshape(preds, (-1, batch_size))
preds = np.mean(preds, axis=1)

# Label 1 only when the mean probability is strictly above 0.5 (same rule as
# the former (sign(p - 0.5) + 1) // 2 expression).
preds = [int(p > 0.5) for p in preds]

# Write the submission file; the context manager guarantees the file is closed.
with open('CNN_150epochs_oversampling_noThreshold.csv', "w") as out:
    out.write("id,label\n")
    rows = ['%d,%d\n' % (num, label) for num, label in enumerate(preds)]
    out.writelines(rows)