In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import KFold

https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-pytorch.md

In [36]:
RAMAN_DATASET_PATH = "./data/raman_diabetis_spectroscopy/"
XENON_DATASET_PATH = "./data/xenon/"

In [37]:
dset_name = 'vein'
d = pd.read_csv(RAMAN_DATASET_PATH+dset_name+'.csv')
d = d.iloc[1:,:]
y = d.iloc[:,1].astype(int)
# Trim data
X = d.iloc[:,800:1800]
# Make data zero-mean
means = X.mean(0).to_frame().T
means = means._append([means]*20, ignore_index=True).iloc[1:]
X = X - means
# Scale down the values
# TODO

In [38]:
X_neg, y_neg = X[y==0], y[y==0]
X_pos, y_pos = X[y==1], y[y==1]


In [39]:
tensor_x = torch.tensor(X.to_numpy(dtype=float)).to(torch.float32)
tensor_y = torch.tensor(y.to_numpy(dtype=int)).to(torch.long)
dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)

In [40]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(1000, 14),
            nn.ReLU(),
            nn.Linear(14, 2),
            nn.Softmax()
        )
    
    def forward(self, x):
        return self.layers(x)

In [44]:
# loss_function = nn.CrossEntropyLoss()
loss_function = nn.MSELoss()

In [50]:
n = 5
# For fold results
results = {}
kf1 = KFold(n_splits=n, shuffle=True)
kf2 = KFold(n_splits=n, shuffle=True)
c = len(X_pos)
fold = 0
for train_pos, test_pos in kf1.split(X_pos):
    for train_neg, test_neg in kf2.split(X_neg):
        fold+=1
        # print("%s %s %s %s" % (train_pos, test_pos, train_neg+c, test_neg+c))
        train_ids = [*train_pos, *train_neg]
        test_ids = [*test_pos, *test_neg]
        # Sample elements randomly from a given list of ids, no replacement.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
        
        # Define data loaders for training and testing data in this fold
        trainloader = torch.utils.data.DataLoader(
                        dataset, 
                        batch_size=8, sampler=train_subsampler)
        testloader = torch.utils.data.DataLoader(
                        dataset,
                        batch_size=2, sampler=test_subsampler)
        
        # Init the neural network
        network = MLP()
        
        # Initialize optimizer
        optimizer = torch.optim.Adam(network.parameters(), lr=1e-4)

        # Run the training loop for defined number of epochs
        for epoch in range(0, 30):

            # Print epoch
            print(f'Starting epoch {epoch+1}')

            # Set current loss value
            current_loss = 0.0

            # Iterate over the DataLoader for training data
            i = 1 
            for data in trainloader:
                
                # Get inputs
                inputs, targets = data
                targets = targets.int()
                # Zero the gradients
                optimizer.zero_grad()
                
                # Perform forward pass
                outputs = network(inputs)
                _, predicted = torch.max(outputs.data, 1)
                predicted = predicted.int()
                # Compute loss
                loss = loss_function(predicted, targets)
                
                # Perform backward pass
                loss.backward()
                
                # Perform optimization
                optimizer.step()
                
                # Print statistics
                current_loss += loss.item()
                if i % 2 == 1:
                    print('Loss after mini-batch %5d: %.3f' %
                        (i + 1, current_loss / 500))
                    current_loss = 0.0
                i+=1
            # Process is complete.
            print('Training process has finished. Saving trained model.')

            # Print about testing
            print('Starting testing')
            
            # Saving the model
            save_path = f'./model-fold-{fold}.pth'
            torch.save(network.state_dict(), save_path)

            # Evaluationfor this fold
            correct, total = 0, 0
            with torch.no_grad():

                # Iterate over the test data and generate predictions
                for i, data in enumerate(testloader, 0):

                    # Get inputs
                    inputs, targets = data

                    # Generate outputs
                    outputs = network(inputs)

                    # Set total and correct
                    _, predicted = torch.max(outputs.data, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

                # Print accuracy
                print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
                print('--------------------------------')
                results[fold] = 100.0 * (correct / total)
        

Starting epoch 1


RuntimeError: "mse_cpu" not implemented for 'Int'

In [None]:
model = MLP()
model.load_state_dict(torch.load('model-fold-4.pth'))
model.eval()

MLP(
  (layers): Sequential(
    (0): Linear(in_features=1000, out_features=14, bias=True)
    (1): ReLU()
    (2): Linear(in_features=14, out_features=2, bias=True)
  )
)

In [None]:
inp = X.iloc[0]
label = y.iloc[0]
inp = torch.tensor(inp).float()
out = model(inp)
print("predicted: ", int(torch.max(out.data, 0)[1]))
print("true val:" , label)

predicted:  1
true val: 1


  inp = torch.tensor(inp).float()


In [None]:
print(outputs)
print(torch.max(outputs.data, 1))

tensor([[-14.5799,   7.9601]])
torch.return_types.max(
values=tensor([7.9601]),
indices=tensor([1]))


In [None]:
model = MLP()
model.load_state_dict(torch.load('model-fold-1.pth'))

<All keys matched successfully>

In [None]:
inp = torch.tensor(X.to_numpy()).float()
best_model = None
best_score = 0
labels = y.to_numpy()

In [None]:
for i in range(1, 26):
    model = MLP()
    model.load_state_dict(torch.load(f'model-fold-{i}.pth'))
    out = model(inp)
    _, preds = torch.max(out.data, 1)
    preds = preds.numpy()
    matches = labels[labels==preds]
    score = len(matches)/len(labels)
    print(score)
    if score > best_score:
        best_model = model
        best_score = score

0.6
0.55
0.55
0.55
0.55
0.55
0.55
0.55
0.55
0.6
0.55
0.55
0.55
0.6
0.55
0.55
0.55
0.55
0.55
0.55
0.55
0.55
0.55
0.6
0.55


In [None]:
best_score

0.6