In [18]:
import copy

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

from dataloader import get_dataloaders, load_samples_dataset
from samples_dataset import SamplesDataset

# Train method

In [None]:
def train(model, dataloader, optimizer, criterion, n_epoch=10, verbose=False):
    """Train ``model`` in place for ``n_epoch`` passes over ``dataloader``.

    Args:
        model: Module with at least one parameter; the device of its first
            parameter is where every batch is moved. Only column 0 of its
            output is passed to ``criterion``.
        dataloader: Iterable of ``(inputs, labels)`` batches exposing a
            ``dataset`` attribute that supports ``len()``.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        n_epoch: Number of passes over ``dataloader``.
        verbose: If True, print a completion message and plot the loss curve
            on the current matplotlib figure.

    Returns:
        list[float]: One value per epoch: the sum of the batch losses divided
        by the dataset size. This is the mean per-example loss only when
        ``criterion`` sums over the batch (e.g. ``reduction='sum'``); with a
        mean-reduced criterion the value is scaled down by about the batch
        size.

    Raises:
        ValueError: If the dataset behind ``dataloader`` is empty.
    """
    device = next(model.parameters()).device
    nb_examples = len(dataloader.dataset)
    if nb_examples == 0:
        # Fail early with a clear message instead of dividing by zero below.
        raise ValueError('Cannot train on an empty dataset.')

    mean_loss = []

    model.train()
    for _ in tqdm(range(n_epoch)):  # one iteration per pass over the dataset
        loss_sum = 0.0
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)[:, 0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
        mean_loss.append(loss_sum / nb_examples)

    if verbose:
        print('Finished Training')
        plt.title('Mean error for each epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Mean error')
        plt.plot(range(1, n_epoch + 1), mean_loss)

    # Hand the history back so callers can inspect it without plotting.
    return mean_loss

In [20]:
def test(model, dataloader, verbose=False):
    device = next(model.parameters()).device

    model.eval()
    
    with torch.no_grad():
        accuracy = 0.0
        nb_examples = 0.0
        for i, data in enumerate(dataloader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            nb_examples += len(inputs)

            outputs = model(inputs)[:,0]
            accuracy += torch.sum(outputs == labels).item()
            
        accuracy /= nb_examples
        return accuracy

In [26]:
def cross_valisation(model, X, y, optimizer, criterion, k=10, n_epoch=50):
    """Score ``model`` with k-fold cross-validation.

    ``X`` and ``y`` are split into ``k`` consecutive folds. For each fold the
    model is trained on the other ``k - 1`` folds and evaluated on the
    held-out one. The model and the optimizer are restored to the state they
    had on entry before every fold. Without that reset, training would
    accumulate across folds and each later fold would be evaluated by a model
    already fitted on it, inflating the scores.

    On return the model holds the weights trained during the last fold.

    Args:
        model: Module to train and evaluate (modified in place).
        X: Examples, splittable with ``np.array_split``.
        y: Labels, same length as ``X``.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss passed through to ``train``.
        k: Number of folds; must satisfy ``2 <= k <= len(X)``.
        n_epoch: Number of training epochs per fold.

    Returns:
        np.ndarray: The ``k`` scores returned by ``test``, one per fold.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or ``k`` is out of
            range.
    """
    if len(X) != len(y):
        raise ValueError(f'The size of X {len(X)} must be the same as the size y {len(y)}.')
    if not 2 <= k <= len(X):
        # k < 2 leaves nothing to train on; k > len(X) yields empty test folds.
        raise ValueError(f'k must be between 2 and the number of examples {len(X)}, got {k}.')

    # Snapshot the starting point so every fold trains from the same state.
    # state_dict() returns references to the live tensors, hence the deep copy.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    # Page-locked host memory is only useful for host-to-GPU transfers.
    pin_memory = next(model.parameters()).device.type == 'cuda'

    dataset_X_folds = np.array_split(X, k)
    dataset_y_folds = np.array_split(y, k)

    print(f'Satrting cross-validation with k={k}.')
    scores = []
    for i in tqdm(range(k), desc='Cross-validation performed at'):
        # Reset weights and optimizer buffers (e.g. momentum) for this fold.
        model.load_state_dict(initial_model_state)
        # Load a copy so training cannot mutate the snapshot through aliasing.
        optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

        # Training set: every fold except the i-th.
        samples_train = SamplesDataset()
        dataset_X_train = np.concatenate(dataset_X_folds[:i] + dataset_X_folds[i + 1:])
        dataset_y_train = np.concatenate(dataset_y_folds[:i] + dataset_y_folds[i + 1:])
        samples_train.add_examples(dataset_X_train, dataset_y_train)

        # Test set: the held-out i-th fold.
        samples_test = SamplesDataset()
        samples_test.add_examples(dataset_X_folds[i], dataset_y_folds[i])

        train_dataloader = DataLoader(samples_train, batch_size=64, shuffle=True, pin_memory=pin_memory)
        # Example order does not affect the score, so evaluation is not shuffled.
        test_dataloader = DataLoader(samples_test, batch_size=64, shuffle=False, pin_memory=pin_memory)

        train(model=model, dataloader=train_dataloader, optimizer=optimizer, criterion=criterion, n_epoch=n_epoch)
        scores.append(test(model=model, dataloader=test_dataloader))

    return np.array(scores)

# Main

In [29]:

# GPU selection is commented out; everything below runs on the CPU.
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device = 'cpu'

# Network: 70 inputs -> one hidden ReLU layer -> a single sigmoid output.
n_hidden = 700
model = nn.Sequential(
    nn.Linear(in_features=70, out_features=n_hidden),
    nn.ReLU(),
    nn.Linear(in_features=n_hidden, out_features=1),
    nn.Sigmoid(),
).to(device)

print(model)
print(f'Model on device : {next(model.parameters()).device}.')

# SGD with momentum, minimising the summed squared error.
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.MSELoss(reduction='sum')

# Load the samples, shuffle them, and extract the raw (X, y) pair.
full_dataset = load_samples_dataset()
full_dataset.shuffle()
dataset_X, dataset_y = full_dataset.get_row_dataset()

# 6-fold cross-validation; `score` holds one value per fold.
score = cross_valisation(
    model=model,
    X=dataset_X,
    y=dataset_y,
    optimizer=optimizer,
    criterion=criterion,
    k=6,
)

print(f'Score mean : {score.mean()}')
print(f'Score standard deviation : {score.std()}')

Sequential(
  (0): Linear(in_features=70, out_features=700, bias=True)
  (1): ReLU()
  (2): Linear(in_features=700, out_features=1, bias=True)
  (3): Sigmoid()
)
Model on device : cpu.
Creating positive examples.


  0%|          | 0/649 [00:00<?, ?it/s]

Creating negative examples.


  0%|          | 0/7326 [00:00<?, ?it/s]

Satrting cross-validation with k=6.


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]