In [10]:
import numpy as np
import os, sys
import csv
import pandas as pd
import matplotlib.pyplot as plt
import time, copy
from importlib import reload

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch import optim, nn

In [2]:
combined_path = "./combined/"

In [3]:
class FeatureDataset(torch.utils.data.Dataset):
    def __init__(self, data_path, mode, transform=None):
        self.data_path = data_path
        self.transform = transform
        self.mode = mode
        
        train = []
        test = []
        
        for filename in os.listdir(data_path):
            index = int(filename[:-4])
            if index % 10 == 0:
                test.append(np.load(data_path+filename))
            else:
                train.append(np.load(data_path+filename))
                
        self.data = train if mode == "train" else test
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        data = self.data[idx][1:].astype(np.float)
        label = self.data[idx][0]
        
        if self.transform != None:
            data = self.transform(label)
        
        return data, label

In [4]:
train_set = FeatureDataset(combined_path,"train")
train_loader = DataLoader(train_set, batch_size=16, num_workers=4, shuffle=True)

test_set = FeatureDataset(combined_path,"test")
test_loader = DataLoader(test_set, batch_size=16, num_workers=4)

dataloaders = {"train": train_loader, "test": test_loader}
dataset_sizes = {"train": len(train_set), "test": len(test_set)}

print(dataset_sizes)


{'train': 1064, 'test': 118}


In [5]:
my_model = nn.Sequential(nn.Linear(50,10,bias=False), nn.Linear(10,1,bias=False), nn.Sigmoid())
criterion = nn.BCELoss()
print(my_model)
#This is such a simple linear model, definitely can be improved for better accuracy

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(my_model.parameters(), lr=0.05)
lr_scheduler = None
#lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=25, gamma=1)

Sequential(
  (0): Linear(in_features=50, out_features=10, bias=False)
  (1): Linear(in_features=10, out_features=1, bias=False)
  (2): Sigmoid()
)


In [8]:
def train_model(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            model.train()
    
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # zero the parameter gradients
                optimizer.zero_grad()
                inputs = inputs.float().squeeze()
                labels = labels.float().squeeze()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs).squeeze()                   
                    loss = criterion(outputs, labels)
                    preds = torch.round(outputs)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels)
                
            if phase == 'train' and scheduler != None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model



In [9]:
final_model = train_model(my_model, 
                                                       criterion, 
                                                       optimizer_ft, 
                                                       lr_scheduler, 
                                                       dataloaders, 
                                                       dataset_sizes,
                                                       num_epochs=50)

Epoch 0/49
----------
train Loss: 3.1250 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 1/49
----------
train Loss: 3.1133 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 2/49
----------
train Loss: 3.1250 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 3/49
----------
train Loss: 3.1191 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 4/49
----------
train Loss: 3.1367 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 5/49
----------
train Loss: 3.1309 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 6/49
----------
train Loss: 3.1250 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 7/49
----------
train Loss: 3.1250 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 8/49
----------
train Loss: 3.1250 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 9/49
----------
train Loss: 3.1367 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 10/49
----------
train Loss: 3.1367 Acc: 0.5038
test Loss: 3.4251 Acc: 0.5085

Epoch 11/49
----------
train Loss: 3.1191 Acc: 0.5038
test Loss: 3.4251 Acc