In [1]:
import numpy as np
import os, sys
import csv
import pandas as pd
import matplotlib.pyplot as plt
import time, copy
from importlib import reload

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch import optim, nn

In [2]:
combined_path = "./data/combined/"
max_vals_path = "./data/max_vals.npy"

In [5]:
max_vals = np.load(max_vals_path)
print(max_vals.shape)

(50,)


In [169]:
class FeatureDataset(torch.utils.data.Dataset):
    def __init__(self, data_path, mode, max_path=None, transform=None):
        self.data_path = data_path
        self.transform = transform
        self.mode = mode
        
        train = []
        test = []
        
        self.max_features = np.load(max_path)
        
        for filename in os.listdir(data_path):
            index = int(filename[:-4])
            if index % 10 == 0:
                test.append(np.load(data_path+filename))
            else:
                train.append(np.load(data_path+filename))
                
        self.data = train if mode == "train" else test
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        data = self.data[idx][1:].astype(np.float)
        data = data / self.max_features
        label = self.data[idx][0]
        
        if self.transform != None:
            data = self.transform(label)
        
        return data, label

In [170]:
train_set = FeatureDataset(combined_path,"train",max_path=max_vals_path)
train_loader = DataLoader(train_set, batch_size=16, num_workers=4, shuffle=True)

test_set = FeatureDataset(combined_path,"test",max_path=max_vals_path)
test_loader = DataLoader(test_set, batch_size=16, num_workers=4)

dataloaders = {"train": train_loader, "test": test_loader}
dataset_sizes = {"train": len(train), "test": len(test_set)}

print(dataset_sizes)


{'train': 1064, 'test': 118}


In [201]:
def train_model(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            model.train()
    
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # zero the parameter gradients
                optimizer.zero_grad()
                inputs = inputs.float().squeeze()
                labels = labels.float().squeeze()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs).squeeze()                   
                    loss = criterion(outputs, labels)
                    preds = torch.round(outputs)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels)
                
            if phase == 'train' and scheduler != None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model



In [231]:
my_model = nn.Sequential(nn.Linear(50,10,bias=False),nn.Linear(10,1,bias=False), nn.Sigmoid())
criterion = nn.BCELoss()
print(my_model)

# Observe that all parameters are being optimized
# optimizer_ft = optim.SGD(my_model.parameters(), lr=0.5)
optimizer_ft = optim.Adam(my_model.parameters(),lr=0.05, weight_decay=0.0001)
lr_scheduler = None
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=25, gamma=1)

Sequential(
  (0): Linear(in_features=50, out_features=10, bias=False)
  (1): Linear(in_features=10, out_features=1, bias=False)
  (2): Sigmoid()
)


In [232]:
final_model = train_model(my_model, 
                                                       criterion, 
                                                       optimizer_ft, 
                                                       lr_scheduler, 
                                                       dataloaders, 
                                                       dataset_sizes,
                                                       num_epochs=100)

Epoch 0/99
----------
train Loss: 0.0410 Acc: 0.6156
test Loss: 0.0441 Acc: 0.4915

Epoch 1/99
----------
train Loss: 0.0407 Acc: 0.6259
test Loss: 0.0596 Acc: 0.5932

Epoch 2/99
----------
train Loss: 0.0383 Acc: 0.6579
test Loss: 0.0390 Acc: 0.6780

Epoch 3/99
----------
train Loss: 0.0366 Acc: 0.6842
test Loss: 0.0385 Acc: 0.7119

Epoch 4/99
----------
train Loss: 0.0383 Acc: 0.6457
test Loss: 0.0453 Acc: 0.6525

Epoch 5/99
----------
train Loss: 0.0365 Acc: 0.6682
test Loss: 0.0383 Acc: 0.7119

Epoch 6/99
----------
train Loss: 0.0350 Acc: 0.6992
test Loss: 0.0384 Acc: 0.6949

Epoch 7/99
----------
train Loss: 0.0364 Acc: 0.6748
test Loss: 0.0444 Acc: 0.5932

Epoch 8/99
----------
train Loss: 0.0341 Acc: 0.7049
test Loss: 0.0412 Acc: 0.7034

Epoch 9/99
----------
train Loss: 0.0338 Acc: 0.7124
test Loss: 0.0363 Acc: 0.7203

Epoch 10/99
----------
train Loss: 0.0335 Acc: 0.7246
test Loss: 0.0355 Acc: 0.7288

Epoch 11/99
----------
train Loss: 0.0355 Acc: 0.7002
test Loss: 0.0387 Acc

test Loss: 0.0340 Acc: 0.7627

Epoch 97/99
----------
train Loss: 0.0295 Acc: 0.7763
test Loss: 0.0333 Acc: 0.7712

Epoch 98/99
----------
train Loss: 0.0294 Acc: 0.7820
test Loss: 0.0340 Acc: 0.7458

Epoch 99/99
----------
train Loss: 0.0342 Acc: 0.7284
test Loss: 0.0364 Acc: 0.7627

Training complete in 0m 44s
Best val Acc: 0.805085


In [233]:
torch.save(final_model.state_dict(), "models/final.pth")