In [17]:
import numpy as np
import os, sys
import csv
import pandas as pd
import matplotlib.pyplot as plt
import time, copy
from importlib import reload

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch import optim, nn

In [18]:
combined_path = "./data/combined/"
max_vals_path = "./data/max_vals.npy"

In [19]:
max_vals = np.load(max_vals_path)
print(max_vals)

example = np.load(combined_path + "255.npy")
print(example)

[4.83846143e+03 7.32256397e+03 4.54205127e+03 4.23384619e+03
 4.35384619e+03 7.90871777e+03 8.27025684e+03 6.21897412e+03
 6.23282031e+03 6.60512842e+03 5.28617189e+03 7.74127599e+03
 5.04152043e+03 4.94588142e+03 5.15734374e+03 3.60216976e+06
 9.17141303e+06 1.10068438e+06 1.05882940e+06 1.17023543e+06
 5.27384619e+03 7.77179492e+03 5.02282056e+03 4.93000000e+03
 5.14538477e+03 6.97277984e+03 7.43616581e+02 1.46733224e+02
 3.27454090e+01 1.86658591e+01 1.92897892e+04 5.62001199e+03
 2.05430640e+03 1.00586385e+03 2.35064302e+02 1.35319137e+03
 3.07286039e+02 1.33131847e+02 8.05779346e+01 4.93367400e+01
 1.14952310e+03 2.76171629e+02 1.22808444e+02 3.07193409e+01
 2.29274350e+01 9.83024695e+02 2.40093994e+02 1.13710491e+02
 3.39073937e+01 2.46449293e+01]
[1.00000000e+00 4.21794873e+03 4.12820508e+03 3.99435889e+03
 4.10205127e+03 4.16153857e+03 4.35435889e+03 4.36051270e+03
 4.52717969e+03 4.23538477e+03 4.28923096e+03 4.27457931e+03
 4.25288463e+03 4.19854167e+03 4.15399840e+03 4.22287

In [20]:
class FeatureDataset(torch.utils.data.Dataset):
    def __init__(self, data_path, mode, max_path=None, transform=None):
        self.data_path = data_path
        self.transform = transform
        self.mode = mode
        
        train = []
        test = []
        
        self.max_features = np.load(max_path)
        
        for filename in os.listdir(data_path):
            index = int(filename[:-4])
            if index % 10 == 0:
                test.append(np.load(data_path+filename))
            else:
                train.append(np.load(data_path+filename))
                
        self.data = train if mode == "train" else test
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        #print("HELLO")
        #print("This is shape", np.array(self.data).shape)
        data = self.data[idx][1:].astype(np.float)
        data = data / self.max_features
        #why are we dividing by the max?
        label = self.data[idx][0]
        #stored as the label
        
        if self.transform != None:
            data = self.transform(label)
        return data, label

In [21]:
train_set = FeatureDataset(combined_path,"train",max_path=max_vals_path)
train_loader = DataLoader(train_set, batch_size=16, num_workers=4, shuffle=True)

test_set = FeatureDataset(combined_path,"test",max_path=max_vals_path)
test_loader = DataLoader(test_set, batch_size=16, num_workers=4)

dataloaders = {"train": train_loader, "test": test_loader}
dataset_sizes = {"train": len(train_set), "test": len(test_set)}

print(dataset_sizes)


{'train': 1064, 'test': 118}


In [22]:
for i in range(4):
    #print(i)
    x = train_set[i]
    #print(x)

In [23]:
def train_model(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            model.train()
    
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # zero the parameter gradients
                optimizer.zero_grad()
                #print("prev input.shape{}".format(inputs.shape))
                inputs = inputs.float().squeeze()
                #print("after input.shape{}".format(inputs.shape))
                #print("prev labels.shape{}".format(labels.shape))
                labels = labels.float().squeeze()
                #print("after labels.shape{}".format(labels.shape))

                # forward
                # Only calculate gradient for training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs).squeeze()  
                    #calcuting the loss based on the BCE Criterion
                    loss = criterion(outputs, labels)
                    preds = torch.round(outputs)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels)
                
            if phase == 'train' and scheduler != None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model



In [26]:
my_model = nn.Sequential(nn.Linear(50,10,bias=False),nn.Linear(10,1,bias=False), nn.Sigmoid())
criterion = nn.BCELoss()
print(my_model)

# Observe that all parameters are being optimized
# optimizer_ft = optim.SGD(my_model.parameters(), lr=0.5)
optimizer_ft = optim.Adam(my_model.parameters(),lr=0.05, weight_decay=0.0001)
lr_scheduler = None
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=25, gamma=1)

Sequential(
  (0): Linear(in_features=50, out_features=10, bias=False)
  (1): Linear(in_features=10, out_features=1, bias=False)
  (2): Sigmoid()
)


In [27]:
final_model = train_model(my_model, 
                                                       criterion, 
                                                       optimizer_ft, 
                                                       lr_scheduler, 
                                                       dataloaders, 
                                                       dataset_sizes,
                                                       num_epochs=100)

Epoch 0/99
----------
train Loss: 0.0443 Acc: 0.5103
test Loss: 0.0475 Acc: 0.5085

Epoch 1/99
----------
train Loss: 0.0392 Acc: 0.6419
test Loss: 0.0388 Acc: 0.6695

Epoch 2/99
----------
train Loss: 0.0394 Acc: 0.6325
test Loss: 0.0389 Acc: 0.6610

Epoch 3/99
----------
train Loss: 0.0385 Acc: 0.6410
test Loss: 0.0392 Acc: 0.7119

Epoch 4/99
----------
train Loss: 0.0382 Acc: 0.6692
test Loss: 0.0443 Acc: 0.4915

Epoch 5/99
----------
train Loss: 0.0361 Acc: 0.6936
test Loss: 0.0384 Acc: 0.7119

Epoch 6/99
----------
train Loss: 0.0373 Acc: 0.6739
test Loss: 0.0379 Acc: 0.6780

Epoch 7/99
----------
train Loss: 0.0344 Acc: 0.7002
test Loss: 0.0444 Acc: 0.6017

Epoch 8/99
----------
train Loss: 0.0375 Acc: 0.6795
test Loss: 0.0402 Acc: 0.6949

Epoch 9/99
----------
train Loss: 0.0366 Acc: 0.6880
test Loss: 0.0379 Acc: 0.7288

Epoch 10/99
----------
train Loss: 0.0350 Acc: 0.6870
test Loss: 0.0369 Acc: 0.7458

Epoch 11/99
----------
train Loss: 0.0352 Acc: 0.6880
test Loss: 0.0438 Acc

test Loss: 0.0364 Acc: 0.7627

Epoch 97/99
----------
train Loss: 0.0287 Acc: 0.7632
test Loss: 0.0342 Acc: 0.7966

Epoch 98/99
----------
train Loss: 0.0295 Acc: 0.7575
test Loss: 0.0398 Acc: 0.7119

Epoch 99/99
----------
train Loss: 0.0289 Acc: 0.7716
test Loss: 0.0338 Acc: 0.7966

Training complete in 0m 18s
Best val Acc: 0.796610


In [233]:
torch.save(final_model.state_dict(), "models/final.pth")