In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt 
import numpy as np
import random
import json
import time

import math
from models import LogisticRegressor

In [2]:
# Hyper-parameters shared by the cells below.
N_FEATURES = 5     # number of leading entries kept from each feature vector
N_EPOCHS = 10_000  # number of passes the training loop makes over the train split

In [3]:
# Running class counts, updated while the dataset file is parsed.
n_true = 0
n_false = 0

# Collected (features, label) pairs, appended to by the loading loop.
data = []

def construct_tensors(vector, state):
    """Build one ``(features, label)`` pair from a raw feature vector.

    Only the first ``N_FEATURES`` entries of ``vector`` are kept, and the
    kept entries are scaled to unit Euclidean (L2) length.

    Args:
        vector: Sequence of numeric feature values.
        state: Label value for this example; it is wrapped in a tensor as-is.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float32 tensor holding the
        normalised features and ``y`` is ``torch.tensor(state)``.
    """
    features = np.asarray(vector[:N_FEATURES], dtype=np.float64)
    norm = np.linalg.norm(features)

    # An all-zero vector has norm 0; dividing by it would fill x with NaNs,
    # which then propagate through the loss and corrupt every weight update.
    # Such a vector is left unscaled instead.
    if norm > 0:
        features = features / norm

    x = torch.tensor(features, dtype=torch.float32)
    y = torch.tensor(state)
    return (x, y)

# Decode the whole JSON file first; the handle is closed before any processing.
with open('data/mfccs.json') as f:
    json_data = json.load(f)

# Walk over every video in the dataset.
for video, video_data in json_data.items():

    # Positive examples (bounces) of the current video: keep every non-empty one.
    for feature_vector in video_data['true']:
        if feature_vector == []:
            continue
        data.append(construct_tensors(feature_vector, 1))
        n_true += 1

    # Negative examples: skip empty vectors, and stop admitting negatives
    # once they would outnumber the positives collected so far.
    for feature_vector in video_data['false']:
        if feature_vector == [] or n_false >= n_true:
            continue
        data.append(construct_tensors(feature_vector, 0))
        n_false += 1

print(f'Extracted {len(data)} datapoints ({n_true} true and {n_false} false)')

Extracted 1840 datapoints (920 true and 920 false)


In [4]:
# Split dataset into train and test (70% train)
TRAIN_FRACTION = 0.70
SPLIT_SEED = 0

# Shuffle in place with a dedicated, seeded generator so that a fresh
# "Restart & Run All" always produces the same train/test partition
# (the global NumPy RNG is left untouched).
np.random.RandomState(SPLIT_SEED).shuffle(data)

split_ind = int(len(data) * TRAIN_FRACTION)
train_data = data[:split_ind]
test_data = data[split_ind:]

In [5]:
# Wrap both splits in mini-batch loaders with identical settings
# (batches of 128, reshuffled on every pass).
train_loader, test_loader = (
    torch.utils.data.DataLoader(dataset=split, batch_size=128, shuffle=True)
    for split in (train_data, test_data)
)

In [6]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'Device: {device}')

Device: cuda:0


In [7]:
# Build the model from N_FEATURES and 2 (presumably input size and number of
# classes -- confirm against models.LogisticRegressor), then move it to `device`.
regressor = LogisticRegressor(N_FEATURES, 2)
regressor = regressor.to(device)

# Report the total number of scalar entries across all parameter tensors.
print(f'n_params={sum(map(torch.numel, regressor.parameters()))}')

n_params=582


In [8]:
# Objective: cross-entropy between the model output and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: stochastic gradient descent over every model parameter, step size 0.01.
optimizer = optim.SGD(params=regressor.parameters(), lr=0.01)

In [9]:
def accuracy(model, data, device=None):
    """Return the percentage of examples in ``data`` that ``model`` classifies correctly.

    The predicted class of each example is the index of the largest value
    along dimension 1 of the model output.

    Args:
        model: ``torch.nn.Module`` mapping a batch of inputs to per-class scores.
        data: Iterable of ``(x, y)`` batches (e.g. a ``DataLoader``); ``y``
            holds the integer class labels and is compared on the CPU.
        device: Device the inputs are moved to before the forward pass.
            Defaults to the device of the model's first parameter, or the
            CPU when the model has no parameters.

    Returns:
        Accuracy as a Python ``float`` in ``[0, 100]``; ``0.0`` when ``data``
        yields no examples.
    """
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Evaluate in eval mode, then restore whatever mode the caller was in:
    # this function is called from the middle of the training loop.
    was_training = model.training
    model.eval()

    correct, total = 0, 0
    try:
        # No gradients are needed for scoring; skipping the autograd graph
        # saves memory and time on every evaluation.
        with torch.no_grad():
            for x, y in data:
                y_hat = model(x.to(device))
                _, predicted = torch.max(y_hat, 1)

                total += y.size(0)
                # .item() keeps the counter a plain int, so the final division
                # is true division rather than tensor (possibly integer) division.
                correct += (predicted.cpu() == y).sum().item()
    finally:
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100.0 * correct / total


In [10]:
# Test accuracy (in percent) recorded at every evaluation point below.
accuracy_history = []
n_iters = 0
start = time.time()

# Index of the last mini-batch of an epoch, used to detect the final step.
last_batch = len(train_loader) - 1

for epoch in range(1, N_EPOCHS + 1):
    # NOTE: `data` is deliberately not reshuffled here. The loaders wrap
    # `train_data` / `test_data`, which are slice copies of `data`, so
    # shuffling `data` had no effect on training; `train_loader` already
    # reshuffles its own split on every epoch.
    for i, (x, y) in enumerate(train_loader):
        # Clear gradients
        optimizer.zero_grad()

        # Make prediction and calculate loss on the model's device
        # (moving the small label batch over is cheaper than pulling the
        # model output back to the CPU on every step).
        y_hat = regressor(x.to(device))
        loss = criterion(y_hat, y.to(device))

        # Calculate new gradients and optimize
        loss.backward()
        optimizer.step()

        # Evaluate every 1000 iterations, plus once at the very last step so
        # the final line shows the finished model (not on every batch of the
        # last epoch, which would re-score the whole test set each time).
        is_final_step = epoch == N_EPOCHS and i == last_batch
        if n_iters % 1000 == 0 or is_final_step:
            acc = float(accuracy(regressor, test_loader))
            accuracy_history.append(acc)

            fepoch = 'Epoch: \033[92m{}\033[0m/\033[92m{}\033[0m'.format(epoch, N_EPOCHS)
            fiters = 'Iter: \033[92m{}\033[0m'.format(n_iters)
            floss = 'Loss: \033[92m{:.8f}\033[0m'.format(loss.item())
            facc = 'Accuracy: \033[92m{:.2f}%\033[0m'.format(acc)
            fptime = 'Time: \033[92m{:.0f}\033[0ms'.format(time.time() - start)

            # '\r' rewrites the same output line instead of appending a new one.
            print('\r{}\t | {}\t | {}, {}\t | {}\t |'.format(fepoch, fiters, floss, facc, fptime), end="")
        n_iters += 1

Epoch: [92m953[0m/[92m1000[0m	 | Iter: [92m20000[0m	 | Loss: [92m0.68314832[0m, Accuracy: [92m68.00%[0m	 | Time: [92m22[0ms	 |