In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


In [97]:
import os

import torch
from torch.utils import data

class Dataset(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, path_dataset, list_ids):
        'Initialization'
        self.path_dataset = path_dataset
        self.list_ids = list_ids
        self.nr_observations = len(list_ids)
        
    def __len__(self):
        'Denotes the total number of samples'
        return self.nr_observations

    def __getitem__(self, index):
        'Generates one sample of data'
        file_name_variables = os.path.join(self.path_dataset, 'variable_' + str(self.list_ids[index])  + '.pt')
        file_name_label = os.path.join(self.path_dataset, 'label_' +str( self.list_ids[index])  + '.pt')
        
        X = torch.load(file_name_variables)
        y = torch.load(file_name_label)
        
        return X, y
    

In [135]:
# https://github.com/pytorch/examples/blob/master/mnist/main.py

class Net(nn.Module):
    def __init__(self, nr_input_variables, nr_hidden_neurons, nr_output_variables):
        super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(1, 20, 5, 1)
#         self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc_input = nn.Linear(nr_input_variables, nr_hidden_neurons)
        self.fc_hidden = nn.Linear(nr_hidden_neurons, nr_hidden_neurons)
        self.fc_output = nn.Linear(nr_hidden_neurons, nr_output_variables)
        self.sigmoid = nn.Sigmoid()
        self.bn1 = nn.BatchNorm1d(nr_hidden_neurons)
        self.bn2 = nn.BatchNorm1d(nr_hidden_neurons)
        
    def forward(self, x):
        x = F.relu(self.fc_input(x))
        x = self.bn1(x)
        x = F.relu(self.fc_hidden(x))
        x = self.bn2(x)
        x = F.relu(self.fc_output(x))
    
        return self.sigmoid(x)
    
def train(log_interval, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data.float())
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def train_performance(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data.float())
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTraining set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
def test_performance(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data.float())
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [136]:
# https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel
# === constants === #

# === variables === #
fraction = 0.9
use_cuda = False # not args.no_cuda and torch.cuda.is_available()
seed =  1
lr = 0.001
momentum = 0.9
path_dataset = path_random_data
params = {'batch_size': 64,
          'shuffle': True}
nr_epochs = 3
log_interval = 10

# === process === #
path_model = os.path.join(path_dir, folder_data ,'model.pt')

partition = {}
partition['train'] = list(range(0,int(nr_observations*fraction)))
partition['validation'] = list(range(int(nr_observations*fraction),nr_observations))

training_set = Dataset(path_dataset, partition['train'])
training_generator = data.DataLoader(training_set, **params)

validation_set = Dataset(path_dataset, partition['validation'])
validation_generator = data.DataLoader(validation_set, **params)

torch.manual_seed(seed)

kwargs = {'num_workers': 6, 'pin_memory': True} if use_cuda else {}

device = torch.device("cuda" if use_cuda else "cpu")

model = Net(nr_input_variables = nr_variables, nr_hidden_neurons = 5, nr_output_variables = 2).to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

for epoch in range(1, nr_epochs + 1):
    train(log_interval, model, device, training_generator, optimizer, epoch)
    train_performance(model, device, training_generator)
    test_performance(model, device, validation_generator)

torch.save(model.state_dict(), path_model)



Training set: Average loss: -0.5679, Accuracy: 4503/9000 (50%)

Test set: Average loss: -0.5633, Accuracy: 484/1000 (48%)


Training set: Average loss: -0.6088, Accuracy: 4515/9000 (50%)

Test set: Average loss: -0.6030, Accuracy: 488/1000 (49%)


Training set: Average loss: -0.6565, Accuracy: 4525/9000 (50%)

Test set: Average loss: -0.6504, Accuracy: 490/1000 (49%)



In [112]:
import os
import numpy as np
from numpy.random import randn, randint
import torch 

folder_data = 'nn_random'
path_dir = os.path.join('/home/bmelman/Desktop/C_disk/02_university/06_thesis/01_code/fever/_04_results/score_combination/', 'neural_network')
path_random_data = os.path.join(path_dir, folder_data)

# if not os.path.exists(path_random_data):
os.makedirs(path_random_data, exist_ok=True)
    
nr_variables = 10
nr_observations = 10000

for id in range(nr_observations):
    path_id = os.path.join(path_random_data, str(id))
    
    file_name_variables = os.path.join(path_random_data, 'variable_' + str(id)  + '.pt')
    file_name_label = os.path.join(path_random_data, 'label_' + str(id)  + '.pt')
    
    np_array = randn(nr_variables)
    torch_array = torch.from_numpy(np_array)
    torch.save(torch_array, file_name_variables)
    
    torch_array = torch.tensor(float(randint(2)))
    torch.save(torch_array, file_name_label)



TypeError: only size-1 arrays can be converted to Python scalars