In [1]:
import torch
import numpy as np
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from torch.autograd import Variable
from torch.utils.data import DataLoader

from model import MetaClassifier
from model import MetaWithVar
from model import DirectConnect
from model import MetaClassifier4Layers
from dataset import NetParasDataset
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from scipy.signal import savgol_filter
import scipy.stats as st
from matplotlib import ticker
import seaborn as sns
import prettytable

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [2]:
#Define the evaluation on the test set
def eval_model(model, val_loader):
    model.eval()
    total_corr = 0
    labels = []
    predictions = []
    mseloss = nn.MSELoss()
    running_loss = 0
    correct = 0

    for i,data in enumerate(val_loader):
        l1,l2,l3,l4,label = data.get("L1"), data.get("L2"), data.get("L3"), data.get("L4"), data.get("Label")
        y_pred = model(l1.float(), l2.float(), l3.float(), l4.float())[0].squeeze()
                
        rl = mseloss(y_pred, label)
        running_loss += rl.item()
        print('BCE loss:' + str(rl.item()))
        
        y_pred = y_pred.detach().numpy()
        label = label.detach().numpy()
        predictions = np.append(predictions, y_pred)
        labels = np.append(labels, label)
        
        x = np.array([i for i in range(len(label))])
        plt.plot(label, "o:", linestyle='--', label='Target label')
        plt.fill_between(x, label - 0.1, label + 0.1, alpha = 0.3 )
        plt.plot(y_pred, "o:", linestyle='-', label='Predictions')
        plt.gca().yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
        plt.xticks(range(0,200,5))
        plt.xlabel('Index')
        plt.ylabel('Value: Percentage of high income people')
        plt.legend(loc = 'best')
        fig = plt.gcf()
        fig.set_size_inches(20, 4)
        plt.savefig('Predictions', dpi=500, bbox_inches='tight')
        plt.show()
    for i in range(len(labels)):
        if abs(labels[i]-predictions[i] <= 0.1):
            correct += 1
    
    print('Accuracy:' + str(float(correct/200)))
    
    d = np.fabs(labels - predictions)
    plt.scatter(labels, d, c = d, alpha = 0.4)
    plt.colorbar()
    plt.gca().xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
    plt.gca().yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
    plt.xlabel('Target label')
    plt.ylabel('Absolute Pure Error')
    plt.savefig('Predictions and target', dpi=500, bbox_inches='tight')
    plt.show()
    
    
    plt.hist(labels - predictions,bins=50)
    plt.gca().xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
#     sns.distplot(labels - predictions, color='r')
    plt.xlabel('Pure Error')
    plt.ylabel('Number of samples')
    plt.savefig('Quantities', dpi=500, bbox_inches='tight')
    plt.show()
    
    plt.hist(labels - predictions,bins=50, density=True, cumulative=True, label='CDF', histtype='step')    
    plt.gca().xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
    plt.show()
    
    

In [3]:
def get_ln(model, idx, lyn):
    model.eval()
    l1_10,l2_10,l3_10,l4_10 = torch.load('meta_train/' + str(idx) +'.pt')
    n1 = model(l1_10.unsqueeze(0).float(), l2_10.unsqueeze(0).float(), l3_10.unsqueeze(0).float(), l4_10.unsqueeze(0).float())[lyn]
    n1 = n1.detach().numpy().reshape(-1,1)
    return n1

In [4]:
def get_ln_all(model, label, lyn):
    n = get_ln(model, label, lyn)
    for i in range(19):
        n = np.concatenate((n, get_ln(model, label + i*100 +100, lyn)),axis = 0)
    return n

In [5]:
def plt_ln(model, lyn):
    n_10 = get_ln_all(model, 10, lyn)
    n_30 = get_ln_all(model, 30, lyn)
    n_50 = get_ln_all(model, 50, lyn)
    n_70 = get_ln_all(model, 70, lyn)
    n_90 = get_ln_all(model, 90, lyn)
    
    plt.hist(n_10,bins=50, density=True, cumulative=True, label='10%', histtype='step')  
    plt.hist(n_30,bins=50, density=True, cumulative=True, label='30%', histtype='step')  
    plt.hist(n_50,bins=50, density=True, cumulative=True, label='50%', histtype='step')
    plt.hist(n_70,bins=50, density=True, cumulative=True, label='70%', histtype='step')  
    plt.hist(n_90,bins=50, density=True, cumulative=True, label='90%', histtype='step')
    
    plt.scatter(np.mean(n_10), 0, c='C0',marker='x')
    plt.text(np.mean(n_10),0, 'mean of 10%', rotation=45)
    plt.scatter(np.mean(n_50),0, c='orange',marker='x')
    plt.text(np.mean(n_50),0, 'mean of 50%', rotation=45)
    plt.scatter(np.mean(n_90),0, c='forestgreen', marker='x')
    plt.text(np.mean(n_90),0, 'mean of 90%', rotation=45)
    
    plt.scatter(np.mean(n_30),0,marker='x')
    plt.text(np.mean(n_30),0, 'mean of 30%', rotation=45)
    plt.scatter(np.mean(n_70),0, marker='x')
    plt.text(np.mean(n_70),0, 'mean of 70%', rotation=45)
    
    plt.legend(loc='upper left')
    plt.xlabel('The value of the weights of L' + str(lyn))
    plt.ylabel('cdf')
    plt.show()
    
    plt.hist(n_10,bins=30, density=True,  label='10%', histtype='step')  
    plt.hist(n_50,bins=30, density=True,  label='50%', histtype='step')
    plt.hist(n_90,bins=30, density=True,  label='90%', histtype='step')
    
    plt.legend(loc='upper left')
    plt.xlabel('The value of the weights of L' + str(lyn))
    plt.ylabel('pdf')
    plt.show()
    
    table = prettytable.PrettyTable()
    table.field_names = ['Percentage','Mean','Median','Variance']
    table.add_row(['10%',np.mean(n_10),np.median(n_10),np.var(n_10)])
    table.add_row(['50%',np.mean(n_50),np.median(n_50),np.var(n_50)])
    table.add_row(['90%',np.mean(n_90),np.median(n_90),np.var(n_90)])
    print(table)

In [6]:
def plt_l1(model):
    plt_ln(model, 1)
    plt_ln(model, 2)
    plt_ln(model, 3)
    plt_ln(model, 4)

In [7]:
train_dataset, test_dataset = torch.utils.data.random_split(NetParasDataset('meta_train/'), [1800, 200])
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=200, shuffle=True)

In [8]:
model = MetaWithVar()
# criterion = nn.MSELoss()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.02)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum = 0.5)
model.train()
loss_list = []

for epoch in range(50):
    running_loss = 0 
    for sample in train_loader:
        optimizer.zero_grad()
        output = model(sample.get("L1"), sample.get("L2"), sample.get("L3"), sample.get("L4"))[0].squeeze()
#         print(output.shape)
#         print(sample.get("Label").shape)
        print(output)
        print(sample.get("Label"))
        loss = criterion(output, sample.get("Label").float())
        loss.backward()
        optimizer.step()  
        running_loss += loss.item()
        # print('running loss:' + str(running_loss))
    loss_list.append(running_loss*16/840)
    plt.plot(loss_list)
print("The last loss:{}".format(loss_list[len(loss_list) - 1]))        
plt.xlabel('Epochs')
plt.ylabel('BCE Loss')
plt.savefig('Loss.png', dpi=500, bbox_inches='tight')
plt.show()
eval_model(model, test_loader)
plt_l1(model)

# print('Accuracy on train set: {}'.format(accuracy(model, train_loader))) 
# print('Accuracy on test set: {}'.format(accuracy(model, test_loader))) 

torch.Size([16, 1])
torch.Size([16, 1])
tensor([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       grad_fn=<SqueezeBackward0>)
tensor([0.4000, 0.7200, 0.9700, 0.2700, 0.8800, 0.7200, 0.0000, 0.5400, 0.5000,
        0.0000, 0.5600, 0.5400, 0.3500, 0.5200, 0.9300, 0.9100],
       dtype=torch.float64)


RuntimeError: all elements of input should be between 0 and 1