In [1]:
import sys 
sys.path.append("../")

import pandas as pd  
import numpy as np
from tqdm import tqdm
from IPython.display import display

import torch
import torch.nn as nn
import torch.nn.functional as F
from pgtaa.core.predictor_preproc import dl_from_spec

In [26]:
#from sklearn.svm import SVR
#from sklearn.ensemble import RandomForestRegressor
#from sklearn.linear_model import SGDRegressor
#from sklearn.manifold import TSNE
#from sklearn.externals import joblib 

# Linear network
class PredDense(nn.Module):
    def __init__(self, input_dim=3200, hidden_dim=(512, 128, 32), output_dim=8, dropout=0.6):
        super(PredDense, self).__init__()
        self.lin1 = nn.Linear(input_dim, hidden_dim[0])
        self.lin2 = nn.Linear(hidden_dim[0], hidden_dim[1])
        self.lin3 = nn.Linear(hidden_dim[1], hidden_dim[2])
        self.lin = nn.Linear(hidden_dim[2], output_dim, bias=False)
        self.relu = nn.ReLU(inplace=False)
        self.dropout = nn.Dropout(p=dropout, inplace=False)
    
    def forward(self, x):
        x = self.lin1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.lin2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.lin3(x)
        x = self.relu(x)
        x = self.lin(x)
        return x
    
net = PredDense().double()
crit = nn.MSELoss()
opt = torch.optim.Adam(net.parameters(), lr=1e-3)   

torch.backends.cudnn.benchmark = True

def fit(model, criterion, optimizer, batch_size: int=64, epochs: int=30, device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')):
    model.to(device)
    dl_train, dl_valid = dl_from_spec(batch_size=batch_size, num_workers=4)
    for epoch in range(epochs):
        train_loss = 0.0
        val_loss = 0.0
        pbar = tqdm(dl_train, file=sys.stdout)
        for i, data in enumerate(pbar):
            pbar.set_description("Epoch %s" % str(epoch + 1))
            inputs, label = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs.view(batch_size,-1))
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # validation
        with torch.set_grad_enabled(False):
            for j, vdata in enumerate(dl_valid):
                vinput, vlabel = vdata[0].to(device), vdata[1].to(device)
                outputs = model(vinput.view(1, -1))
                loss = criterion(outputs, vlabel)
                val_loss += loss.item()

        print(f"Train Loss: {train_loss / (i + 1)}  Validation Loss: {val_loss / (j + 1)}\n")
        #pbar.set_postfix(dict(t_loss=(train_loss / (i + 1)), v_loss=(val_loss / (j + 1)))
        train_loss = 0.0
        val_loss = 0.0

    print("Finished training")
    
fit(net, crit, opt)

Epoch 1: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:03<00:00, 11.99it/s]
Train Loss: 0.007633544843801862  Validation Loss: 0.011227294668233601

Epoch 2: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:03<00:00, 12.06it/s]
Train Loss: 0.001265337744713915  Validation Loss: 0.0019773088744775877

Epoch 3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

In [21]:
isinstance(opt, torch.optim.Optimizer)

True

In [32]:
# Sklearn Action!!!

import sklearn
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

In [33]:
gbr = GradientBoostingRegressor(learning_rate=0.01, criterion="mse", validation_fraction=0)

In [70]:
dl_train, dl_valid = dl_from_spec(batch_size=1, num_workers=4)
np.array(dl_train.dataset[:][1].data)[:,1].shape
gbr.fit(np.array(dl_train.dataset[:][0].data).reshape(len(dl_train),-1), np.array(dl_train.dataset[:][1].data)[:,1])

GradientBoostingRegressor(alpha=0.9, criterion='mse', init=None,
             learning_rate=0.01, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, n_iter_no_change=None, presort='auto',
             random_state=None, subsample=1.0, tol=0.0001,
             validation_fraction=0, verbose=0, warm_start=False)

In [69]:
np.array(dl_train.dataset[:][0].data).reshape(len(dl_train),-1).shape

(2392, 3200)

In [74]:
gbr.predict(np.array(dl_valid.dataset[:][0].data).reshape(len(dl_train),-1), np.array(dl_valid.dataset[:][1].data)[:,1])

ValueError: cannot reshape array of size 1660800 into shape (2392,newaxis)

In [219]:
# Regression metrices
import numpy as np
from sklearn.metrics import cohen_kappa_score, hinge_loss
import torch.nn.functional as F
from torch import Tensor

__all__ = ['error_rate', 'accuracy', 'accuracy_thresh', 'dice', 'exp_rmspe', 'fbeta']

def mae(pred: Tensor, targ: Tensor):
    "Mean absolute error between `pred` and `targ`."
    return torch.abs(targ - pred).mean()

def mse(pred:Tensor, targ:Tensor):
    "Mean squared error between `pred` and `targ`."
    diff = (targ - pred) ** 2
    return diff.mean()

def rmse(pred:Tensor, targ:Tensor):
    "Root mean squared error between `pred` and `targ`."
    return torch.sqrt(mse(pred, targ))

def explained_variance(pred: Tensor, targ: Tensor):
    "Explained variance score between `pred` and `targ`."
    var_pct = torch.var(targ - pred) / torch.var(targ)
    return 1 - var_pct

def msle(pred: Tensor, targ: Tensor):
    "Mean squared logarithmic error between `pred` and `targ`."
    targ = torch.log(1 + targ)
    pred = torch.log(1 + pred)
    diff = (targ - pred) ** 2
    return diff.mean()

def r2_score(pred: Tensor, targ: Tensor):
    "R2 score (coefficient of determination) between `pred` and `targ`."
    u = torch.sum((targ - pred) ** 2)
    d = torch.sum((targ - targ.mean()) ** 2)
    return 1 - u / d

In [103]:
pred = torch.rand(10)
targ = torch.rand(10)
r2_score(pred, targ)

tensor(-0.6481)

In [253]:
# Classification metrices

def accuracy(input:Tensor, targs: Tensor):
    "Compute accuracy with `targs` when `input` is bs * n_classes."
    n = targs.shape[0]
    input = input.argmax(dim=-1).view(n,-1)
    targs = targs.view(n,-1)
    return (input == targs).float().mean()

def accuracy_balanced(input: Tensor, targs: Tensor, clw: list=None):
    "Balanced accuracy score between `input` and `targs` w.r.t. class label weights `cw`."
    n = targs.shape[0]
    if not cw:
        cw = [1 for _ in range(n)]
    clw = Tensor(clw).view(1,-1).transpose(1, 0)
    input = input.argmax(dim=-1).view(n,-1)
    targs = targs.view(n,-1)
    clw = clw / (torch.sum((input == targs).float() * clw))
    return torch.sum((input == targs).float() * clw) / clw.sum()

def kappa_score(pred: Tensor, rater: Tensor):
    "Computes the rate of agreement (Cohens Kappa) between `pred` and `rater`"
    n = pred.shape[-1]
    c = confusion_matrix(pred, rater).float()
    sum0 = c.sum(0)
    sum1 = c.sum(1)
    expected = torch.einsum('i,j->ij', (sum0, sum1)) / torch.sum(sum0)
    w = torch.ones((n, n))
    idx = torch.arange(0, n)
    w[idx, idx] = 0
    k = torch.sum(w * c) / torch.sum(w * expected)
    return 1 - k

def confusion_matrix(input: Tensor, targs: Tensor):
    "Computes the confusion matrix"
    x = torch.arange(0, input.shape[-1])
    input = input.argmax(dim=-1).view(-1)
    cm = ((input==x[:, None]) & (targs==x[:, None, None])).sum(2)
    return cm

def top_k_accuracy(input: Tensor, targs: Tensor, k: int=5):
    "Computes the Top-k accuracy (target is in the top k predictions)"
    n = targs.shape[0]
    input = input.topk(k=k, dim=-1)[1].view(n, -1)
    targs = targs.view(n,-1)
    return (input == targs).sum(dim=1).float().mean()


def mean_class_accuracy(input: Tensor, targs: Tensor):
    "Computes the accuracy for each class label ->Rank1Tensor"
    x = torch.arange(0, input.shape[-1])
    targs = targs==x[:,None]
    input = input.argmax(-1)==x[:,None]
    label_sum = targs.sum(dim=1).float()
    eq = targs.float() * input.float()
    return eq.sum(1) / label_sum

In [136]:
def make_pred_target(batch_size=100, nb_targets=5, clas=True):    
    if clas:
        pred = torch.rand(batch_size, nb_targets)
        targ = torch.randint(0, nb_targets, (batch_size,)).long()
    else:
        pred = torch.rand(batch_size)
        targ = torch.rand(batch_size)
        
    w = list(np.random.dirichlet(np.random.rand(10)))
    
    return pred, targ, w

In [261]:
pred, targ, w = make_pred_target(1000, 5, clas=False)

In [267]:
r2_score(pred, targ)

tensor(-0.9356)