In [1]:
import torch
import numpy as np
from sklearn.datasets import make_classification
from scripts.CustomDataset import CustomDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Synthetic binary-classification data set (1000 samples).
# random_state pins the generator so Restart & Run All reproduces the same data
# (and therefore comparable losses / AUC) on every run.
x, y = make_classification(n_samples=1000, random_state=42)

In [3]:
# Stratified hold-out split: stratify=y keeps the class balance the same in both parts.
# random_state fixes the shuffle so the same rows land in train/test on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, random_state=42
)

In [4]:
# Wrap each split in the project's CustomDataset: features go in as `data`,
# labels as `targets`.
train_ds = CustomDataset(
    data=x_train,
    targets=y_train,
)
test_ds = CustomDataset(
    data=x_test,
    targets=y_test,
)

In [5]:
# Mini-batch loaders over both splits; both use the same batch size and no
# shuffle argument is passed, so batches follow dataset order.
BATCH_SIZE = 4
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE)

In [6]:
def model(x, w, b):
    """Affine predictor ``x @ w + b``.

    Args:
        x: Input tensor; its trailing dimension must be compatible with ``w``
            under ``torch.matmul``.
        w: Weight tensor.
        b: Bias tensor, added to the product with normal broadcasting.

    Returns:
        The tensor ``torch.matmul(x, w) + b``.
    """
    return torch.matmul(x, w) + b

In [8]:
# Trainable parameters of the linear model and the optimisation settings.
# A dedicated seeded generator makes the random initialisation repeatable across
# kernel restarts without modifying torch's global RNG state.
init_rng = torch.Generator().manual_seed(0)

# W is a (20, 1) column: its 20 rows must match the number of feature columns in x
# (presumably make_classification's default feature count -- confirm if the data cell changes).
W = torch.randn(20, 1, generator=init_rng, requires_grad=True)
b = torch.randn(1, generator=init_rng, requires_grad=True)

learning_rate = 0.001  # step size of each gradient-descent update
n_epochs = 10          # number of full passes over the training loader

In [9]:
# Mini-batch gradient descent on the mean-squared error of the linear model.
# W and b are updated in place, so they remain the same leaf tensors for the
# whole run instead of being rebound to freshly allocated tensors every step.
for epoch in range(n_epochs):
    n_batches = 0
    epoch_loss = 0.0
    for batch in train_dl:
        x_tr = batch["data"]
        # NOTE(review): the key really is "target " with a trailing space; presumably
        # that is what CustomDataset emits -- confirm before "fixing" it.
        y_tr = batch["target "]

        out = model(x_tr, W, b)
        # Flatten both sides so the difference is element-wise; W is a column, so
        # `out` carries a trailing dimension of size 1.
        loss = torch.mean((y_tr.view(-1) - out.view(-1)) ** 2)
        epoch_loss += loss.item()

        loss.backward()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            W -= learning_rate * W.grad
            b -= learning_rate * b.grad

        # Drop the gradients so the next backward() starts fresh instead of accumulating.
        W.grad = None
        b.grad = None
        n_batches += 1

    # Report the mean of the per-batch losses for this epoch.
    print("# {} | Loss {}".format(epoch, epoch_loss / n_batches))

# 0 | Loss 13.708605119205536
# 1 | Loss 6.466853470402829
# 2 | Loss 3.264493370151266
# 3 | Loss 1.6986129161446017
# 4 | Loss 0.9142414766978076
# 5 | Loss 0.5168642276709781
# 6 | Loss 0.3138345213250277
# 7 | Loss 0.2093537258098219
# 8 | Loss 0.15525142858770893
# 9 | Loss 0.12708573497613854


In [10]:
# Collect raw model outputs and the matching labels for every test batch.
outputs, labels = [], []

# Inference only: no gradients are needed while scoring the hold-out set.
with torch.no_grad():
    for batch in test_dl:
        labels.append(batch["target "])
        outputs.append(model(batch["data"], W, b))

In [11]:
# Join the per-batch tensors and flatten them to 1-D.
# A cell only displays its last expression, so both results are bound to names
# rather than computing the scores and silently throwing them away.
test_scores = torch.cat(outputs).view(-1)
test_labels = torch.cat(labels).view(-1)
test_labels

tensor([0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
        1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
        0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,
        0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
        0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,
        1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 0, 1, 0, 1, 0, 0, 0])

In [12]:
# ROC AUC of the raw linear outputs (used as ranking scores) against the true labels.
y_true = torch.cat(labels).view(-1)
y_score = torch.cat(outputs).view(-1)
roc_auc_score(y_true, y_score)

0.9458559999999999

In [2]:
?train_test_split

Signature:
train_test_split(
    *arrays,
    test_size=None,
    train_size=None,
    random_state=None,
    shuffle=True,
    stratify=None,
)
Docstring:
Split arrays or matrices into random train and test subsets.

Quick utility that wraps input validation,
``next(ShuffleSplit().split(X, y))``, and application to input data
into a single call for splitting (and optionally subsampling) data into a
one-liner.

Read more in the :ref:`User Guide <cross_validation>`.

Parameters
----------
*arrays : sequence of indexables with same length / shape[0]
    Allowed inputs are lists, numpy arrays, scipy-sparse