In [1]:
import numpy as np
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
class CustomDataset:
    """Indexable dataset that pairs each row of ``data`` with its target.

    Items are returned as a dict holding a float tensor under ``"x"`` and a
    long tensor under ``"y"``.
    """

    def __init__(self, data, targets):
        # Only references are stored; tensor conversion happens on item access.
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples (first dimension of ``data``)."""
        n_rows = self.data.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{"x": float tensor, "y": long tensor}``."""
        features = torch.tensor(self.data[idx, :], dtype=torch.float)
        label = torch.tensor(self.targets[idx], dtype=torch.long)
        return {"x": features, "y": label}
    

In [3]:
# Synthetic binary classification data: 1000 samples with 20 features each.
# random_state is fixed so the same data is generated on every run.
x, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=100)

In [4]:
# No test_size is passed, so train_test_split falls back to its default
# 25 % hold-out (750 train / 250 test rows for the 1000 samples).
# stratify=y keeps the class proportions of y the same in both splits;
# it does not control the size of the split.
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, random_state=100) # default test_size=0.25
# Sanity-check the shapes of the resulting splits.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(750, 20) (750,) (250, 20) (250,)


In [5]:
# Wrap each split so samples can be fetched by index as {"x": ..., "y": ...} dicts.
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

In [6]:
# Peek at the first training sample to check the tensor dtypes and layout.
train_dataset[0]

{'x': tensor([-0.2740,  0.6540, -0.4939, -0.9140, -1.2385, -0.9883,  1.7838, -0.9261,
          0.4457, -0.0412, -1.1414,  0.1102, -2.2618, -0.8671,  0.6208,  0.7507,
         -1.6332,  1.7366, -0.9487,  0.0370]),
 'y': tensor(0)}

In [7]:
# Repeated lookup of the first training sample (duplicate of the previous
# cell; the displayed output is unchanged).
train_dataset[0]

{'x': tensor([-0.2740,  0.6540, -0.4939, -0.9140, -1.2385, -0.9883,  1.7838, -0.9261,
          0.4457, -0.0412, -1.1414,  0.1102, -2.2618, -0.8671,  0.6208,  0.7507,
         -1.6332,  1.7366, -0.9487,  0.0370]),
 'y': tensor(0)}

In [8]:
# Batch both splits in groups of 4. No shuffle argument is passed, so the
# loaders use DataLoader's default ordering.
# NOTE(review): consider shuffle=True on the training loader if per-epoch
# reshuffling is wanted -- confirm intent.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=4)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=4)

In [9]:
# Linear model: the affine map x @ w + b. The parameters are passed in as
# explicit arguments rather than stored on the model.
model = lambda x, w, b: torch.matmul(x, w) + b
model

<function __main__.<lambda>(x, w, b)>

In [10]:
# Seed torch's global RNG so the random initial parameters -- and therefore the
# baseline and post-training metrics -- are identical on every Restart & Run All.
torch.manual_seed(100)

# Linear-model parameters: one weight per input feature (20 -> 1 output) and a
# single bias. requires_grad=True lets autograd populate .grad on backward().
w = torch.randn(20, 1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

# Step size for the manual gradient-descent updates.
learning_rate = 0.001

In [11]:
# Baseline pass over the test loader: collect the model's raw outputs and the
# matching labels, one tensor per batch.
outputs, labels = [], []
with torch.no_grad():  # evaluation only, so no autograd graph is recorded
    for batch in test_loader:
        labels.append(batch["y"])
        outputs.append(model(batch["x"], w, b))

In [12]:
# ROC AUC of the collected scores against the true labels. Both lists of
# per-batch tensors are concatenated and flattened to 1-D first.
y_true = torch.cat(labels).view(-1)
y_score = torch.cat(outputs).view(-1)
metrics.roc_auc_score(y_true, y_score)

0.6113671274961597

In [13]:
# Train the linear model with plain mini-batch gradient descent, using the mean
# squared error between the raw model outputs and the integer labels as loss.
for epoch in range(10):
    epoch_loss = 0
    counter = 0  # batches seen this epoch; used to average the loss

    for data in train_loader:
        xtrain = data["x"]
        ytrain = data["y"]

        output = model(xtrain, w, b)
        # Flatten both sides so targets and outputs subtract element-wise
        # rather than broadcasting against each other.
        loss = torch.mean((ytrain.view(-1) - output.view(-1))**2)
        epoch_loss = epoch_loss + loss.item()
        loss.backward()

        # Update the parameters in place so `w` and `b` stay the same leaf
        # tensors for the whole run. The update itself must not be tracked
        # by autograd, hence no_grad.
        with torch.no_grad():
            w -= learning_rate * w.grad
            b -= learning_rate * b.grad

        # backward() accumulates into .grad, and the in-place update keeps
        # the same tensors, so clear the gradients before the next batch.
        w.grad.zero_()
        b.grad.zero_()

        counter += 1
    # Report the mean batch loss for this epoch.
    print(epoch, epoch_loss/counter)
        

0 19.579123447550103
1 8.761709779183915
2 4.116205718526815
3 2.0112085712954717
4 1.0253168032405542
5 0.5540028300948758
6 0.32564352434425753
7 0.2139338352469767
8 0.15887486499280945
9 0.131565201118529


In [14]:
# Post-training pass over the test loader: collect the model's raw outputs and
# the matching labels, one tensor per batch.
outputs, labels = [], []
with torch.no_grad():  # evaluation only, so no autograd graph is recorded
    for batch in test_loader:
        labels.append(batch["y"])
        outputs.append(model(batch["x"], w, b))

In [15]:
# Inspect the collected labels: a list with one label tensor per test batch.
labels

[tensor([1, 0, 1, 1]),
 tensor([1, 1, 1, 0]),
 tensor([0, 0, 1, 1]),
 tensor([0, 0, 0, 0]),
 tensor([1, 1, 1, 0]),
 tensor([1, 1, 0, 0]),
 tensor([0, 1, 1, 1]),
 tensor([0, 0, 0, 0]),
 tensor([0, 0, 1, 0]),
 tensor([0, 1, 1, 1]),
 tensor([0, 0, 1, 1]),
 tensor([0, 0, 1, 0]),
 tensor([1, 1, 0, 1]),
 tensor([1, 0, 1, 1]),
 tensor([0, 0, 1, 1]),
 tensor([1, 0, 1, 1]),
 tensor([1, 1, 1, 0]),
 tensor([0, 0, 1, 1]),
 tensor([1, 1, 0, 1]),
 tensor([1, 0, 1, 1]),
 tensor([1, 0, 0, 1]),
 tensor([0, 1, 1, 0]),
 tensor([1, 0, 1, 1]),
 tensor([1, 1, 0, 0]),
 tensor([1, 1, 0, 0]),
 tensor([1, 1, 1, 0]),
 tensor([0, 1, 0, 1]),
 tensor([1, 1, 0, 0]),
 tensor([1, 0, 0, 1]),
 tensor([1, 0, 1, 1]),
 tensor([0, 1, 1, 0]),
 tensor([0, 1, 1, 1]),
 tensor([1, 1, 1, 0]),
 tensor([1, 1, 1, 0]),
 tensor([1, 1, 1, 0]),
 tensor([0, 1, 0, 0]),
 tensor([0, 1, 1, 0]),
 tensor([1, 0, 0, 1]),
 tensor([1, 0, 0, 0]),
 tensor([0, 0, 1, 0]),
 tensor([1, 0, 0, 1]),
 tensor([0, 0, 0, 1]),
 tensor([0, 0, 0, 0]),
 tensor([1,

In [16]:
# Join the per-batch output tensors and flatten them into a single 1-D tensor.
torch.cat(outputs).view(-1)

tensor([ 0.7736, -0.5553, -0.0949,  0.5119, -0.0343,  0.8114,  0.9362,  0.2969,
         0.1744,  0.1623,  0.7285,  1.4850,  0.6090,  0.0743,  0.7295, -0.0654,
         0.3553,  0.9316,  0.5529,  0.1256,  0.5378,  0.4631,  0.3301,  0.3506,
         0.0496,  0.9745,  0.8053,  0.6819,  0.0946, -0.0531,  0.1678,  0.1004,
         0.1931,  0.2425,  0.7688,  0.1355,  0.1624,  0.9868,  0.9980,  0.6318,
         0.1212, -0.0498,  0.7989,  0.6447,  0.1406,  0.0437,  1.1816,  0.0732,
         0.9684,  1.0331,  0.2809,  1.2671,  0.8795,  0.1199,  0.5137,  0.7925,
         0.0310,  0.3874,  0.1777,  0.6879,  0.8900,  0.0926,  0.8282,  0.2396,
         0.8880,  0.7886,  0.3704, -0.1239,  0.2673,  0.0558,  0.5957,  1.0128,
         0.5097,  0.9002, -0.1877,  1.0733,  0.6327,  0.0707,  1.0683,  0.2833,
         0.9055,  0.0600, -0.1283,  0.8490,  0.1706,  0.9508,  1.0699,  0.0274,
         0.4305,  0.0997,  1.1695,  0.3748,  0.7266,  1.0284,  0.7576, -0.0357,
         0.7809,  0.9658,  0.2748,  0.36

In [17]:
# Join the per-batch label tensors and flatten them into a single 1-D tensor.
torch.cat(labels).view(-1)

tensor([1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
        0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
        1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
        0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1,
        1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 1, 1, 0, 0, 1, 0, 1])

In [18]:
# ROC AUC after training, computed the same way as the baseline: concatenate
# the per-batch tensors, flatten to 1-D, and score outputs against labels.
y_true = torch.cat(labels).view(-1)
y_score = torch.cat(outputs).view(-1)
metrics.roc_auc_score(y_true, y_score)

0.9505248335893496