### Logistic Regression using pytorch

**Date:** 30/10/2021  
**Author:** Murad Popattia

In [117]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn import metrics
import cv2
import numpy as np

In [2]:
# Map-style dataset: lets a DataLoader fetch individual (sample, target) pairs by index
class CustomDataset(torch.utils.data.Dataset):
    """Pair each row of ``data`` with the matching entry of ``targets``.

    Args:
        data: Indexable collection of feature rows (this notebook passes a
            2-D NumPy array of shape ``(n_samples, n_features)``).
        targets: Indexable collection of integer class labels, one per row
            of ``data``.

    Raises:
        ValueError: If ``data`` and ``targets`` have different lengths.
    """

    def __init__(self, data, targets):
        # Fail early: a mismatch would otherwise only surface as an
        # IndexError (or silently ignored labels) in the middle of an epoch.
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict of tensors.

        Returns:
            dict with keys ``"sample"`` (float32 tensor holding the feature
            row) and ``"target"`` (int64 tensor holding the label).
        """
        cur_sample = self.data[idx]
        cur_target = self.targets[idx]

        return {
            "sample": torch.tensor(cur_sample, dtype=torch.float),
            "target": torch.tensor(cur_target, dtype=torch.long),
        }

In [47]:
# Fixed seed so the synthetic dataset and the train/test split are identical
# on every "Restart & Run All".
SEED = 42

# 1000 synthetic samples for a binary classification problem.
data, targets = make_classification(n_samples=1000, random_state=SEED)

# stratify=targets keeps the class proportions the same in both splits.
train_data, test_data, train_targets, test_targets = train_test_split(
    data, targets, stratify=targets, random_state=SEED
)

Stratification is used in classification problems so that the train and test splits keep the same class proportions as the full dataset. Because the generated dataset is balanced, the training set ends up with the same number of samples for each class, as shown below.

In [48]:
# Number of training samples per class, in label order (0, 1).
tuple(len(train_targets[train_targets == label]) for label in (0, 1))

(375, 375)

In [49]:
# Wrap each split so a DataLoader can index it sample by sample.
train_dataset = CustomDataset(data=train_data, targets=train_targets)
test_dataset = CustomDataset(data=test_data, targets=test_targets)

In [50]:
# Mini-batches of 4 samples, loaded in the main process (num_workers=0).
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=4,
    num_workers=0,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=4,
    num_workers=0,
)

In [51]:
# Linear model: one raw score per input row
def model(x, W, b):
    """Return ``x @ W + b``.

    Args:
        x: 2-D tensor of inputs, one row per sample.
        W: 2-D weight tensor whose first dimension matches ``x``'s columns.
        b: Bias tensor, broadcast-added to the matrix product.

    Returns:
        Tensor of raw (unsquashed) scores, one row per sample.
    """
    projected = torch.mm(x, W)
    return projected + b

In [52]:
# Check the split's shape: the column count is the feature dimension
# that the first dimension of W must match for x.mm(W) to work.
train_data.shape

(750, 20)

### Creating the model

In [154]:
# Seed torch's RNG so the initial parameters (and therefore the training
# curve) are reproducible across runs.
torch.manual_seed(0)

# One weight per input feature, taken from the data rather than hard-coded.
n_features = train_data.shape[1]

# requires_grad=True makes autograd track these tensors for backprop.
W = torch.randn(n_features, 1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

In [155]:
lr = 0.001

# Train with plain mini-batch gradient descent for 10 passes over the data.
for epoch in range(10):
    epoch_loss = 0.0
    counter = 0

    # `batch` (not `data`) so the dataset array created earlier is not clobbered.
    for batch in train_loader:
        x_train = batch["sample"]
        y_train = batch["target"]

        output = model(x_train, W, b)

        # Mean squared error between labels and raw scores; .view(-1)
        # flattens both tensors to 1-D so they subtract element-wise.
        # NOTE(review): this is MSE on an unsquashed linear output, not the
        # sigmoid + cross-entropy loss of textbook logistic regression.
        loss = torch.mean((y_train.view(-1) - output.view(-1)) ** 2)
        epoch_loss += loss.item()

        # Populate W.grad and b.grad.
        loss.backward()

        # Update in place with autograd disabled, so W and b stay the same
        # leaf tensors (still requiring grad) instead of being rebound to
        # fresh tensors on every step.
        with torch.no_grad():
            W -= lr * W.grad
            b -= lr * b.grad

        # Gradients accumulate across backward() calls, so reset both
        # before the next batch.
        W.grad.zero_()
        b.grad.zero_()

        counter += 1  # one more batch processed

    print("Epoch:", epoch, "Loss per batch: ", epoch_loss/counter)

Epoch: 0 Loss per batch:  17.582083642482758
Epoch: 1 Loss per batch:  5.556818822913981
Epoch: 2 Loss per batch:  2.289998558924553
Epoch: 3 Loss per batch:  1.0871505683010563
Epoch: 4 Loss per batch:  0.5653844877601938
Epoch: 5 Loss per batch:  0.32255866528151833
Epoch: 6 Loss per batch:  0.2058113976480796
Epoch: 7 Loss per batch:  0.14858027525800974
Epoch: 8 Loss per batch:  0.12011661746618436
Epoch: 9 Loss per batch:  0.10579177171940558


### Calculating the accuracy of the model

In [156]:
labels, outputs = [], []

# Inference only: no_grad() stops autograd from tracking these forward passes.
with torch.no_grad():
    for batch in test_loader:
        x_test, y_test = batch["sample"], batch["target"]

        # Keep each batch's raw scores and labels; they are joined later.
        output = model(x_test, W, b)
        outputs.append(output)
        labels.append(y_test)

In [157]:
# Show the first two batches of raw model scores next to their true labels.
outputs[:2], labels[:2]

([tensor([[0.6680],
          [0.4481],
          [0.4526],
          [0.1769]]),
  tensor([[ 0.2613],
          [-0.0996],
          [ 0.2428],
          [-0.0719]])],
 [tensor([1, 0, 1, 0]), tensor([1, 0, 0, 0])])

Since `outputs` and `labels` are lists of tensors, we can concatenate each list into a single tensor with *torch.cat()*.

In [158]:
# Join the per-batch tensors, flatten them to 1-D, and score the ranking of
# the raw outputs against the true labels.
metrics.roc_auc_score(
    y_true=torch.cat(labels).view(-1),
    y_score=torch.cat(outputs).view(-1),
)

0.961664

### Trying with random W and b

In [151]:
# Baseline: score the test set with untrained, randomly initialised parameters.
# Separate names keep the trained W and b (and their outputs/labels) intact,
# so earlier cells can be re-run without silently evaluating random weights.
# No requires_grad here: nothing is trained in this cell.
W_random = torch.randn(test_data.shape[1], 1)
b_random = torch.randn(1)

random_outputs = []
random_labels = []

with torch.no_grad():
    for batch in test_loader:
        x_test = batch["sample"]
        y_test = batch["target"]

        random_outputs.append(model(x_test, W_random, b_random))
        random_labels.append(y_test)

# ROC AUC of the untrained model, for comparison with the trained one.
metrics.roc_auc_score(
    torch.cat(random_labels).view(-1),
    torch.cat(random_outputs).view(-1),
)

0.17260799999999998