### Logistic Regression - scikit-learn-datasets

### Imports

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import numpy as np
import torch.nn as nn
from torch.nn import functional as F

### Data Preparation

In [2]:
# Load the breast-cancer dataset object; its .data, .target and
# .target_names attributes are read in the cells below.
breast_cancer = datasets.load_breast_cancer()

In [3]:
#print(breast_cancer.DESCR)

In [4]:
# Pull the feature matrix and label vector out of the loaded dataset.
X, y = breast_cancer.data, breast_cancer.target

# Human-readable class names, indexed by label value.
types = breast_cancer.target_names
types

array(['malignant', 'benign'], dtype='<U9')

> **Splitting the data** - into train and test

In [5]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

> **Scaling the data** - using the `StandardScaler`

In [6]:
# Scaler instance; it is fitted on the training features in the next cell.
scaler = StandardScaler()

In [7]:
# Fit the scaler on the training split only, then reuse the fitted scaler on
# the test split so no test-set statistics are used when fitting.
# NOTE: both names are overwritten, so the unscaled arrays are no longer
# available after this cell.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

> **Converting data** - converting the NumPy arrays into `PyTorch` tensors.

In [8]:
# Cast every array to float32 before wrapping it in a tensor.
# Features
X_train_tensors = torch.from_numpy(X_train.astype(np.float32))
X_test_tensors = torch.from_numpy(X_test.astype(np.float32))

# Labels (float32 as well, since they are later passed to BCELoss as targets)
y_train_tensors = torch.from_numpy(y_train.astype(np.float32))
y_test_tensors = torch.from_numpy(y_test.astype(np.float32))

In [9]:
# Shapes of the four tensors, in order: train X, train y, test X, test y.
tuple(
    tensor.shape
    for tensor in (X_train_tensors, y_train_tensors, X_test_tensors, y_test_tensors)
)

(torch.Size([455, 30]),
 torch.Size([455]),
 torch.Size([114, 30]),
 torch.Size([114]))

### Reshaping the labels to `(n_samples, 1)`

In [10]:
# Turn the flat label vectors into single-column matrices so they line up
# with the model's (n_samples, 1) output. Using view keeps the cell safe to re-run.
y_train_tensors = y_train_tensors.view(-1, 1)
y_test_tensors = y_test_tensors.view(-1, 1)

In [11]:
# Preview the first two test labels and the first five training labels.
y_test_tensors[:2], y_train_tensors[:5]

(tensor([[1.],
         [0.]]),
 tensor([[1.],
         [0.],
         [1.],
         [1.],
         [1.]]))

### Model Creation

In [12]:
# Number of features per sample = size of the last tensor dimension.
input_shape = X_test_tensors.size(-1)
input_shape

30

In [13]:
class LogisticRegression(nn.Module):
    """Single-layer logistic-regression classifier.

    One linear layer maps each feature vector to a single value, which is
    passed through a sigmoid so the output lies between 0 and 1.

    Args:
        in_features: Number of input features per sample. When omitted, the
            notebook-level ``input_shape`` variable is used, so existing
            ``LogisticRegression()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible fallback to the notebook global.
            in_features = input_shape
        self.linn = nn.Linear(in_features, 1)

    def forward(self, X):
        """Return the sigmoid of the linear layer's output for ``X``.

        The linear layer has one output unit, so the last dimension of the
        result has size 1.
        """
        # Sigmoid output suits binary classification and pairs with BCELoss.
        return torch.sigmoid(self.linn(X))
    
    
# Instantiate the classifier and display its layer summary.
model = LogisticRegression()
model

LogisticRegression(
  (linn): Linear(in_features=30, out_features=1, bias=True)
)

### Loss and Optimizer
> For the loss we use the binary cross-entropy loss function (`BCELoss`), and for the optimizer we use stochastic gradient descent (`SGD`).

In [14]:
# Plain SGD over all model parameters, paired with binary cross-entropy
# computed on the model's sigmoid outputs.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)
loss_function = nn.BCELoss()

### Training the Model

In [20]:
EPOCHS = 200

# Full-batch gradient descent: every epoch uses the whole training set.
for epoch in range(EPOCHS):
    # Clear gradients *before* the backward pass. If this cell was previously
    # interrupted between backward() and zero_grad(), stale gradients would
    # otherwise be accumulated into the first update of the next run.
    optimizer.zero_grad()
    # forward pass
    y_pred = model(X_train_tensors)
    # loss: BCELoss takes the predictions first, then the targets
    loss = loss_function(y_pred, y_train_tensors)
    # backward pass
    loss.backward()
    # update weights
    optimizer.step()
    # Log every 20th epoch (epoch numbers are shown 1-based).
    if epoch % 20 == 0:
        print(f"Epochs: {epoch+1}/{EPOCHS}, loss: {loss.item():.4f}")

Epochs: 1/200, loss: 0.1607
Epochs: 21/200, loss: 0.1569
Epochs: 41/200, loss: 0.1534
Epochs: 61/200, loss: 0.1501
Epochs: 81/200, loss: 0.1471
Epochs: 101/200, loss: 0.1444
Epochs: 121/200, loss: 0.1418
Epochs: 141/200, loss: 0.1394
Epochs: 161/200, loss: 0.1372
Epochs: 181/200, loss: 0.1351


### Evaluating the Model

In [23]:
# Gradients are not needed for evaluation, so disable autograd tracking.
with torch.no_grad():
    # Round the sigmoid outputs to hard 0/1 class labels.
    y_pred = torch.round(model(X_test_tensors))

# Vectorised comparison instead of a Python loop over one-element tensors:
# count the positions where prediction and label agree.
correct = int((y_pred == y_test_tensors).sum())
total = y_test_tensors.shape[0]
print(f"Accuracy: {correct/total:.2f}")

Accuracy: 0.98


### Making predictions

In [26]:
# Predicted probabilities for the first two test samples, rounded to 0/1 labels.
# NOTE: this runs outside torch.no_grad(), so the displayed tensor carries a grad_fn.
first_two_probs = model(X_test_tensors[:2])
torch.round(first_two_probs)

tensor([[1.],
        [0.]], grad_fn=<RoundBackward>)