<a href="https://colab.research.google.com/github/Madushani-Weerasekara/PyTorch-Linear-Classification/blob/main/PyTorch_Linear_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Load in the dataset
from sklearn.datasets import load_breast_cancer


In [3]:
# Load the data
data = load_breast_cancer()

In [4]:
# Check the type of data
type(data)

sklearn.utils._bunch.Bunch

In [5]:
# Note : It is a bumch object.

# This basically act like a dictionary where you can treat the keys like atributes.
data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [6]:
# 'data (the atribute) means input data
data.data.shape
# It has 569 samples and 30 features

(569, 30)

In [7]:
# Targets
data.target
# Note how the targets are just 0s and 1s
# Normally when you have K targets, they  are labeled 0..K-1

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [8]:
# Their meaning is not lost
data.target_names

array(['malignant', 'benign'], dtype='<U9')

In [9]:
# There are also 569 corresponding targets
data.target.shape

(569,)

In [10]:
# You can also determine the meaning of each feature
data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [11]:
# Normally we would put all of our imports at the top
from sklearn.model_selection import train_test_split


In [12]:
# Split the data into train and test
# this let us simulate how our model will perform in the future
x_train,X_test,y_train,y_test = train_test_split(data.data,data.target,test_size=0.33)
N,D = x_train.shape

In [13]:
# Scale the data
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(X_test)


In [14]:
# Build the model
model = nn.Sequential(
    nn.Linear(D, 1),
    nn.Sigmoid()
)

In [15]:
# Loss and Optimizer
criterion = nn.BCELoss()
Optimizer = torch.optim.Adam(model.parameters())

In [16]:
# Convert data into torch tensor
x_train = torch.from_numpy(x_train.astype(np.float32))
x_test = torch.from_numpy(x_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32).reshape(-1, 1))
y_test = torch.from_numpy(y_test.astype(np.float32).reshape(-1, 1))

In [17]:
# Train the model
n_epochs = 1000


In [18]:
# Stuff to store
train_losses = np.zeros(n_epochs)
test_losess = np.zeros(n_epochs)


In [19]:
for it in range(n_epochs):
  # Zero the parameter gradients
  Optimizer.zero_grad()


In [20]:
# Forward pass
outputs = model(x_train)
loss = criterion(outputs,y_train)


In [21]:
# Backward and Optimize
loss.backward()
Optimizer.step()


In [22]:
# Get test loss
outputs_test = model(x_test)
loss_test = criterion(outputs_test, y_test)

In [23]:
# Save losses
train_losses[it] = loss.item()
test_losess[it] = loss_test.item()

In [26]:
if (it+1) % 50 == 0:
  print(f'Epoch {it+1}/{n_epochs},Train Loss: {loss.item():.4f},Test Loss: {loss.item()}')

Epoch 1000/1000,Train Loss: 0.5125,Test Loss: 0.5124738216400146
