# Binary Classification with pytorch

DATASET : https://www.kaggle.com/datasets/erdemtaha/cancer-data

In [19]:
import pandas as pd
import numpy as np
import torch

In [20]:
cancer_df = pd.read_csv('./data/Cancer_Data.csv')

print(cancer_df.columns)

print("="*20)

print(cancer_df.info())

print("="*20)

print(cancer_df.head())

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_me

In [21]:
diagnosis_counts = cancer_df['diagnosis'].value_counts()

print(diagnosis_counts)

diagnosis
B    357
M    212
Name: count, dtype: int64


In [22]:
# Drop unnecessary columns
cancer_df = cancer_df.drop(columns=['id', 'Unnamed: 32'])

# Convert diagnosis to binary: Malignant (M) = 1, Benign (B) = 0
cancer_df['diagnosis'] = cancer_df['diagnosis'].map({'M': 1, 'B': 0})


# Split features and target
X = cancer_df.drop(columns=['diagnosis'])
y = cancer_df['diagnosis']

# Scaling Vs Normalization


#### Scaling
Scaling refers to resizing the range of features to ensure they are on a similar scale.

ex : Min-Max Scaling, Range 0 - 1

X_scaled = (X - Xmin)/(X_max - X_min)

We should use scaling when distances between points matter (e.g., KNN, gradient descent-based algorithms like neural networks).

#### Normalization

Normally, different features have different measurments which makes comparison difficult. If we dont standardize then one could dominate the calculations in machine learning models so we need a common scale.



Normalization refers to rescaling the data so that it has a mean of 0 and a standard deviation of 1.

We apply this for each column seperately and is also called Z-score normalization or standardization and both are statistical technique.

Mean 0: 
- The mean (or average) is a measure of the central tendency of your data. It tells you where the "center" of your data is.

- Now, if the mean is 0, it simply means that the center of the data has been shifted to zero. After standardization, the values will be centered around 0, which makes comparisons easier between features that were originally on different scales.

Standard Deviation 1:

- The standard deviation tells you how "spread out" or "spread around" the values are from the mean.

- A high standard deviation means the data points are widely spread out, and a low standard deviation means they are closely packed around the mean.

- Scaling needs to be done on all features so that the spread or variability of the data is standardized to be 1 unit so most of the data is within 1 standard deviation from the mean.

- If the standard deviation is 1, it means that most of your data points lie within one unit (1) away from the mean in both directions. +1 and -1.

- Data points  much higher or lower than the average will now be expressed in multiples of standard deviation

- In short Standard deviation will give us the unit of 1 SD


A = 100
B = 200
C = 300

Mean = 200

SD = 100 = 1 unit

A = -1
B = 0 
C = +1

Mu = mean
Sigma = Standard Deviation

X_norm = (X - Mu)/(Sigma)

We should use Normalization when you need to center the data for algorithms sensitive to feature distribution and in algorithms that assume normally distributed data.

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Normalize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pytorch Implementation

In [24]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

In [25]:
from torch.utils.data import DataLoader, TensorDataset

# Creating Dataset and Dataloader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

## Define Neural Network

In [26]:
import torch.nn as nn

# Define the Neural Network
class CancerNN(nn.Module):
    def __init__(self):
        super(CancerNN, self).__init__()
        self.fc1 = nn.Linear(30, 60)  # 30 input features
        self.fc2 = nn.Linear(60, 80)
        self.fc3 = nn.Linear(80, 100)
        self.fc4 = nn.Linear(100, 140)
        self.fc5 = nn.Linear(140, 180)
        self.fc6 = nn.Linear(180, 150)
        self.fc7 = nn.Linear(150, 100)
        self.fc8 = nn.Linear(100, 50)
        self.fc9 = nn.Linear(50, 25)
        self.fc10 = nn.Linear(25, 1)

        self.dropout = nn.Dropout(0.3)  # 30% dropout
        
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        x = self.dropout(x)
        x = torch.relu(self.fc4(x))
        x = self.dropout(x)
        x = torch.relu(self.fc5(x))
        x = self.dropout(x)
        x = torch.relu(self.fc6(x))
        x = torch.relu(self.fc7(x))
        x = self.dropout(x)
        x = torch.relu(self.fc8(x))
        x = torch.relu(self.fc9(x))
        x = self.sigmoid(self.fc10(x))
        return x

In [27]:
import torch.optim as optim

model = CancerNN()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.009)

In [28]:
# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            labels = labels.unsqueeze(1)  # Add extra dimension to match the output shape
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}")

In [29]:
train_model(model, train_loader, criterion, optimizer, epochs=40)

Epoch 1/40, Loss: 0.48881040713749824
Epoch 2/40, Loss: 0.1706023317296058
Epoch 3/40, Loss: 0.0923114600746582
Epoch 4/40, Loss: 0.04561207322403789
Epoch 5/40, Loss: 0.03795632511319127
Epoch 6/40, Loss: 0.024321097430220106
Epoch 7/40, Loss: 0.38385028412255146
Epoch 8/40, Loss: 0.36862138103072845
Epoch 9/40, Loss: 0.34100768603384496
Epoch 10/40, Loss: 0.06314152517673695
Epoch 11/40, Loss: 0.19026650758387403
Epoch 12/40, Loss: 0.0764404396196672
Epoch 13/40, Loss: 0.06322785338718576
Epoch 14/40, Loss: 0.03032026014601191
Epoch 15/40, Loss: 0.47797748457320116
Epoch 16/40, Loss: 0.15761693932581694
Epoch 17/40, Loss: 0.049267973036815724
Epoch 18/40, Loss: 0.036118607657651104
Epoch 19/40, Loss: 0.022170197191429725
Epoch 20/40, Loss: 0.026028451687943745
Epoch 21/40, Loss: 0.01645579804390793
Epoch 22/40, Loss: 0.008246669168744421
Epoch 23/40, Loss: 0.053173351456555926
Epoch 24/40, Loss: 0.5399745826919874
Epoch 25/40, Loss: 0.02825996347981648
Epoch 26/40, Loss: 0.5321356414

## Model Evaluation|

In [30]:
# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            labels = labels.unsqueeze(1)
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()  # Sigmoid output to binary
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [31]:
# Evaluate the model
evaluate_model(model, test_loader)

Test Accuracy: 96.49%


# Saving the prediction in dataframe