# Binary Classification with PyTorch
**Key Notes about Last Layer Activation:**
- ✅ **No activation needed** when using `CrossEntropyLoss` (it applies LogSoftmax internally, so it expects raw logits)
- ❌ **Must add Sigmoid** when using `BCELoss` (for binary classification); `BCEWithLogitsLoss` applies Sigmoid internally, so it takes raw logits with no activation

In [8]:
# !pip install torch
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## 1. Data Preparation

In [9]:
# Synthetic two-class dataset: 1000 samples, 10 features (8 of them informative).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=8,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples for testing, stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits so
# that no test-set statistics leak into training.
scaler_X = StandardScaler()
scaler_X.fit(X_train)
X_train, X_test = scaler_X.transform(X_train), scaler_X.transform(X_test)

# Features become float32 tensors; labels become int64 class indices, which is
# the target format CrossEntropyLoss expects.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

## 2. Model Definition
### Activation Function Rules:
```python
# When using CrossEntropyLoss (this example):
nn.Linear(32, 2)  # No activation!

# When using BCELoss (BCEWithLogitsLoss applies Sigmoid itself and needs no activation):
nn.Sequential(
    nn.Linear(32, 1),
    nn.Sigmoid()  # Must add explicitly
)
```

In [10]:
class DNNClassifier(nn.Module):
    """Small feed-forward classifier that returns raw class logits.

    Layer order: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Linear.
    The last layer has no activation because the network is meant to be
    trained with ``nn.CrossEntropyLoss``, which applies log-softmax itself.

    Args:
        in_features: Number of input features per sample.
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        num_classes: Number of output logits (one per class).
        dropout: Dropout probability applied after the first hidden layer.

    The defaults reproduce the original fixed 10 -> 64 -> 32 -> 2 network.
    """

    def __init__(
        self,
        in_features: int = 10,
        hidden_dim1: int = 64,
        hidden_dim2: int = 32,
        num_classes: int = 2,
        dropout: float = 0.2,
    ):
        super().__init__()
        # Keep a single nn.Sequential named ``net`` with the same layer order
        # so state_dict keys (net.0.*, net.3.*, net.5.*) stay unchanged.
        self.net = nn.Sequential(
            nn.Linear(in_features, hidden_dim1),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(),
            nn.Linear(hidden_dim2, num_classes),  # raw logits, no activation
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the logits produced by ``self.net`` for input ``x``."""
        return self.net(x)


model = DNNClassifier()

## 3. Training Setup

In [11]:
# CrossEntropyLoss consumes raw logits plus integer class labels and applies
# log-softmax internally, so the same setup also works for multi-class models.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## 4. Training Loop

In [12]:
# Full-batch training: each epoch is one forward/backward pass over the whole
# training set followed by a single optimizer step.
model.train()  # nothing inside the loop changes the mode, so set it once
for epoch in range(100):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch only.
    if (epoch + 1) % 10 != 0:
        continue
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 10, Loss: 0.6399
Epoch 20, Loss: 0.5962
Epoch 30, Loss: 0.5499
Epoch 40, Loss: 0.4970
Epoch 50, Loss: 0.4464
Epoch 60, Loss: 0.4151
Epoch 70, Loss: 0.3644
Epoch 80, Loss: 0.3237
Epoch 90, Loss: 0.3015
Epoch 100, Loss: 0.2708


## 5. Evaluation

In [13]:
# Switch to inference mode (disables dropout) and score the held-out split.
model.eval()
with torch.no_grad():
    logits = model(X_test_tensor)
    # Softmax over the class dimension turns logits into per-class probabilities.
    probabilities = torch.softmax(logits, dim=1)
    # Predicted class = index of the largest logit in each row.
    predicted = torch.max(logits, dim=1).indices

# Convert once to NumPy for the scikit-learn metric functions.
predicted_labels = predicted.numpy()

print("\nProbability Examples:")
print(probabilities[:5].numpy())  # first five test samples

print("\nTest Accuracy:", accuracy_score(y_test, predicted_labels))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, predicted_labels))
print("\nClassification Report:\n", classification_report(y_test, predicted_labels))


Probability Examples:
[[8.0727845e-02 9.1927212e-01]
 [5.8894467e-01 4.1105536e-01]
 [9.7765690e-01 2.2343101e-02]
 [9.9971932e-01 2.8065426e-04]
 [1.0496309e-01 8.9503688e-01]]

Test Accuracy: 0.89

Confusion Matrix:
 [[87 13]
 [ 9 91]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.87      0.89       100
           1       0.88      0.91      0.89       100

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.89       200
weighted avg       0.89      0.89      0.89       200



## Alternative BCEWithLogitsLoss Version
```python
# With BCELoss, the last layer is a single output followed by an explicit Sigmoid:
nn.Sequential(
    nn.Linear(32, 1),  # Single output
    nn.Sigmoid()       # Must add for BCELoss
)
# Or use BCEWithLogitsLoss (preferred):
nn.Linear(32, 1)       # No activation
criterion = nn.BCEWithLogitsLoss()  # Built-in Sigmoid; targets must be float and match the output shape, e.g. (N, 1)
```