In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

OSError: [WinError 1114] A dynamic link library (DLL) initialization routine failed. Error loading "d:\vscode\AI\Santosh_env\Lib\site-packages\torch\lib\c10.dll" or one of its dependencies.

In [None]:
# 1. Data retrieval & inspection
# Read the CSV into a DataFrame, then print a preview followed by the
# summary statistics (each heading on its own line above its table).
df = pd.read_csv('circles_binary_classification.csv')
print("Head of the dataset:", df.head(), sep="\n")
print("\nDescription of the dataset:", df.describe(), sep="\n")

In [None]:
# 2. Data cleaning & feature design
# Assuming minimal cleaning needed, as per assignment

# Build float32 tensors straight from the feature and label columns.
X = torch.tensor(df[['X1', 'X2']].to_numpy(), dtype=torch.float32)
# Append a trailing axis so the labels are shaped (N, 1).
y = torch.tensor(df['label'].to_numpy(), dtype=torch.float32).unsqueeze(-1)

print(f"X shape: {X.shape}, y shape: {y.shape}")

In [None]:
# 3. Visualize data
# Scatter the two feature columns against each other, coloured by label.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y.squeeze(), cmap='bwr', alpha=0.7)
plt.gca().set(
    xlabel='X1',
    ylabel='X2',
    title='Scatter plot of X1 vs X2 colored by label',
)
plt.colorbar(label='Label')
plt.show()

In [None]:
# 4. Train/test split
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(
    f"Train shapes: X {X_train.shape}, y {y_train.shape}",
    f"Test shapes: X {X_test.shape}, y {y_test.shape}",
    sep="\n",
)

In [None]:
# 5. Device & dtype
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Move tensors to device
X_train, y_train, X_test, y_test = (
    split.to(device) for split in (X_train, y_train, X_test, y_test)
)

In [None]:
# 6. Implement baseline models
class ModelV0(nn.Module):
    """Two stacked linear layers (2 -> 5 -> 1) with no activation between them."""

    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(in_features=2, out_features=5)
        self.output = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Apply the hidden layer and then the output layer; returns the raw output."""
        return self.output(self.layer(x))

class ModelV1(nn.Module):
    """Three stacked linear layers (2 -> 15 -> 15 -> 1) with no activations."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=2, out_features=15)
        self.layer2 = nn.Linear(in_features=15, out_features=15)
        self.output = nn.Linear(in_features=15, out_features=1)

    def forward(self, x):
        """Chain layer1 -> layer2 -> output and return the raw output."""
        hidden = self.layer2(self.layer1(x))
        return self.output(hidden)

class ModelV2(nn.Module):
    """Linear stack 2 -> 64 -> 64 -> 10 -> 1 with ReLU after each hidden layer."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=2, out_features=64)
        self.layer2 = nn.Linear(in_features=64, out_features=64)
        self.layer3 = nn.Linear(in_features=64, out_features=10)
        self.output = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Run the three ReLU-activated hidden layers, then the un-activated output layer."""
        hidden = x
        for hidden_layer in (self.layer1, self.layer2, self.layer3):
            hidden = F.relu(hidden_layer(hidden))
        return self.output(hidden)

In [None]:
# 7. Loss, optimizer, metrics
def accuracy_fn(y_true, y_pred):
    """Return the percentage of binary predictions that match the labels.

    Args:
        y_true: Tensor of ground-truth labels (0 or 1).
        y_pred: Tensor of raw logits holding the same number of elements as
            ``y_true``. The logits are passed through a sigmoid and rounded
            to obtain hard 0/1 predictions.

    Returns:
        Accuracy as a Python float between 0 and 100. Returns 0.0 when there
        are no samples.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` hold a different number of
            elements.
    """
    # Flatten both tensors so that e.g. (N,) labels and (N, 1) logits are
    # compared element-wise. Without this, torch.eq broadcasts them to an
    # (N, N) grid and the "correct" count can exceed the number of samples.
    labels = y_true.reshape(-1)
    logits = y_pred.reshape(-1)

    n_samples = logits.numel()
    if labels.numel() != n_samples:
        raise ValueError(
            f"y_true has {labels.numel()} elements but y_pred has {n_samples}"
        )
    if n_samples == 0:
        # Avoid a ZeroDivisionError on an empty batch.
        return 0.0

    predictions = torch.round(torch.sigmoid(logits))
    correct = torch.eq(labels, predictions).sum().item()
    return (correct / n_samples) * 100

In [None]:
# 8. Training loop
def train_and_test_loop(model, X_train, y_train, X_test, y_test, epochs, lr=0.1,
                        optimizer_cls=None, log_every=10):
    """Train ``model`` with full-batch gradient steps and evaluate it every epoch.

    The loss is ``nn.BCEWithLogitsLoss``, so the model is expected to return
    raw logits. The model is moved to the device that ``X_train`` lives on, so
    the function no longer depends on a notebook-global ``device`` variable.

    Args:
        model: ``nn.Module`` to train (modified in place).
        X_train, y_train: Training inputs and labels (tensors on one device).
        X_test, y_test: Evaluation inputs and labels on the same device.
        epochs: Number of full-batch optimisation steps to run.
        lr: Learning rate handed to the optimizer.
        optimizer_cls: Optimizer class called as
            ``optimizer_cls(model.parameters(), lr=lr)``. Defaults to
            ``torch.optim.SGD`` when ``None``.
        log_every: Print metrics every ``log_every`` epochs; ``0`` or ``None``
            disables printing.

    Returns:
        Tuple ``(train_losses, test_losses, train_accs, test_accs)``: four
        lists with one float per epoch.
    """
    # NOTE: this seed only affects randomness used from here on; weights of a
    # model that was constructed before the call are already initialised.
    torch.manual_seed(42)
    model = model.to(X_train.device)
    loss_fn = nn.BCEWithLogitsLoss()
    if optimizer_cls is None:
        optimizer_cls = torch.optim.SGD
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    train_losses, test_losses = [], []
    train_accs, test_accs = [], []

    for epoch in range(epochs):
        # --- training step on the full training set ---
        model.train()
        y_logits = model(X_train)
        loss = loss_fn(y_logits, y_train)
        # Accuracy is a metric only, so keep it out of the autograd graph.
        acc = accuracy_fn(y_train, y_logits.detach())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # --- evaluation on the held-out set, after the update ---
        model.eval()
        with torch.inference_mode():
            test_logits = model(X_test)
            test_loss = loss_fn(test_logits, y_test).item()
            test_acc = accuracy_fn(y_test, test_logits)

        train_loss = loss.item()
        if log_every and epoch % log_every == 0:
            print(f"Epoch: {epoch} | Loss: {train_loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

        train_losses.append(train_loss)
        test_losses.append(test_loss)
        train_accs.append(acc)
        test_accs.append(test_acc)

    return train_losses, test_losses, train_accs, test_accs

In [None]:
# Helper function for plotting decision boundary
def plot_decision_boundary(model, X, y, title="Decision Boundary"):
    """Plot the model's 0/1 prediction regions with the data points on top.

    A 100 x 100 grid covering the range of the two feature columns (padded by
    0.1 on each side) is classified by ``model``; the rounded sigmoid output
    is drawn as filled contours and the samples are scattered over it.

    Args:
        model: ``nn.Module`` returning one logit per input row. It is switched
            to eval mode and left in that mode.
        X: Features with two columns; a tensor on any device or an array-like.
        y: Labels used to colour the points; tensor or array-like.
        title: Title of the figure.
    """
    # Work on CPU NumPy copies so callers do not have to call .cpu() first.
    X_np = X.detach().cpu().numpy() if torch.is_tensor(X) else np.asarray(X)
    y_np = y.detach().cpu().numpy() if torch.is_tensor(y) else np.asarray(y)

    # Evaluate the grid on whatever device the model lives on instead of
    # relying on a notebook-global `device`; parameter-free models use the CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    x_min, x_max = X_np[:, 0].min() - 0.1, X_np[:, 0].max() + 0.1
    y_min, y_max = X_np[:, 1].min() - 0.1, X_np[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))

    # One row per grid point: (x1, x2).
    X_mesh = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32, device=model_device)
    with torch.inference_mode():
        Z = model(X_mesh)
        Z = torch.round(torch.sigmoid(Z)).cpu().numpy().reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.4, cmap='bwr')
    plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np.squeeze(), cmap='bwr', edgecolor='k')
    plt.title(title)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

In [None]:
# ModelV0: 2 → 5 → 1 (no activation)
# Seed before building the model so its initial weights are reproducible; the
# seed set inside train_and_test_loop runs after the weights already exist.
torch.manual_seed(42)
# X_test already lives on `device`, so the model must be moved there before
# the untrained evaluation (a CPU model fails when CUDA is selected).
model_v0 = ModelV0().to(device)
print("Untrained ModelV0 predictions:")
with torch.inference_mode():
    untrained_logits = model_v0(X_test)
    print(f"Untrained accuracy: {accuracy_fn(y_test, untrained_logits):.2f}%")

train_losses_v0, test_losses_v0, train_accs_v0, test_accs_v0 = train_and_test_loop(model_v0, X_train, y_train, X_test, y_test, epochs=100)

print(f"\nTrained ModelV0 final test accuracy: {test_accs_v0[-1]:.2f}%")

# Plot decision boundaries
plot_decision_boundary(model_v0, X_train.cpu(), y_train.cpu(), "ModelV0 Train Decision Boundary")
plot_decision_boundary(model_v0, X_test.cpu(), y_test.cpu(), "ModelV0 Test Decision Boundary")

# Plot loss curves
plt.plot(train_losses_v0, label='Train Loss')
plt.plot(test_losses_v0, label='Test Loss')
plt.title('ModelV0 Loss Curves')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# ModelV1: 2 → 15 → 15 → 1 (no activation)
# Seed before building the model so its initial weights are reproducible; the
# seed set inside train_and_test_loop runs after the weights already exist.
torch.manual_seed(42)
# X_test already lives on `device`, so the model must be moved there before
# the untrained evaluation (a CPU model fails when CUDA is selected).
model_v1 = ModelV1().to(device)
print("Untrained ModelV1 predictions:")
with torch.inference_mode():
    untrained_logits = model_v1(X_test)
    print(f"Untrained accuracy: {accuracy_fn(y_test, untrained_logits):.2f}%")

train_losses_v1, test_losses_v1, train_accs_v1, test_accs_v1 = train_and_test_loop(model_v1, X_train, y_train, X_test, y_test, epochs=1000)

print(f"\nTrained ModelV1 final test accuracy: {test_accs_v1[-1]:.2f}%")

# Plot decision boundaries
plot_decision_boundary(model_v1, X_train.cpu(), y_train.cpu(), "ModelV1 Train Decision Boundary")
plot_decision_boundary(model_v1, X_test.cpu(), y_test.cpu(), "ModelV1 Test Decision Boundary")

# Plot loss curves
plt.plot(train_losses_v1, label='Train Loss')
plt.plot(test_losses_v1, label='Test Loss')
plt.title('ModelV1 Loss Curves')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# ModelV2: 2 → 64 → 64 → 10 -> 1 with ReLU
# Seed before building the model so its initial weights are reproducible; the
# seed set inside train_and_test_loop runs after the weights already exist.
torch.manual_seed(42)
# X_test already lives on `device`, so the model must be moved there before
# the untrained evaluation (a CPU model fails when CUDA is selected).
model_v2 = ModelV2().to(device)
print("Untrained ModelV2 predictions:")
with torch.inference_mode():
    untrained_logits = model_v2(X_test)
    print(f"Untrained accuracy: {accuracy_fn(y_test, untrained_logits):.2f}%")

train_losses_v2, test_losses_v2, train_accs_v2, test_accs_v2 = train_and_test_loop(model_v2, X_train, y_train, X_test, y_test, epochs=1500)

print(f"\nTrained ModelV2 final test accuracy: {test_accs_v2[-1]:.2f}%")

# Plot decision boundaries
plot_decision_boundary(model_v2, X_train.cpu(), y_train.cpu(), "ModelV2 Train Decision Boundary")
plot_decision_boundary(model_v2, X_test.cpu(), y_test.cpu(), "ModelV2 Test Decision Boundary")

# Plot loss curves
plt.plot(train_losses_v2, label='Train Loss')
plt.plot(test_losses_v2, label='Test Loss')
plt.title('ModelV2 Loss Curves')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Optional: ModelV2 with Adam optimizer
# Seed before building the model so its initial weights are reproducible; the
# seed set inside train_with_adam runs after the weights already exist.
torch.manual_seed(42)
model_v2_adam = ModelV2()
def train_with_adam(model, X_train, y_train, X_test, y_test, epochs, lr=0.01):
    """Train ``model`` with Adam using full-batch steps, evaluating every epoch.

    Mirrors ``train_and_test_loop`` but uses ``torch.optim.Adam``. The loss is
    ``nn.BCEWithLogitsLoss``, so the model is expected to return raw logits.
    The model is moved to the device that ``X_train`` lives on, so the
    function does not depend on a notebook-global ``device`` variable.

    Args:
        model: ``nn.Module`` to train (modified in place).
        X_train, y_train: Training inputs and labels (tensors on one device).
        X_test, y_test: Evaluation inputs and labels on the same device.
        epochs: Number of full-batch optimisation steps to run.
        lr: Learning rate handed to Adam.

    Returns:
        Tuple ``(train_losses, test_losses, train_accs, test_accs)``: four
        lists with one float per epoch.
    """
    # NOTE: this seed only affects randomness used from here on; weights of a
    # model that was constructed before the call are already initialised.
    torch.manual_seed(42)
    model = model.to(X_train.device)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_losses, test_losses = [], []
    train_accs, test_accs = [], []

    for epoch in range(epochs):
        # --- training step on the full training set ---
        model.train()
        y_logits = model(X_train)
        loss = loss_fn(y_logits, y_train)
        # Accuracy is a metric only, so keep it out of the autograd graph.
        acc = accuracy_fn(y_train, y_logits.detach())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # --- evaluation on the held-out set, after the update ---
        model.eval()
        with torch.inference_mode():
            test_logits = model(X_test)
            test_loss = loss_fn(test_logits, y_test).item()
            test_acc = accuracy_fn(y_test, test_logits)

        train_loss = loss.item()
        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | Loss: {train_loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

        train_losses.append(train_loss)
        test_losses.append(test_loss)
        train_accs.append(acc)
        test_accs.append(test_acc)

    return train_losses, test_losses, train_accs, test_accs

# Train the Adam variant for 500 epochs and keep the per-epoch metrics.
train_losses_v2_adam, test_losses_v2_adam, train_accs_v2_adam, test_accs_v2_adam = train_with_adam(
    model_v2_adam,
    X_train,
    y_train,
    X_test,
    y_test,
    epochs=500,
)

print(f"\nTrained ModelV2 with Adam final test accuracy: {test_accs_v2_adam[-1]:.2f}%")

# Plot loss curves for comparison
plt.plot(test_losses_v2, label='SGD Test Loss')
plt.plot(test_losses_v2_adam, label='Adam Test Loss')
plt.gca().set(
    title='ModelV2: SGD vs Adam Loss Curves',
    xlabel='Epoch',
    ylabel='Loss',
)
plt.legend()
plt.show()

# 11. Discussion and Conclusion

## Discussion
- **ModelV0**: A simple linear model (2 → 5 → 1) with no activation functions. As expected, it performs poorly on the non-linear circles dataset, achieving around 50% accuracy (no better than random guessing), because it can only learn a straight-line decision boundary.
- **ModelV1**: A deeper linear model (2 → 15 → 15 → 1), still with no activations. Stacked linear layers collapse to a single linear map, so any gain over ModelV0 is slight and performance remains limited.
- **ModelV2**: Non-linear model with ReLU activations 2 → 64 → 64 → 10 → 1. Should perform much better as it can learn non-linear boundaries.
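- **Optimizer Comparison**: Adam (lr=0.01, 500 epochs) converges faster than SGD (lr=0.1, 1500 epochs), requiring fewer epochs for similar performance. Note that the two runs use different learning rates and epoch counts, so the loss curves are not a like-for-like comparison.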

## Conclusion
Non-linear activations are crucial for non-linear classification tasks. Deeper networks with ReLU activations can capture complex patterns, and the Adam optimizer converges faster than SGD.