# Single-Layer Custom ANN Implementation

**Assignment: Build a custom single-layer artificial neural network**

**Model Specifications:**
- Architecture: Y = w^T * x + b
- Activation function: Sigmoid
- Loss: Binary Cross Entropy
- Optimizer: Manual weight update using gradients

**Requirements:**
- Use only basic PyTorch operations (no torch.nn)
- Generate or load dataset from CSV
- Manual gradient computation and parameter updates

In [None]:
# Import required libraries
import torch
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import os

# Pin the PyTorch and NumPy random generators so repeated runs match
np.random.seed(42)
torch.manual_seed(42)

# Report the environment the notebook is running in
if torch.cuda.is_available():
    device_label = 'CUDA'
else:
    device_label = 'CPU'
print(f"PyTorch version: {torch.__version__}")
print(f"Device available: {device_label}")

## 1. Dataset Generation and Loading

In [None]:
# Generate binary classification dataset as specified
def generate_dataset():
    """Create the two-feature binary classification dataset and save it.

    Draws 100 samples with ``make_classification`` (``random_state=1``),
    writes them to ``binary_data.csv`` in the current working directory,
    and prints a short summary of what was written.

    Returns:
        Tuple ``(X, y, df)``: the feature array, the label array, and the
        DataFrame (columns ``f1``, ``f2``, ``label``) that was saved.
    """
    print("Generating binary classification dataset...")

    sampler_options = dict(
        n_samples=100,
        n_features=2,
        n_classes=2,
        n_redundant=0,
        n_informative=2,
        n_clusters_per_class=1,
        random_state=1,
    )
    X, y = make_classification(**sampler_options)

    # Tabular view of the samples; this is what gets persisted to disk
    df = pd.DataFrame(X, columns=['f1', 'f2']).assign(label=y)
    df.to_csv('binary_data.csv', index=False)

    label_counts = df['label'].value_counts().to_dict()
    print(f"Dataset saved to: binary_data.csv")
    print(f"Dataset shape: {df.shape}")
    print(f"Label distribution: {label_counts}")

    return X, y, df

# Build the dataset (this call also writes binary_data.csv)
X, y, df = generate_dataset()

# Peek at the first rows of the table
print("\nFirst 5 rows of the dataset:")
print(df.head())

# Scatter plot of the two features, colored by class label
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.7)
ax.set_xlabel('Feature 1 (f1)')
ax.set_ylabel('Feature 2 (f2)')
ax.set_title('Binary Classification Dataset')
fig.colorbar(scatter, ax=ax)
ax.grid(True, alpha=0.3)
plt.show()

## 2. Data Preparation

In [None]:
# Prepare data for training
def prepare_data(X, y, test_size=0.2, random_state=42):
    """Split, standardize, and convert the dataset into PyTorch tensors.

    The split is stratified on ``y``. The scaler is fitted on the training
    split only and then applied to the test split. Tensors are placed on
    CUDA when it is available, otherwise on the CPU.

    Args:
        X: Feature array passed to ``train_test_split``.
        y: Label array, also used as the stratification key.
        test_size: Fraction of samples held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, scaler)`` where the label
        tensors are reshaped to a single column and ``scaler`` is the fitted
        ``StandardScaler``.
    """
    # Stratified hold-out split
    parts = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    X_train, X_test, y_train, y_test = parts

    # Fit scaling statistics on the training split, reuse them for the test split
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def as_features(array):
        """Float tensor of features on the selected device."""
        return torch.FloatTensor(array).to(device)

    def as_targets(array):
        """Float tensor of labels as a single column on the selected device."""
        return torch.FloatTensor(array).reshape(-1, 1).to(device)

    X_train_tensor = as_features(X_train_scaled)
    X_test_tensor = as_features(X_test_scaled)
    y_train_tensor = as_targets(y_train)
    y_test_tensor = as_targets(y_test)

    print(f"Training set: {X_train_tensor.shape[0]} samples")
    print(f"Test set: {X_test_tensor.shape[0]} samples")
    print(f"Device: {device}")

    return X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor, scaler

# Run the preparation pipeline on the generated dataset
X_train, X_test, y_train, y_test, scaler = prepare_data(X, y)

# Report the shape of every tensor that came back
print(f"\nData shapes:")
for tensor_name, tensor in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{tensor_name}: {tensor.shape}")

## 3. Single-Layer ANN Implementation

In [None]:
class SingleLayerANN:
    """
    Custom Single-Layer Artificial Neural Network implementation
    using only basic PyTorch operations (no torch.nn).

    The model computes ``sigmoid(X @ weights + bias)`` and exposes a binary
    cross entropy loss. It holds the parameters and the learning rate but
    does not update them itself.
    """

    def __init__(self, input_size, learning_rate=0.01, device='cpu'):
        """
        Create the parameters and print a short summary.

        Args:
            input_size: Number of input features (rows of the weight matrix).
            learning_rate: Step size stored on the model as ``learning_rate``.
            device: Device string or object accepted by ``torch.device``.
        """
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.device = torch.device(device)

        # He-style initialization: standard normal scaled by sqrt(2 / fan_in).
        # The scaling is applied BEFORE gradient tracking is switched on so
        # that `weights` is a leaf tensor rather than the output of a
        # multiplication that drags an autograd graph along with it.
        init_std = float(np.sqrt(2.0 / input_size))
        self.weights = (
            torch.randn(input_size, 1, device=self.device) * init_std
        ).requires_grad_(True)
        self.bias = torch.zeros(1, device=self.device, requires_grad=True)

        print(f"Initialized Single-Layer ANN:")
        print(f"  Input size: {input_size}")
        print(f"  Learning rate: {learning_rate}")
        print(f"  Device: {self.device}")
        print(f"  Weights shape: {self.weights.shape}")
        print(f"  Initial weights: {self.weights.detach().cpu().numpy().flatten()}")
        print(f"  Initial bias: {self.bias.detach().cpu().numpy().flatten()}")

    def sigmoid(self, z):
        """Sigmoid activation function: 1 / (1 + exp(-z))."""
        z = torch.clamp(z, -500, 500)  # Bound extreme logits before exponentiating
        return 1.0 / (1.0 + torch.exp(-z))

    def forward(self, X):
        """Forward pass: Y = sigmoid(w^T * x + b), one row of X per sample."""
        z = X @ self.weights + self.bias
        y_pred = self.sigmoid(z)
        return y_pred

    def binary_cross_entropy_loss(self, y_pred, y_true):
        """
        Mean Binary Cross Entropy between predicted probabilities and labels.

        Args:
            y_pred: Floating-point tensor of predicted probabilities.
            y_true: Tensor of 0/1 targets, broadcastable against ``y_pred``.

        Returns:
            Scalar tensor holding the mean loss over all elements.
        """
        # The margin must be representable in the tensor's dtype. A fixed
        # 1e-15 is below float32 resolution, so `1 - 1e-15` rounds to 1.0,
        # the upper clamp does nothing, and a saturated prediction produces
        # 0 * log(0) = NaN. Machine epsilon of the dtype avoids that.
        epsilon = torch.finfo(y_pred.dtype).eps
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
        loss = -(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))
        return loss.mean()

# Build the model on the GPU when one is present, otherwise on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = SingleLayerANN(device=device, input_size=2, learning_rate=0.1)

## 4. Manual Training Implementation

In [None]:
def train_step(model, X, y):
    """
    Single full-batch training step with manual gradient computation.

    Runs the forward pass, evaluates the model's loss, derives the gradients
    of the mean binary cross entropy by hand, and applies one gradient
    descent update to ``model.weights`` and ``model.bias`` in place.

    Args:
        model: Object providing ``forward``, ``binary_cross_entropy_loss``,
            ``weights``, ``bias`` and ``learning_rate``.
        X: Input tensor with one sample per row.
        y: Target tensor with the same shape as the model's predictions.

    Returns:
        The loss value (Python float) measured before the update.
    """
    # Gradients are derived by hand below, so autograd tracking would only
    # build a graph that is never used.
    with torch.no_grad():
        # Forward pass
        y_pred = model.forward(X)

        # Compute loss
        loss = model.binary_cross_entropy_loss(y_pred, y)

        batch_size = X.shape[0]

        # Chain rule for mean BCE on top of a sigmoid:
        #   dL/dp = -(y / p - (1 - y) / (1 - p)) / N
        #   dp/dz = p * (1 - p)
        #   dL/dz = dL/dp * dp/dz = (p - y) / N
        # The simplified product is used directly. Evaluating the two factors
        # separately yields 0/0 = NaN once a prediction saturates to exactly
        # 0 or 1, which would corrupt every parameter.
        d_loss_d_z = (y_pred - y) / batch_size

        # Gradients w.r.t. weights and bias
        d_loss_d_w = X.T @ d_loss_d_z
        d_loss_d_b = d_loss_d_z.sum(dim=0)

        # Manual parameter update (gradient descent)
        model.weights -= model.learning_rate * d_loss_d_w
        model.bias -= model.learning_rate * d_loss_d_b

    return loss.item()

def calculate_accuracy(model, X, y):
    """Return the percentage of samples whose thresholded prediction equals y.

    Probabilities from ``model.forward(X)`` are mapped to class 1 when they
    are at least 0.5 and to class 0 otherwise.
    """
    with torch.no_grad():
        probabilities = model.forward(X)
        predicted_labels = (probabilities >= 0.5).float()
        hits = torch.sum((predicted_labels == y).float())
        percent_correct = (hits / y.shape[0]) * 100
    return percent_correct.item()

# Confirmation message printed once the training helpers in this cell are defined
print("Training functions defined successfully!")

## 5. Training Loop

In [None]:
# Number of full-batch passes over the training set
epochs = 50

# Per-epoch history used by the later reporting and plotting cells
train_losses, train_accuracies, test_accuracies = [], [], []

divider = "=" * 60
print(f"Training Single-Layer ANN for {epochs} epochs...")
print(divider)

for epoch in range(1, epochs + 1):
    # One gradient-descent update; the returned loss is recorded as-is
    loss = train_step(model, X_train, y_train)
    train_losses.append(loss)

    # Accuracy on both splits, measured after the update
    train_acc = calculate_accuracy(model, X_train, y_train)
    train_accuracies.append(train_acc)
    test_acc = calculate_accuracy(model, X_test, y_test)
    test_accuracies.append(test_acc)

    # Log the first epoch, every tenth epoch, and the last epoch
    should_report = epoch in (1, epochs) or epoch % 10 == 0
    if should_report:
        print(f"Epoch {epoch:2d}: Loss = {loss:.4f}, Train Acc = {train_acc:.1f}%, Test Acc = {test_acc:.1f}%")

print(divider)
print("Training completed!")

# Accuracy after the last epoch
final_train_acc, final_test_acc = train_accuracies[-1], test_accuracies[-1]

print(f"\nFinal Results:")
print(f"Training Accuracy: {final_train_acc:.1f}%")
print(f"Test Accuracy: {final_test_acc:.1f}%")

## 6. Results in Assignment Format

In [None]:
# Print the metrics in the layout of the assignment's sample output
banner = "=" * 50
print(banner, "SAMPLE OUTPUT FORMAT (as requested):", banner, sep="\n")

first_epoch_loss = train_losses[0]
print(f"Epoch 1: Loss = {first_epoch_loss:.2f}")
# The epoch-30 line only exists when training ran at least that long
if not len(train_losses) < 30:
    print(f"Epoch 30: Loss = {train_losses[29]:.2f}")
last_test_accuracy = test_accuracies[-1]
print(f"Accuracy on test set = {last_test_accuracy:.1f}%")

# Learned parameters, flattened to 1-D arrays for display
print("\n" + banner)
print("FINAL MODEL PARAMETERS:")
print(banner)
final_weights = model.weights.detach().cpu().numpy().flatten()
final_bias = model.bias.detach().cpu().numpy().flatten()
print(f"Final weights: {final_weights}")
print(f"Final bias: {final_bias}")

## 7. Visualization

In [None]:
# 2x2 dashboard: raw data, loss curve, accuracy curves, decision surface
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes

# Panel 1: the dataset in its original (unscaled) feature space
scatter = ax1.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.7)
ax1.set(xlabel='Feature 1 (f1)', ylabel='Feature 2 (f2)', title='Original Dataset')
plt.colorbar(scatter, ax=ax1)

# Panel 2: training loss per epoch
loss_epochs = range(1, len(train_losses) + 1)
ax2.plot(loss_epochs, train_losses, 'b-', linewidth=2)
ax2.set(xlabel='Epoch', ylabel='Loss', title='Training Loss Over Time')
ax2.grid(True, alpha=0.3)

# Panel 3: train and test accuracy per epoch on shared axes
accuracy_curves = (
    (train_accuracies, 'g-', 'Train Accuracy'),
    (test_accuracies, 'r-', 'Test Accuracy'),
)
for series, line_style, legend_label in accuracy_curves:
    ax3.plot(range(1, len(series) + 1), series, line_style, label=legend_label, linewidth=2)
ax3.set(xlabel='Epoch', ylabel='Accuracy (%)', title='Training vs Test Accuracy')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Panel 4: predicted probability surface over the standardized training data
X_train_np = X_train.detach().cpu().numpy()
y_train_np = y_train.detach().cpu().numpy().flatten()
f1_values, f2_values = X_train_np[:, 0], X_train_np[:, 1]

# Regular grid covering the training points plus a margin of 1 on each side
h = 0.02
x_min, x_max = f1_values.min() - 1, f1_values.max() + 1
y_min, y_max = f2_values.min() - 1, f2_values.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Evaluate the model at every grid point, then fold back into the grid shape
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
mesh_points = torch.FloatTensor(grid_points).to(model.device)
with torch.no_grad():
    Z = model.forward(mesh_points).detach().cpu().numpy().reshape(xx.shape)

# Filled contours of the probability, with the training points drawn on top
ax4.contourf(xx, yy, Z, levels=50, alpha=0.6, cmap='RdYlBu')
scatter = ax4.scatter(f1_values, f2_values, c=y_train_np, cmap='viridis', edgecolors='black')
ax4.set(
    xlabel='Standardized Feature 1',
    ylabel='Standardized Feature 2',
    title='Decision Boundary (Standardized Data)',
)

plt.tight_layout()
plt.show()

print("Training visualization completed!")

## 8. Model Evaluation and Summary

In [None]:
# Closing report: architecture, training statistics, performance, dataset facts
rule = "=" * 60
print(rule)
print("ASSIGNMENT COMPLETED SUCCESSFULLY!")
print(rule)

initial_loss, final_loss = train_losses[0], train_losses[-1]
loss_reduction_pct = (initial_loss - final_loss) / initial_loss * 100

# Each entry is a heading followed by the lines printed underneath it
report_sections = [
    ("\nModel Architecture:", [
        "  Formula: Y = sigmoid(w^T * x + b)",
        "  Input features: 2",
        "  Output: 1 (binary classification)",
        "  Activation: Sigmoid",
        "  Loss: Binary Cross Entropy",
        "  Optimizer: Manual Gradient Descent",
    ]),
    ("\nTraining Summary:", [
        f"  Total epochs: {epochs}",
        f"  Learning rate: {model.learning_rate}",
        f"  Initial loss: {initial_loss:.4f}",
        f"  Final loss: {final_loss:.4f}",
        f"  Loss reduction: {loss_reduction_pct:.1f}%",
    ]),
    ("\nFinal Performance:", [
        f"  Training accuracy: {final_train_acc:.1f}%",
        f"  Test accuracy: {final_test_acc:.1f}%",
    ]),
    ("\nDataset Information:", [
        f"  Total samples: {len(X)}",
        f"  Training samples: {len(X_train)}",
        f"  Test samples: {len(X_test)}",
        "  Features: 2 (f1, f2)",
        "  Classes: 2 (binary classification)",
    ]),
    ("\n✓ All requirements fulfilled:", [
        "  ✓ Used only basic PyTorch operations (no torch.nn)",
        "  ✓ Generated dataset using sklearn.datasets.make_classification()",
        "  ✓ Saved dataset to CSV file",
        "  ✓ Implemented Y = w^T * x + b with sigmoid activation",
        "  ✓ Used Binary Cross Entropy loss",
        "  ✓ Manual gradient computation and weight updates",
        "  ✓ Used GPU when available",
        "  ✓ Provided detailed comments explaining each step",
    ]),
]

for heading, body_lines in report_sections:
    print(heading)
    for body_line in body_lines:
        print(body_line)