# Deep Learning with PyTorch

## Learning Objectives
By the end of this lesson, you will be able to:
- Understand what neural networks are and how they work
- Create and train neural networks with PyTorch
- Build models for image classification
- Understand tensors and automatic differentiation

## Core Concepts
- **Neural Network**: Computer model inspired by how brain neurons work
- **Tensor**: Multi-dimensional array (like an advanced spreadsheet)
- **Layer**: Building block that processes and transforms data
- **Training**: Process of teaching the network to make good predictions
- **Gradient**: How much the loss changes when a weight changes; it tells the optimizer in which direction and by how much to adjust each weight

## 1. PyTorch Basics and Tensors

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

print(f"PyTorch version: {torch.__version__}")

# A tensor is PyTorch's n-dimensional array type; it behaves much like a
# NumPy array and can additionally track operations for differentiation.
print("🔢 TENSORS: Building blocks of neural networks")

# Three ways to build a tensor: from a flat list, from nested lists, and
# from a random-number generator (standard normal values, shape 3x4).
data_1d = torch.tensor([1.0, 2.0, 3.0, 4.0])
data_2d = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
random_data = torch.randn(3, 4)

print(f"1D tensor: {data_1d}")
print(f"2D tensor:\n{data_2d}")
print(f"Random tensor shape: {random_data.shape}")

# Arithmetic is applied element by element, as in NumPy.
print("\nTensor math:")
for label, result in (
    ("Add", data_1d.add(1)),
    ("Multiply", data_1d.mul(2)),
    ("Square", data_1d.pow(2)),
):
    print(f"{label}: {result}")

# Autograd demo: differentiate y = x^2 + 3x + 1 at x = 2.
print("\n⚡ GRADIENTS: How learning happens")

# requires_grad=True asks PyTorch to track operations on x.
x = torch.tensor([2.0], requires_grad=True)
y = x.pow(2) + x.mul(3) + 1

# backward() fills x.grad with dy/dx evaluated at the current x.
y.backward()

x_value = x.item()
print(f"x = {x_value}")
print(f"y = x² + 3x + 1 = {y.item()}")
print(f"Gradient (dy/dx) = {x.grad.item()}")
print(f"Expected gradient = 2x + 3 = {2 * x_value + 3}")

# Real example: predicting house prices with tensors
print("\n🏠 PRACTICAL EXAMPLE: House price prediction")

# Each row of house_features is one house: [size in sqft, bedrooms].
# house_prices holds the matching sale price in dollars, one per row.
house_features = torch.tensor(
    [[1200, 2], [1800, 3], [2400, 4], [1600, 3]], dtype=torch.float32
)
house_prices = torch.tensor(
    [[250000], [350000], [480000], [320000]], dtype=torch.float32
)

print(f"House features shape: {house_features.shape}")
print(f"House prices shape: {house_prices.shape}")

# Unpack the first row so each value is printed with its own unit.
# Printing the row tensor directly would label the bedroom count as "sqft".
sample_size, sample_bedrooms = house_features[0].tolist()
print(
    f"Sample house: {sample_size:,.0f} sqft, {sample_bedrooms:.0f} bedrooms "
    f"→ ${house_prices[0].item():,.0f}"
)

## 2. Building Your First Neural Network

In [None]:
# Define a minimal feed-forward network
print("🧠 BUILDING A NEURAL NETWORK")


class SimpleNetwork(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the single hidden layer.
        output_size: Number of values produced per input row.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Sub-module names and creation order are kept as-is: they show up
        # in the module's printed summary.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of inputs to raw (un-activated) outputs."""
        hidden = self.layer1(x)
        activated = self.relu(hidden)
        return self.layer2(activated)

# Create network for house price prediction
model = SimpleNetwork(input_size=2, hidden_size=10, output_size=1)
print(f"Model created: {model}")

# Define loss function and optimizer
criterion = nn.MSELoss()  # For regression (predicting numbers)
optimizer = optim.Adam(model.parameters(), lr=0.01)

print("Loss function: Mean Squared Error")
print("Optimizer: Adam (learning rate = 0.01)")

# Training the network
print("\n🎯 TRAINING THE NETWORK")

# Standardize BOTH the features and the targets (zero mean, unit spread).
# Dividing prices by 1000 alone leaves targets in the hundreds; with
# lr=0.01 and only 100 epochs the weights cannot grow far enough to reach
# that scale, so predictions stay far below the real prices. With
# standardized targets the network only has to produce values near [-2, 2].
# The statistics are computed once and reused for every later transform.
feature_mean = house_features.mean(dim=0)
feature_std = house_features.std(dim=0)
price_mean = house_prices.mean()
price_std = house_prices.std()

house_features_norm = (house_features - feature_mean) / feature_std
house_prices_norm = (house_prices - price_mean) / price_std

# Loss values are recorded in standardized-price units.
losses = []
for epoch in range(100):
    # Forward pass
    predictions = model(house_features_norm)
    loss = criterion(predictions, house_prices_norm)

    # Backward pass: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    losses.append(loss.item())

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# Test the trained model
print("\n🔍 TESTING THE MODEL")
model.eval()
with torch.no_grad():
    # Undo the target standardization to get predictions back in dollars.
    test_predictions = model(house_features_norm) * price_std + price_mean

print("Predictions vs Actual:")
for house_number, (actual_row, predicted_row) in enumerate(
    zip(house_prices, test_predictions), start=1
):
    actual = actual_row.item()
    predicted = predicted_row.item()
    error = abs(actual - predicted)
    print(
        f"House {house_number}: Predicted ${predicted:,.0f}, "
        f"Actual ${actual:,.0f}, Error ${error:,.0f}"
    )

# Visualize training progress
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.plot(losses)
plt.title('Training Loss Over Time')
plt.xlabel('Epoch')
plt.ylabel('Loss')

plt.subplot(1, 2, 2)
actual_prices = house_prices.numpy().flatten()
pred_prices = test_predictions.numpy().flatten()
plt.scatter(actual_prices, pred_prices)
# Dashed diagonal marks where a perfect prediction would fall.
price_range = [actual_prices.min(), actual_prices.max()]
plt.plot(price_range, price_range, 'r--')
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.title('Predictions vs Actual')

plt.tight_layout()
plt.show()

# Make prediction for new house
new_house = torch.tensor([[2000, 3]], dtype=torch.float32)  # 2000 sqft, 3 bedrooms
# New inputs must be scaled with the statistics of the training data.
new_house_norm = (new_house - feature_mean) / feature_std

with torch.no_grad():
    new_prediction = model(new_house_norm) * price_std + price_mean
    print(f"\nNew house prediction: 2000 sqft, 3 bedrooms → ${new_prediction.item():,.0f}")

## 3. Classification with Neural Networks

In [None]:
# Classification: Predicting categories (not numbers)
print("🎯 CLASSIFICATION: Customer purchase prediction")

# Generate sample customer data: 1000 rows, 4 numeric columns, 2 classes.
X, y = make_classification(n_samples=1000, n_features=4, n_classes=2, random_state=42)
# Labels for the four generated columns, used for display only.
feature_names = ['age', 'income', 'web_visits', 'cart_value']

# Convert to tensors with explicit dtypes: float32 inputs for the linear
# layers and int64 class indices for the classification loss.
# torch.tensor(..., dtype=...) replaces the legacy FloatTensor/LongTensor
# constructors.
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

# Split data: 80% for training, 20% held out for testing
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor, y_tensor, test_size=0.2, random_state=42
)

print(f"Training data: {X_train.shape[0]} customers")
print(f"Test data: {X_test.shape[0]} customers")
print(f"Features: {feature_names}")

# Feed-forward classifier for the 4-feature customer data
class ClassificationNet(nn.Module):
    """Three-layer classifier: 4 inputs -> 16 -> 8 -> 2 class scores.

    Dropout (p=0.2) is applied after the first hidden activation. The
    output is raw class scores (logits); no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=4, out_features=16)
        self.layer2 = nn.Linear(in_features=16, out_features=8)
        self.layer3 = nn.Linear(in_features=8, out_features=2)  # buy / not buy
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return one pair of class scores per input row."""
        hidden = self.dropout(self.relu(self.layer1(x)))
        hidden = self.relu(self.layer2(hidden))
        return self.layer3(hidden)

# Instantiate the classifier together with its loss and optimizer
clf_model = ClassificationNet()
criterion = nn.CrossEntropyLoss()  # For classification
optimizer = optim.Adam(clf_model.parameters(), lr=0.001)

print("\n🔥 TRAINING CLASSIFICATION MODEL")

# Per-epoch history, plotted after training
train_losses, train_accuracies = [], []

for epoch in range(50):
    # Full-batch training step in training mode (dropout active)
    clf_model.train()
    outputs = clf_model(X_train)
    loss = criterion(outputs, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Training accuracy: the predicted class is the index of the highest
    # score; computed on a detached copy so it stays out of the graph.
    predicted = outputs.detach().argmax(dim=1)
    accuracy = predicted.eq(y_train).float().mean()

    train_losses.append(loss.item())
    train_accuracies.append(accuracy.item())

    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}, Accuracy = {accuracy.item():.3f}")

# Evaluate on test data
clf_model.eval()  # switch dropout off for evaluation
with torch.no_grad():
    test_outputs = clf_model(X_test)
    # Predicted class = index of the largest score in each row. Gradients
    # are already disabled here, so no `.data` access is needed.
    test_predictions = test_outputs.argmax(dim=1)
    test_accuracy = (test_predictions == y_test).float().mean()

    # Get prediction probabilities (softmax turns scores into values
    # that sum to 1 across the two classes)
    test_probs = torch.softmax(test_outputs, dim=1)

print(f"\nTest accuracy: {test_accuracy.item():.3f} ({test_accuracy.item()*100:.1f}%)")

# Example predictions: show up to the first five test customers. Slicing
# keeps this safe when the test set holds fewer than five rows.
print("\nSample predictions:")
sample_rows = zip(y_test[:5].tolist(), test_predictions[:5].tolist(), test_probs[:5])
for customer_number, (actual, predicted, class_probs) in enumerate(sample_rows, start=1):
    # Confidence is the probability assigned to the predicted class.
    confidence = class_probs[predicted].item()

    print(f"Customer {customer_number}: Predicted {'Buy' if predicted==1 else 'No Buy'} "
          f"(Actual: {'Buy' if actual==1 else 'No Buy'}), Confidence: {confidence:.2f}")

# Plot the training history and the test-set confusion matrix side by side
from sklearn.metrics import confusion_matrix

fig, (loss_ax, acc_ax, cm_ax) = plt.subplots(1, 3, figsize=(12, 4))

# Panel 1: loss per epoch
loss_ax.plot(train_losses)
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

# Panel 2: training accuracy per epoch
acc_ax.plot(train_accuracies)
acc_ax.set_title('Training Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')

# Panel 3: confusion matrix (rows = actual class, columns = predicted)
actual_np = y_test.numpy()
pred_np = test_predictions.numpy()
cm = confusion_matrix(actual_np, pred_np)

cm_image = cm_ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
cm_ax.set_title('Confusion Matrix')
fig.colorbar(cm_image, ax=cm_ax)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')

fig.tight_layout()
plt.show()

# Summary of the section
for insight in (
    "\n💡 KEY INSIGHTS:",
    "✅ Neural networks can solve both regression and classification",
    "✅ More layers can capture complex patterns",
    "✅ Dropout prevents overfitting",
    "✅ CrossEntropyLoss for classification, MSELoss for regression",
    "✅ Always evaluate on separate test data",
):
    print(insight)

## 4. Practice Exercises

In [None]:
# Exercise 1: Stock price prediction
print("Exercise 1: Stock Price Prediction")
print("Build a neural network to predict stock prices")

# Generate sample stock data: a rising trend plus a slow sine wave plus
# random noise, one price per day for 100 days (shape 100x1).
days = torch.arange(100, dtype=torch.float32).reshape(-1, 1)
prices = 100 + 0.5 * days + 10 * torch.sin(days / 10) + torch.randn(100, 1) * 5


# Create sequences (use past 5 days to predict next day)
def create_sequences(data, seq_length=5):
    """Slice a series into sliding input windows and next-step targets.

    Args:
        data: Tensor whose first dimension is time.
        seq_length: Number of consecutive steps in each input window.

    Returns:
        A pair ``(windows, targets)``. ``windows[i]`` is
        ``data[i:i + seq_length]`` and ``targets[i]`` is
        ``data[i + seq_length]``. When ``data`` has no more than
        ``seq_length`` steps there is nothing to predict, and two empty
        tensors with the matching trailing shape are returned instead of
        letting ``torch.stack`` fail on an empty list.
    """
    n_windows = len(data) - seq_length
    if n_windows <= 0:
        trailing_shape = tuple(data.shape[1:])
        empty_windows = data.new_empty((0, seq_length) + trailing_shape)
        empty_targets = data.new_empty((0,) + trailing_shape)
        return empty_windows, empty_targets

    windows = [data[start:start + seq_length] for start in range(n_windows)]
    targets = [data[start + seq_length] for start in range(n_windows)]
    return torch.stack(windows), torch.stack(targets)


X_stock, y_stock = create_sequences(prices)
print(f"Stock data shape: {X_stock.shape} -> {y_stock.shape}")

# Your task: Build and train a network for stock prediction
# Hint: Use nn.LSTM or multiple nn.Linear layers

# Exercise 2: Image classification with MNIST-style data
print("\nExercise 2: Simple Image Classification")


# Build toy 8x8 "images", stored flat as 64 values per row
def create_simple_images(n_samples=200):
    """Return random flat images and a binary label for each one.

    Every image is a row of 64 standard-normal values (8x8 = 64 pixels).
    A row is labelled 1 when the mean of its values is positive and 0
    otherwise.

    Args:
        n_samples: Number of images to generate.

    Returns:
        ``(images, labels)`` with shapes ``(n_samples, 64)`` and
        ``(n_samples,)``; labels are int64.
    """
    pixels = torch.randn(n_samples, 64)
    has_positive_mean = pixels.mean(dim=1).gt(0)
    return pixels, has_positive_mean.to(torch.long)


img_data, img_labels = create_simple_images()
print(f"Image data: {img_data.shape}, Labels: {img_labels.shape}")

# Your task: build a CNN-style network for image classification.
# The fully connected model below works on the 64 flattened pixel values.
class ImageNet(nn.Module):
    """Three-layer classifier: 64 inputs -> 32 -> 16 -> 2 class scores.

    Dropout (p=0.3) is applied after the first hidden activation. The
    output is raw class scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=64, out_features=32)
        self.layer2 = nn.Linear(in_features=32, out_features=16)
        self.layer3 = nn.Linear(in_features=16, out_features=2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Return one pair of class scores per flattened image."""
        hidden = self.dropout(self.relu(self.layer1(x)))
        hidden = self.relu(self.layer2(hidden))
        return self.layer3(hidden)

# Train the image classifier
img_model = ImageNet()
img_criterion = nn.CrossEntropyLoss()
img_optimizer = optim.Adam(img_model.parameters(), lr=0.001)

# Split data: 80% for training, 20% held out for testing
X_img_train, X_img_test, y_img_train, y_img_test = train_test_split(
    img_data, img_labels, test_size=0.2, random_state=42)

# Quick training loop (full batch, 30 epochs)
print("Training image classifier...")
img_model.train()  # make sure dropout is active while training
for epoch in range(30):
    outputs = img_model(X_img_train)
    loss = img_criterion(outputs, y_img_train)

    img_optimizer.zero_grad()
    loss.backward()
    img_optimizer.step()

    if epoch % 10 == 0:
        # Predicted class = index of the largest score in each row
        predicted = outputs.argmax(dim=1)
        accuracy = (predicted == y_img_train).float().mean()
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}, Accuracy = {accuracy.item():.3f}")

# Test accuracy. The model must be switched to eval mode first; otherwise
# dropout keeps zeroing random activations and the test score is both
# noisy and lower than what the trained model can actually achieve.
img_model.eval()
with torch.no_grad():
    test_outputs = img_model(X_img_test)
    test_pred = test_outputs.argmax(dim=1)
    test_acc = (test_pred == y_img_test).float().mean()
    print(f"Test accuracy: {test_acc.item():.3f}")

# Exercise 3: Experiment with architecture
print("\nExercise 3: Architecture Experiments")
print("Try these modifications and see how performance changes:")

# Different architectures to try. Each list gives the layer widths from
# input (4 features) to output (2 classes).
architectures = {
    'Small': [4, 8, 2],
    'Medium': [4, 16, 8, 2],
    'Large': [4, 32, 16, 8, 2],
    'Deep': [4, 16, 16, 16, 16, 2]
}


def create_network(layers):
    """Build a fully connected network from a list of layer widths.

    Args:
        layers: Sequence of widths, e.g. ``[4, 16, 2]`` for 4 inputs, one
            hidden layer of 16 units, and 2 outputs.

    Returns:
        An ``nn.Sequential`` with one ``nn.Linear`` per consecutive pair of
        widths and an ``nn.ReLU`` after every linear layer except the last.
        Fewer than two widths yields an empty ``nn.Sequential``.
    """
    modules = []
    last_linear = len(layers) - 2
    for position, (in_features, out_features) in enumerate(zip(layers, layers[1:])):
        modules.append(nn.Linear(in_features, out_features))
        if position < last_linear:  # No activation after last layer
            modules.append(nn.ReLU())
    return nn.Sequential(*modules)


print("Architecture comparison:")
for name, layers in architectures.items():
    # Use a dedicated name here. Rebinding `model` would overwrite the
    # trained house-price model created earlier in the notebook.
    candidate = create_network(layers)
    total_params = sum(p.numel() for p in candidate.parameters())
    print(f"{name}: {layers} -> {total_params} parameters")

# Exercise 4: Regularization techniques
print("\nExercise 4: Preventing Overfitting")


class RegularizedNet(nn.Module):
    """Classifier (4 -> 32 -> 16 -> 2) with batch norm and dropout.

    The first hidden layer's activations are batch-normalized and then
    passed through dropout before the remaining layers.

    Args:
        dropout_rate: Probability of zeroing an activation in training mode.
    """

    def __init__(self, dropout_rate=0.2):
        super().__init__()
        self.layer1 = nn.Linear(in_features=4, out_features=32)
        self.layer2 = nn.Linear(in_features=32, out_features=16)
        self.layer3 = nn.Linear(in_features=16, out_features=2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        self.batch_norm = nn.BatchNorm1d(num_features=32)

    def forward(self, x):
        """Return one pair of class scores per input row."""
        first_hidden = self.relu(self.layer1(x))
        regularized = self.dropout(self.batch_norm(first_hidden))
        second_hidden = self.relu(self.layer2(regularized))
        return self.layer3(second_hidden)


print("Regularization techniques:")
for technique in (
    "✅ Dropout: Randomly turn off neurons during training",
    "✅ Batch Normalization: Normalize inputs to each layer",
    "✅ Weight Decay: Add penalty for large weights",
    "✅ Early Stopping: Stop training when validation loss increases",
):
    print(technique)

# Exercise 5: Learning rate experiments
print("\nExercise 5: Learning Rate Tuning")
learning_rates = [0.1, 0.01, 0.001, 0.0001]

print("Try different learning rates:")
for lr in learning_rates:
    # Rule of thumb used here: above 0.01 is too high, 0.001 to 0.01 is
    # good, anything smaller is too low.
    if lr > 0.01:
        verdict = 'Too high (loss explodes)'
    elif lr >= 0.001:
        verdict = 'Good'
    else:
        verdict = 'Too low (slow learning)'
    print(f"Learning rate {lr}: {verdict}")

# Key takeaways
for takeaway in (
    "\n🎯 KEY TAKEAWAYS:",
    "✅ Start simple, then add complexity",
    "✅ More data usually beats more complex models",
    "✅ Regularization prevents overfitting",
    "✅ Learning rate is crucial for training",
    "✅ Always validate on separate test data",
    "✅ PyTorch gives you full control over the training process",
):
    print(takeaway)

# Next steps
for step in (
    "\n🚀 NEXT STEPS:",
    "1. Try convolutional layers for image data",
    "2. Experiment with LSTM for sequence data",
    "3. Use pre-trained models (transfer learning)",
    "4. Deploy models to production",
    "5. Learn about transformers and attention mechanisms",
):
    print(step)