In [None]:
# Sumani
# 20-7-2024

# Introduction

This notebook explains the concepts of overfitting and underfitting, common issues encountered when training machine learning and deep learning models.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Pin the global RNG of both libraries so that the noisy data and the
# randomly initialised network weights are the same on every fresh run.
_ = torch.manual_seed(seed=0)  # bind the returned Generator so the cell prints nothing
np.random.seed(seed=0)

In [None]:
# Generate quadratic data with noise
def generate_data(n_samples=100):
    """Build a noisy quadratic toy dataset.

    The inputs are ``n_samples`` evenly spaced points on [-10, 10]. The
    targets follow ``0.5 * x**2 + 3 * x + 10`` plus standard-normal noise
    scaled by 10, drawn from NumPy's global random state.

    Args:
        n_samples: Number of points to generate.

    Returns:
        A tuple ``(X, y)`` of 1-D NumPy arrays, each of length ``n_samples``.
    """
    inputs = np.linspace(-10, 10, n_samples)
    noise = np.random.randn(n_samples) * 10
    trend = 0.5 * inputs**2 + 3 * inputs + 10  # noise-free quadratic pattern
    return inputs, trend + noise

# Draw one sample of the toy dataset; X and y are reused by the cells below.
X, y = generate_data()

# Scatter the raw points to inspect the generated data before fitting anything.
plt.scatter(X, y, label='Data points')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Quadratic Data with Noise')
plt.legend()
plt.show()

In [None]:
# Simple Linear Model
class LinearModel(nn.Module):
    """Single affine layer mapping one input feature to one output value."""

    def __init__(self):
        super().__init__()
        # One weight and one bias: the model can only represent a straight line.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the affine layer to ``x``, whose last dimension must be 1."""
        prediction = self.linear(x)
        return prediction


In [None]:
class OverfittingModel(nn.Module):
    """Large fully connected network: 1 -> 1024 -> 512 -> 128 -> 32 -> 1.

    Every layer except the last is followed by a ReLU activation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=1, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Run ``x`` through the four ReLU hidden layers and the linear output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        # The output layer has no activation so predictions are unbounded.
        return self.fc5(hidden)

In [None]:
def train_model(model, X_train, y_train, num_epochs=1000, learning_rate=0.001):
    """Fit ``model`` in place with full-batch Adam on a mean-squared-error loss.

    Args:
        model: The ``nn.Module`` to optimise. It is called on the whole
            feature tensor at once and its output is compared with the targets.
        X_train: Training inputs as a NumPy array, sequence or tensor. A 1-D
            input of length N is treated as N samples with one feature and
            reshaped to ``(N, 1)``; inputs with two or more dimensions are
            used as given.
        y_train: Training targets, handled the same way as ``X_train``.
        num_epochs: Number of optimisation steps; each step uses all samples.
        learning_rate: Step size passed to ``optim.Adam``.

    Returns:
        The same ``model`` object, left in training mode.

    Raises:
        ValueError: If ``X_train`` and ``y_train`` contain a different number
            of samples.
    """
    # as_tensor accepts arrays and tensors alike (no copy-construct warning);
    # detach() keeps the training data out of any existing autograd graph.
    features = torch.as_tensor(X_train, dtype=torch.float32).detach()
    targets = torch.as_tensor(y_train, dtype=torch.float32).detach()
    if features.dim() < 2:
        features = features.reshape(-1, 1)
    if targets.dim() < 2:
        targets = targets.reshape(-1, 1)

    # Fail loudly instead of letting MSELoss broadcast mismatched shapes.
    if features.shape[0] != targets.shape[0]:
        raise ValueError(
            f"X_train has {features.shape[0]} samples but y_train has {targets.shape[0]}"
        )

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()  # loop-invariant: nothing inside the loop switches the mode
    for _ in range(num_epochs):
        optimizer.zero_grad()
        loss = criterion(model(features), targets)
        loss.backward()
        optimizer.step()

    return model

In [None]:
def plot_model(model, X, y, title):
    """Scatter the data and overlay the model's predictions as a red line.

    Args:
        model: Module evaluated on ``X``; it is switched to eval mode and
            left in that mode afterwards.
        X: 1-D sequence of input values, also used as the x-axis.
        y: Observed targets plotted against ``X``.
        title: Text shown as the figure title.
    """
    model.eval()
    with torch.no_grad():  # inference only, no autograd graph needed
        inputs = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        predictions = model(inputs).numpy()

    plt.scatter(X, y, label='Data points')
    plt.plot(X, predictions, color='red', label='Model prediction')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:
# Underfitting model
linear_model = LinearModel()
# Adam moves each parameter by roughly `learning_rate` per step, so the default
# 0.001 over 1000 epochs shifts the weight and bias by only about 1.0 in total.
# That is far from the least-squares line for this data (the targets average
# well above 20), and the plot would show an under-trained line rather than the
# limited capacity of a linear model. A larger step size lets the fit converge.
linear_model = train_model(linear_model, X, y, num_epochs=1000, learning_rate=0.1)

# Plot results for underfitting model
plot_model(linear_model, X, y, 'Underfitting Model (Linear Regression)')

In [None]:
# Train the large five-layer network on the same 100 points.
overfitting_model = train_model(OverfittingModel(), X, y, num_epochs=1000)

# Overlay its predictions on the training data.
plot_model(overfitting_model, X, y, 'Overfitting Model (Deep Neural Network)')

In [None]:
class IdealModel(nn.Module):
    """Small fully connected network: 1 -> 32 -> 32 -> 1 with ReLU activations."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=1, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Run ``x`` through the two ReLU hidden layers and the linear output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

In [None]:
# Ideal model
ideal_model = IdealModel()
ideal_model = train_model(ideal_model, X, y, num_epochs=1000)

# IdealModel is a two-hidden-layer ReLU network, not a polynomial regressor,
# so the title names the model that is actually plotted.
plot_model(ideal_model, X, y, 'Ideal Model (Small Neural Network)')

# Exercises

In [None]:
# Exercise 1 - Change the number of epochs used to train the OverfittingModel and find the number of epochs that reduces the overfitting

In [None]:
# Exercise 2 - Change the number of neurons in the hidden layers of the OverfittingModel and find a model architecture that reduces the overfitting

In [None]:
# Exercise 3 - Add or remove layers in the OverfittingModel and find a model architecture that reduces the overfitting

In [None]:
# Exercise 4 - Research advanced regularization techniques and apply them to the
# same OverfittingModel (i.e., do not change the original OverfittingModel's architecture) to derive a better model
# with the help of those regularization techniques