In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [None]:
# Seed PyTorch's global random number generator so runs are repeatable.
# Only torch is seeded here; the train/test split passes its own random_state.
torch.manual_seed(42)

In [None]:
# Select the compute device: CUDA when a GPU is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Load the training CSV from the working directory.
# Later cells treat column 0 as the class label and the remaining columns as pixels.
df = pd.read_csv('fashion-mnist_train.csv')
df.head()

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# Create a 4x4 grid of images
fig, axes = plt.subplots(4, 4, figsize=(10, 10))
fig.suptitle("First 16 Images", fontsize=16)

# Plot the first 16 images from the dataset
for i, ax in enumerate(axes.flat):
    # Column 0 is the label; the remaining columns are the flattened 28x28 pixels
    img = df.iloc[i, 1:].values.reshape(28, 28)
    # imshow applies its default (non-gray) colormap to 2-D data unless told otherwise,
    # so request grayscale explicitly
    ax.imshow(img, cmap='gray')
    ax.axis('off')  # Remove axis for a cleaner look
    ax.set_title(f"Label: {df.iloc[i, 0]}")  # Show the label

plt.tight_layout(rect=[0, 0, 1, 0.96])  # Leave room at the top for the suptitle
plt.show()


In [None]:
# Separate class labels from pixel features
# (column 0 holds the label, every other column is a pixel value)

y = df.iloc[:, 0].to_numpy()
X = df.iloc[:, 1:].to_numpy()

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Scale pixel values by 1/255 (assumes 8-bit intensities in 0-255 — TODO confirm).
# NOTE: X_train/X_test are overwritten, so re-running this cell without re-running
# the split cell divides by 255 a second time.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
class CustomDataset(Dataset):
  """In-memory dataset pairing each feature row with its integer class label.

  Args:
    features: array-like of numeric rows; converted to a float32 tensor.
    labels: array-like of integer class ids; converted to an int64 (long) tensor.

  Raises:
    ValueError: if features and labels do not contain the same number of samples.
  """

  def __init__(self, features, labels):

    # Convert to PyTorch tensors
    self.features = torch.tensor(features, dtype=torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long)

    # Fail fast on misaligned inputs: __len__ is driven by features alone, so a
    # shorter labels tensor would otherwise only blow up mid-epoch and a longer
    # one would be silently truncated.
    if len(self.features) != len(self.labels):
      raise ValueError(
        f"features and labels must have the same length, "
        f"got {len(self.features)} and {len(self.labels)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, index):
    """Return the (features, label) pair stored at `index`."""
    return self.features[index], self.labels[index]

In [None]:
# Wrap the training split as a Dataset of (features, label) tensor pairs
train_dataset = CustomDataset(X_train, y_train)

In [None]:
# Wrap the held-out split as a Dataset of (features, label) tensor pairs
test_dataset = CustomDataset(X_test, y_test)

In [None]:
# Baseline loaders at a fixed batch size of 32. They must be defined (not commented
# out) because the next cell calls len(train_loader); objective() builds its own
# per-trial loaders inside the function.
# Pinned host memory only speeds up host-to-GPU copies, so request it only on CUDA.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=(device.type == 'cuda'))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=(device.type == 'cuda'))

In [None]:
# Number of mini-batches produced by one pass over train_loader
len(train_loader)

In [None]:
# Network I/O sizes: 28 x 28 flattened pixels in, 10 output scores out
input_dim, output_dim = 28 * 28, 10

In [None]:
class myNN(nn.Module):
  """Fully connected network with a configurable stack of hidden blocks.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. A final Linear
  layer maps to `output_dim` raw scores (no softmax is applied).

  Args:
    input_dim: number of input features per sample.
    output_dim: number of output scores per sample.
    num_hidden_layers: how many hidden blocks to stack; 0 yields a single Linear layer.
    neurons_per_layers: width of every hidden block.
    dropout_rate: drop probability passed to nn.Dropout in each block.
  """

  def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layers,dropout_rate):
    super().__init__()

    layers = []
    in_features = input_dim  # width of the tensor entering the next Linear layer
    for _ in range(num_hidden_layers):
      layers.extend([
        nn.Linear(in_features, neurons_per_layers),
        nn.BatchNorm1d(neurons_per_layers),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
      ])
      in_features = neurons_per_layers

    # Size the output layer from the tracked width rather than neurons_per_layers,
    # so a network with zero hidden layers still accepts `input_dim` features.
    layers.append(nn.Linear(in_features, output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    """Run `x` (feature dimension must equal `input_dim`) through the layer stack."""
    return self.model(x)
    


In [None]:
from tqdm import tqdm

In [None]:
def objective(trial):
    """Optuna objective: train one network with trial-sampled hyperparameters.

    Samples the architecture (depth, width, dropout), the optimizer settings
    (type, learning rate, weight decay), the batch size and the number of
    epochs from `trial`, trains a fresh `myNN` on `train_dataset`, and scores
    it on the held-out `test_dataset`.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: fraction of `test_dataset` samples classified correctly.
    """
    # Hyperparameters to optimize
    num_hidden_layers = trial.suggest_int('num_hidden_layers', 1, 5)
    neurons_per_layers = trial.suggest_int('neurons_per_layers', 8, 128, step=8)
    epochs = trial.suggest_int('epochs', 10, 100, step=10)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-1, log=True)

    # Data loaders setup. Pinned host memory only speeds up host-to-GPU copies,
    # so request it only when running on CUDA.
    use_pinned_memory = device.type == 'cuda'
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=use_pinned_memory)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=use_pinned_memory)

    # Initialize model and move to device
    model = myNN(input_dim, output_dim, num_hidden_layers, neurons_per_layers, dropout_rate)
    model.to(device)

    # Training configuration
    criterion = nn.CrossEntropyLoss()

    # Optimizer selection: all three constructors accept lr and weight_decay
    optimizer_dict = {
        "Adam": optim.Adam,
        "SGD": optim.SGD,
        "RMSprop": optim.RMSprop
    }
    optimizer = optimizer_dict[optimizer_name](model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training phase
    for epoch in tqdm(range(epochs), desc='Epochs'):
        model.train()  # enable dropout and batch-norm statistics updates
        for batch_features, batch_labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}', leave=False):
            # Prepare batch data
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device)

            # Training step
            optimizer.zero_grad()
            output = model(batch_features)
            loss = criterion(output, batch_labels)
            loss.backward()
            optimizer.step()

    # Evaluation phase: score on the held-out split, not the training data, so the
    # study selects hyperparameters that generalize rather than ones that memorize.
    model.eval()
    total = 0
    correct = 0

    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            # Prepare batch data
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device)

            # Predicted class = index of the largest output score
            outputs = model(batch_features)
            predicted = outputs.argmax(dim=1)

            # Accumulate accuracy counts
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total
    return accuracy

In [None]:
# Import optuna library for hyperparameter optimization
import optuna

# Create an optimization study that tries to maximize the objective.
# TPE is Optuna's default sampler; seeding it makes the sequence of suggested
# hyperparameters repeatable, in line with the fixed seeds used elsewhere here.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

In [None]:
# Run optimization study with 50 trials
# Each of the 50 trials trains a fresh model for the sampled number of epochs (10-100),
# so this cell is expensive to run.
study.optimize(objective, n_trials=50)

In [None]:
# Get the best validation accuracy from the study
# study.best_value is the highest value objective() returned across the trials
print(f"Best validation accuracy: {study.best_value}")

# Hyperparameters of the trial that produced best_value
print(f"Best hyperparameters: {study.best_params}")


In [None]:
# learning_rate = 0.1
# epochs = 100

In [None]:
# # instantiate the model
# model = MyNN(X_train.shape[1])
# model = model.to(device)
# # loss function
# criterion = nn.CrossEntropyLoss()
# # optimizer
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-4)

In [None]:
# from tqdm import tqdm

In [None]:
# # Training loop - iterate through epochs
# for epoch in tqdm(range(epochs), desc='Epochs'):
#     # Track metrics for this epoch
#     running_loss = 0.0
    
#     # Inner loop - process each batch of data
#     for batch_features, batch_labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}', leave=False):
#         # Move batch data to specified device (CPU/GPU)
#         batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
#         # Reset gradients to zero before each backward pass
#         optimizer.zero_grad()
        
#         # Forward pass - get model predictions
#         output = model(batch_features)
        
#         # Calculate loss between predictions and actual labels
#         loss = criterion(output, batch_labels)
        
#         # Backward pass - compute gradients
#         loss.backward()
        
#         # Update model parameters using optimizer
#         optimizer.step()
        
#         # Accumulate running loss
#         running_loss += loss.item()
    
#     # Calculate and display average loss for the epoch
#     epoch_loss = running_loss / len(train_loader)
#     print(f'\nEpoch {epoch+1}/{epochs} - Average Loss: {epoch_loss:.4f}')


In [None]:
# model.eval()

In [None]:
# # TRAIN EVALUATION

# # Initialize counters for tracking model performance
# total = 0       # Total number of training samples
# correct = 0     # Number of correct predictions

# # Disable gradient calculation since we're only doing inference
# with torch.no_grad():
#     # Iterate through training data batches
#     for batch_features, batch_labels in train_loader:

#         # Move batch data to specified device (CPU/GPU)
#         batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

#         # Get model predictions for current batch
#         outputs = model(batch_features)

#         # Get the highest probability prediction for each sample
#         _,predicted = torch.max(outputs, 1)

#         # Add batch size to total count
#         total += batch_labels.size(0)
        
#         # Count correct predictions in this batch
#         correct += (predicted == batch_labels).sum().item()

# # Calculate and print final accuracy
# print("TRAIN Accuracy:", correct/total)

In [None]:
# # TEST EVALUATION

# # Initialize counters for tracking model performance
# total = 0       # Total number of test samples
# correct = 0     # Number of correct predictions

# # Disable gradient calculation since we're only doing inference
# with torch.no_grad():
#     # Iterate through test data batches
#     for batch_features, batch_labels in test_loader:

#         # Move batch data to specified device (CPU/GPU)
#         batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

#         # Get model predictions for current batch
#         outputs = model(batch_features)

#         # Get the highest probability prediction for each sample
#         _,predicted = torch.max(outputs, 1)

#         # Add batch size to total count
#         total += batch_labels.size(0)
        
#         # Count correct predictions in this batch
#         correct += (predicted == batch_labels).sum().item()

# # Calculate and print final accuracy
# print("TEST Accuracy:", correct/total)

In [None]:
# Overfitting is a problem in machine learning where the model fits the training data too closely and performs poorly on new data.
# Regularization is a family of techniques used to reduce overfitting in machine learning models.
# Dropout is a regularization technique used in neural networks to reduce overfitting.
# During training, dropout randomly sets a fraction of the input units to 0 at each update; it is turned off at evaluation time (model.eval()).