
## Configuration Variables


In [1]:
DATASET_TRAIN_PATH = '../datasets/lab1_dataset/mnist_train.csv'
DATASET_TEST_PATH = '../datasets/lab1_dataset/mnist_test.csv'

INPUT_DIMS= 28 * 28
HIDDEN_FEATURE_DIMS = 1024
OUTPUT_CLASSES = 10
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 10
LERANING_RATE = 0.001
EPOCHS = 2

## Data Load

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from torchvision import transforms

# Custom dataset class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        # Load the CSV file
        self.data = pd.read_csv(csv_file)
        self.transform = transform

        # Assuming the first column contains labels and the rest are pixel values
        self.labels = self.data.iloc[:, 0].values  # First column is the label
        self.images = self.data.iloc[:, 1:].values.astype('float32')  # Remaining columns are pixel values

        # Normalize the pixel values (subtract the mean and divide by std dev)
        # Calculate mean and std for normalization
        self.mean = np.mean(self.images)
        self.std_dev = np.std(self.images)

        # Normalize using the calculated mean and std
        self.images = (self.images - self.mean) / self.std_dev

        # Reshape the images to 28x28
        self.images = self.images.reshape(-1, 28, 28)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            # Apply transformations (if any)
            image = self.transform(image)

        return image, label

# Define the transformations (you can modify them as needed)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Additional normalization to center data around 0
])

# Initialize the custom dataset using the CSV file path
mnist_dataset = CustomMNISTDataset(DATASET_TRAIN_PATH, transform=transform)
val_mnist_dataset = CustomMNISTDataset(DATASET_TEST_PATH, transform=transform)

# Use DataLoader for batching and shuffling
train_loader = DataLoader(mnist_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_mnist_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False)

# Example: Iterating through the dataset
for images, labels in train_loader:
    print(f'Batch images shape: {images.shape}')
    print(f'Batch labels shape: {labels.shape}')
    break


## Feed-Forward-Neural Networks

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# PyTorch models inherit from torch.nn.Module
class GarmentClassifier(nn.Module):
    def __init__(self, input_size=INPUT_DIMS, hidden_size=HIDDEN_FEATURE_DIMS, output_size=OUTPUT_CLASSES):
        super(GarmentClassifier, self).__init__()
        
        # Note: The input and output layers are not considered hidden layers.
        # Input layer to first hidden layer
        self.input_layer = nn.Linear(input_size, hidden_size)
        # First hidden layer to second hidden layer
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        # First hidden layer to second hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)        
        # Second hidden layer to output layer
        self.output_layer = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        # Flatten the input (batch_size, 1, 28, 28) -> (batch_size, 28*28)
        x = x.view(-1, 28*28)
        # Apply ReLU activation and pass through hidden layers
        x = F.relu(self.input_layer(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Output layer (no activation as it will be used with CrossEntropyLoss)
        x = self.output_layer(x)
        return x

model = GarmentClassifier()    

## Loss & Optimizer

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LERANING_RATE)

## Forward & Backword 

In [None]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.
    last_loss = 0.

    # Iterate over the training data
    for i, data in enumerate(train_loader):
        # Extract inputs (images) and labels
        inputs, labels = data

        # Zero the gradients for each batch
        optimizer.zero_grad()

        # Forward pass: compute model predictions for the batch
        outputs = model(inputs)

        # Compute the loss between predictions and actual labels
        loss = loss_fn(outputs, labels)

        # Backpropagate the loss and compute gradients
        loss.backward()

        # Update model weights based on the gradients
        optimizer.step()

        # Accumulate the loss
        running_loss += loss.item()

        # Log the loss every 1000 batches
        if i % 1000 == 999:
            last_loss = running_loss / 1000  # Calculate average loss over the last 1000 batches
            print(f'  batch {i + 1} loss: {last_loss}')
            
            # TensorBoard logging
            tb_x = epoch_index * len(train_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)

            # Reset the running loss for the next 1000 batches
            running_loss = 0.

    # Return the average loss of the last set of 1000 batches
    return last_loss


## Training & Evaluation Iterations

In [None]:
import time
import torch
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

# Setup TensorBoard SummaryWriter
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter(f'runs/fashion_trainer_{timestamp}')

# Constants
best_vloss = float('inf')  # Initialize to a very large number
epoch_number = 0

# Variables for measuring training and inference time
total_training_time = 0.0
total_inference_time = 0.0

# Lists to store epoch-wise times
training_times = []
inference_times_without_warmup = []

# Main training loop
for epoch in range(EPOCHS):
    print(f'=== EPOCH {epoch + 1} ===')

    # ---- Training Phase ----
    model.train(True)  # Set the model to training mode
    start_train_time = time.time()  # Record start time

    running_corrects = 0
    total_samples = 0

    # Train for one epoch
    avg_loss = train_one_epoch(epoch_number, writer)

    for i, data in enumerate(train_loader):
        inputs, labels = data
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)  # Get predicted class
        running_corrects += torch.sum(preds == labels).item()  # Accumulate correct predictions
        total_samples += labels.size(0)  # Accumulate total samples

    training_accuracy = running_corrects / total_samples  # Calculate accuracy

    end_train_time = time.time()  # Record end time
    epoch_training_time = end_train_time - start_train_time
    total_training_time += epoch_training_time  # Accumulate total training time
    training_times.append(epoch_training_time)

    print(f'Epoch {epoch + 1} Training Time: {epoch_training_time:.4f} seconds')
    print(f'Epoch {epoch + 1} Training Accuracy: {training_accuracy:.4f}')

    # ---- Validation (Inference) Phase ----
    model.eval()  # Set the model to evaluation mode
    running_vloss = 0.0
    inference_times = []

    running_vcorrects = 0
    total_vsamples = 0

    # No gradient computation during inference
    with torch.no_grad():
        for i, vdata in enumerate(val_loader):
            vinputs, vlabels = vdata
            start_inference_time = time.time()  # Start inference timer

            # Forward pass through the model
            voutputs = model(vinputs)
            _, vpreds = torch.max(voutputs, 1)  # Get predicted class
            running_vcorrects += torch.sum(vpreds == vlabels).item()  # Accumulate correct predictions
            total_vsamples += vlabels.size(0)  # Accumulate total validation samples

            end_inference_time = time.time()  # End inference timer
            inference_times.append(end_inference_time - start_inference_time)

            # Calculate validation loss
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss.item()

    # Calculate validation accuracy
    validation_accuracy = running_vcorrects / total_vsamples

    # Remove warm-up time (first inference time) for accurate latency measurement
    warmup_removed_times = inference_times[1:]
    if warmup_removed_times:  # Ensure there's data after warmup removal
        avg_inference_time = sum(warmup_removed_times) / len(warmup_removed_times)
        total_inference_time += avg_inference_time * len(warmup_removed_times)
        inference_times_without_warmup.append(avg_inference_time)
        print(f'Epoch {epoch + 1} Avg Inference Time (without warmup): {avg_inference_time:.6f} seconds')

    avg_vloss = running_vloss / (i + 1)
    print(f'LOSS - Train: {avg_loss:.4f}, Validation: {avg_vloss:.4f}')
    print(f'Epoch {epoch + 1} Validation Accuracy: {validation_accuracy:.4f}')

    # ---- Logging to TensorBoard ----
    writer.add_scalars('Training vs. Validation Loss',
                       {'Training': avg_loss, 'Validation': avg_vloss},
                       epoch + 1)
    
    writer.add_scalars('Accuracy',
                       {'Training': training_accuracy, 'Validation': validation_accuracy},
                       epoch + 1)
    writer.flush()

    # ---- Model Checkpoint ----
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = f'model_{timestamp}_epoch_{epoch + 1}.pth'
        # torch.save(model.state_dict(), model_path)
        # print(f"Model saved at {model_path} with validation loss {avg_vloss:.4f}")

    epoch_number += 1

# Final stats after training
print(f'Total Training Time: {total_training_time:.4f} seconds')
print(f'Total Inference Time (excluding warm-up): {total_inference_time:.4f} seconds')


## Combining Everything without Width & CPU

In [None]:
import time
import torch
import matplotlib.pyplot as plt
import os
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from thop import profile  # For FLOPs calculation
import torch.nn as nn
import torch.nn.functional as F

# Set device to CPU
device = torch.device('cpu')

# Create a folder to save the plots
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
plot_dir = f'plots_{timestamp}'
os.makedirs(plot_dir, exist_ok=True)

# Constants
EPOCHS = 3
depths = [1, 2, 3]  # Example depths for the model
learning_rate = 0.001

# Lists to store results
flops_list = []
param_counts = []
train_acc_list = []
val_acc_list = []
latency_list = []

# Function to get model FLOPs and parameter count
def get_model_metrics(model, input_size=(1, 28*28)):
    input_tensor = torch.randn(1, *input_size).to(device)  # Ensure the tensor is on the CPU
    flops, params = profile(model, inputs=(input_tensor,), verbose=False)
    return flops, params

# Loop through different depths and train/evaluate the model
for depth in depths:
    print(f'=== Training Model with Depth: {depth} ===')

    # Define the model with varying depth
    class CustomGarmentClassifier(nn.Module):
        def __init__(self, depth):
            super(CustomGarmentClassifier, self).__init__()
            self.input_layer = nn.Linear(28 * 28, HIDDEN_FEATURE_DIMS)
            self.hidden_layers = nn.ModuleList([nn.Linear(HIDDEN_FEATURE_DIMS, HIDDEN_FEATURE_DIMS) for _ in range(depth)])
            self.output_layer = nn.Linear(HIDDEN_FEATURE_DIMS, OUTPUT_CLASSES)
        
        def forward(self, x):
            x = x.view(-1, 28 * 28)
            x = F.relu(self.input_layer(x))
            for layer in self.hidden_layers:
                x = F.relu(layer(x))
            x = self.output_layer(x)
            return x

    # Instantiate and move model to the CPU
    model = CustomGarmentClassifier(depth).to(device)

    # Get FLOPs and parameter count
    flops, params = get_model_metrics(model)
    flops_list.append(flops)
    param_counts.append(params)

    # Initialize optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Measure latency and accuracy
    train_acc, val_acc, latency = 0, 0, 0
    running_corrects_train = 0
    total_samples_train = 0
    total_latency = 0

    # ---- Training and Validation ----
    for epoch in range(EPOCHS):
        model.train(True)
        start_train_time = time.time()

        # Train for one epoch (simplified for this example)
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Ensure data is on the CPU
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            running_corrects_train += torch.sum(preds == labels).item()
            total_samples_train += labels.size(0)

        train_acc = running_corrects_train / total_samples_train

        end_train_time = time.time()
        epoch_training_time = end_train_time - start_train_time
        total_latency += epoch_training_time

        # Validation (Inference)
        model.eval()
        running_corrects_val = 0
        total_samples_val = 0

        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                vinputs, vlabels = vinputs.to(device), vlabels.to(device)  # Ensure validation data is on the CPU
                voutputs = model(vinputs)
                _, vpreds = torch.max(voutputs, 1)
                running_corrects_val += torch.sum(vpreds == vlabels).item()
                total_samples_val += vlabels.size(0)

        val_acc = running_corrects_val / total_samples_val

    latency_list.append(total_latency / EPOCHS)
    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)

    print(f'Depth: {depth}, FLOPs: {flops}, Params: {params}, Train Acc: {train_acc}, Val Acc: {val_acc}, Latency: {total_latency / EPOCHS}')

# ---- Plotting Results ----
# 1. FLOPs vs Accuracy
plt.figure()
plt.plot(flops_list, val_acc_list, marker='o')
plt.title('FLOPs vs Accuracy')
plt.xlabel('FLOPs')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(plot_dir, 'flops_vs_accuracy.png'))

# 2. FLOPs vs Latency
plt.figure()
plt.plot(flops_list, latency_list, marker='o')
plt.title('FLOPs vs Latency')
plt.xlabel('FLOPs')
plt.ylabel('Latency (s)')
plt.savefig(os.path.join(plot_dir, 'flops_vs_latency.png'))

# 3. Latency vs Accuracy
plt.figure()
plt.plot(latency_list, val_acc_list, marker='o')
plt.title('Latency vs Accuracy')
plt.xlabel('Latency (s)')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(plot_dir, 'latency_vs_accuracy.png'))

print(f'Plots saved to {plot_dir}')


# Combining Everything without Width

In [None]:
import time
import torch
import matplotlib.pyplot as plt
import os
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from thop import profile  # For FLOPs calculation
import torch.nn as nn
import torch.nn.functional as F

# Create a folder to save the plots
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
plot_dir = f'plots_{timestamp}'
os.makedirs(plot_dir, exist_ok=True)

# Constants
EPOCHS = 3
depths = [1, 2, 3]  # Example depths for the model
learning_rate = 0.001

# Lists to store results
flops_list = []
param_counts = []
train_acc_list = []
val_acc_list = []
latency_list = []

# Function to get model FLOPs and parameter count
def get_model_metrics(model, input_size=(1, 28*28)):
    input_tensor = torch.randn(1, *input_size)
    flops, params = profile(model, inputs=(input_tensor,), verbose=False)
    return flops, params

# Loop through different depths and train/evaluate the model
for depth in depths:
    print(f'=== Training Model with Depth: {depth} ===')

    # Define the model with varying depth
    class CustomGarmentClassifier(nn.Module):
        def __init__(self, depth):
            super(CustomGarmentClassifier, self).__init__()
            self.input_layer = nn.Linear(28 * 28, HIDDEN_FEATURE_DIMS)
            self.hidden_layers = nn.ModuleList([nn.Linear(HIDDEN_FEATURE_DIMS, HIDDEN_FEATURE_DIMS) for _ in range(depth)])
            self.output_layer = nn.Linear(HIDDEN_FEATURE_DIMS, OUTPUT_CLASSES)
        
        def forward(self, x):
            x = x.view(-1, 28 * 28)
            x = F.relu(self.input_layer(x))
            for layer in self.hidden_layers:
                x = F.relu(layer(x))
            x = self.output_layer(x)
            return x

    model = CustomGarmentClassifier(depth)

    # Get FLOPs and parameter count
    flops, params = get_model_metrics(model)
    flops_list.append(flops)
    param_counts.append(params)

    # Initialize optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Measure latency and accuracy
    train_acc, val_acc, latency = 0, 0, 0
    running_corrects_train = 0
    total_samples_train = 0
    total_latency = 0

    # ---- Training and Validation ----
    for epoch in range(EPOCHS):
        model.train(True)
        start_train_time = time.time()

        # Train for one epoch (simplified for this example)
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            running_corrects_train += torch.sum(preds == labels).item()
            total_samples_train += labels.size(0)

        train_acc = running_corrects_train / total_samples_train

        end_train_time = time.time()
        epoch_training_time = end_train_time - start_train_time
        total_latency += epoch_training_time

        # Validation (Inference)
        model.eval()
        running_corrects_val = 0
        total_samples_val = 0

        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                voutputs = model(vinputs)
                _, vpreds = torch.max(voutputs, 1)
                running_corrects_val += torch.sum(vpreds == vlabels).item()
                total_samples_val += vlabels.size(0)

        val_acc = running_corrects_val / total_samples_val

    latency_list.append(total_latency / EPOCHS)
    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)

    print(f'Depth: {depth}, FLOPs: {flops}, Params: {params}, Train Acc: {train_acc}, Val Acc: {val_acc}, Latency: {total_latency / EPOCHS}')

# ---- Plotting Results ----
# 1. FLOPs vs Accuracy
plt.figure()
plt.plot(flops_list, val_acc_list, marker='o')
plt.title('FLOPs vs Accuracy')
plt.xlabel('FLOPs')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(plot_dir, 'flops_vs_accuracy.png'))

# 2. FLOPs vs Latency
plt.figure()
plt.plot(flops_list, latency_list, marker='o')
plt.title('FLOPs vs Latency')
plt.xlabel('FLOPs')
plt.ylabel('Latency (s)')
plt.savefig(os.path.join(plot_dir, 'flops_vs_latency.png'))

# 3. Latency vs Accuracy
plt.figure()
plt.plot(latency_list, val_acc_list, marker='o')
plt.title('Latency vs Accuracy')
plt.xlabel('Latency (s)')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(plot_dir, 'latency_vs_accuracy.png'))

print(f'Plots saved to {plot_dir}')


## Combining Everything with Width

In [None]:
import time
import torch
import matplotlib.pyplot as plt
import os
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from thop import profile  # For FLOPs calculation
import torch.nn as nn
import torch.nn.functional as F

# Function to create directories based on section input
def create_result_folders(section):
    base_dir = f'result/section{section}'
    os.makedirs(base_dir, exist_ok=True)
    
    result_dirs = []
    for i in range(1, 4):
        result_dir = f'{base_dir}_result{i}'
        os.makedirs(result_dir, exist_ok=True)
        result_dirs.append(result_dir)
    
    return result_dirs

# Constants
EPOCHS = 3
INPUT_DIMS = 28 * 28
learning_rate = 0.001

# Define different widths and depths
widths = [512, 1024, 2048]  # Example widths (narrower and wider than input size)
depths = [1, 2, 3]  # Example depths

# Lists to store results
flops_list = []
param_counts = []
train_acc_list = []
val_acc_list = []
train_latency_list = []
inference_latency_list = []
single_batch_latency_list = []  # List for single-batch inference latencies
single_batch_flops_list = []  # List for single-batch FLOPs

# Function to get model FLOPs and parameter count
def get_model_metrics(model, input_size=(1, INPUT_DIMS)):
    input_tensor = torch.randn(1, *input_size)
    flops, params = profile(model, inputs=(input_tensor,), verbose=False)
    return flops, params

# Ask user for section number
section_number = input("Enter section number: ")

# Choose mode: depth or width variation
mode = input("Choose mode (depth/width): ").strip().lower()

# Create result folders based on the section number
result_dirs = create_result_folders(section_number)

# Loop through different depths or widths based on the selected mode
if mode == "depth":
    variation = depths
    print(f'Varying model depth: {depths}')
elif mode == "width":
    variation = widths
    print(f'Varying model width: {widths}')
else:
    raise ValueError("Invalid mode selected. Choose either 'depth' or 'width'.")

for var in variation:
    print(f'=== Training Model with {mode.capitalize()}: {var} ===')

    # Define the model based on depth or width
    class CustomGarmentClassifier(nn.Module):
        def __init__(self, var, mode):
            super(CustomGarmentClassifier, self).__init__()
            self.input_layer = nn.Linear(INPUT_DIMS, var if mode == 'width' else INPUT_DIMS)
            
            if mode == 'depth':
                # Vary depth: dynamically create hidden layers based on depth
                self.hidden_layers = nn.ModuleList([nn.Linear(INPUT_DIMS, INPUT_DIMS) for _ in range(var)])
                self.output_layer = nn.Linear(INPUT_DIMS, 10)
            else:
                # Vary width: keep the same number of hidden layers, but change the width
                self.hidden_layer = nn.Linear(var, var)
                self.output_layer = nn.Linear(var, 10)

        def forward(self, x):
            x = x.view(-1, INPUT_DIMS)
            x = F.relu(self.input_layer(x))

            if mode == 'depth':
                # Apply multiple hidden layers for depth variation
                for layer in self.hidden_layers:
                    x = F.relu(layer(x))
            else:
                # Apply a single hidden layer for width variation
                x = F.relu(self.hidden_layer(x))

            x = self.output_layer(x)
            return x

    # Create the model
    model = CustomGarmentClassifier(var, mode)

    # Get FLOPs and parameter count
    flops, params = get_model_metrics(model)
    flops_list.append(flops)
    param_counts.append(params)

    # Initialize optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Measure latency and accuracy
    train_acc, val_acc, train_latency, inference_latency = 0, 0, 0, 0
    running_corrects_train = 0
    total_samples_train = 0
    total_train_latency = 0
    total_inference_latency = 0

    # ---- Training and Validation ----
    for epoch in range(EPOCHS):
        model.train(True)
        start_train_time = time.time()

        # Train for one epoch (simplified for this example)
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            running_corrects_train += torch.sum(preds == labels).item()
            total_samples_train += labels.size(0)

        train_acc = running_corrects_train / total_samples_train

        end_train_time = time.time()
        epoch_training_time = end_train_time - start_train_time
        total_train_latency += epoch_training_time

        # Validation (Inference) - Measure Inference Latency
        model.eval()
        running_corrects_val = 0
        total_samples_val = 0

        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                start_inference_time = time.time()  # Start timing inference

                voutputs = model(vinputs)
                _, vpreds = torch.max(voutputs, 1)
                running_corrects_val += torch.sum(vpreds == vlabels).item()
                total_samples_val += vlabels.size(0)

                end_inference_time = time.time()  # End timing inference
                total_inference_latency += (end_inference_time - start_inference_time)

        val_acc = running_corrects_val / total_samples_val

    train_latency_list.append(total_train_latency / EPOCHS)  # Average training latency per epoch
    inference_latency_list.append(total_inference_latency / len(val_loader))  # Average inference latency per batch
    val_acc_list.append(val_acc)
    train_acc_list.append(train_acc)

    print(f'{mode.capitalize()}: {var}, FLOPs: {flops}, Params: {params}, Train Acc: {train_acc}, Val Acc: {val_acc}, Train Latency: {total_train_latency / EPOCHS}, Inference Latency: {total_inference_latency / len(val_loader)}')

    # ---- Single-Batch Inference ----
    # Perform single-batch inference and measure FLOPs and latency
    model.eval()
    single_batch = next(iter(val_loader))
    inputs, labels = single_batch

    with torch.no_grad():
        start_single_batch_time = time.time()  # Start timing single-batch inference

        # Forward pass for single batch
        outputs = model(inputs)

        end_single_batch_time = time.time()  # End timing
        single_batch_latency = end_single_batch_time - start_single_batch_time

        # Calculate FLOPs for single-batch inference
        single_batch_flops, _ = profile(model, inputs=(inputs,), verbose=False)

    # Store latency and FLOPs
    single_batch_latency_list.append(single_batch_latency)
    single_batch_flops_list.append(single_batch_flops)

    print(f'Single-Batch Inference for {mode.capitalize()} {var}: FLOPs: {single_batch_flops}, Latency: {single_batch_latency:.6f} seconds')

# ---- Plotting Results ----
# 1. FLOPs vs Accuracy
plt.figure()
plt.plot(flops_list, val_acc_list, marker='o')
plt.title('FLOPs vs Accuracy')
plt.xlabel('FLOPs')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(result_dirs[0], 'flops_vs_accuracy.png'))

# 2. FLOPs vs Latency (Single-Batch Inference)
plt.figure()
plt.plot(single_batch_flops_list, single_batch_latency_list, marker='o')
plt.title('FLOPs vs Single-Batch Inference Latency')
plt.xlabel('FLOPs')
plt.ylabel('Single-Batch Inference Latency (s)')
plt.savefig(os.path.join(result_dirs[1], 'flops_vs_latency.png'))

# 3. Latency vs Accuracy (Single-Batch Inference)
plt.figure()
plt.plot(single_batch_latency_list, val_acc_list, marker='o')
plt.title('Single-Batch Inference Latency vs Accuracy')
plt.xlabel('Single-Batch Inference Latency (s)')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(result_dirs[2], 'latency_vs_accuracy.png'))

print(f'Plots saved in {result_dirs}')


In [None]:
# Enter section number:  s4
# Choose mode (depth/width):  width
# Varying model width: [512, 1024, 2048]
# === Training Model with Width: 512 ===
# Width: 512, FLOPs: 668672.0, Params: 669706.0, Train Acc: 0.9579881886920337, Val Acc: 0.9677967796779678, Train Latency: 10.520967165629068, Inference Latency: 0.0012251343727111817
# Single-Batch Inference for Width 512: FLOPs: 6686720.0, Latency: 0.000338 seconds
# === Training Model with Width: 1024 ===

In [None]:
# . Experiment Setup:
# Depth Variation: We varied the depth of the model across 1, 2, and 3 hidden layers to observe how the model's complexity affects performance.
# Metrics Measured: For each model configuration, we tracked FLOPs (Floating Point Operations), latency (training time per epoch), and accuracy (both training and validation).
# Efficiency Metrics: FLOPs and latency are used to measure the computational cost, while accuracy reflects model performance.
# 2. Key Tradeoffs:
# FLOPs vs Accuracy: A higher number of FLOPs indicates more computational complexity, but this doesn’t always correlate with better accuracy. A balance needs to be struck where the increase in complexity improves accuracy without diminishing returns.
# FLOPs vs Latency: Models with higher FLOPs generally take longer to train. However, hardware and optimization strategies can influence this relationship.
# Latency vs Accuracy: This relationship is crucial when optimizing models for real-time applications. A model that takes too long to compute may not be practical even if it achieves high accuracy.
# 3. Results and Trends:
# Accuracy vs Depth: As the depth of the model increases, the accuracy typically improves. However, this increase may taper off beyond a certain depth, where the marginal gains in accuracy are minimal compared to the computational cost.
# FLOPs Impact: Models with more FLOPs tend to be more accurate, but the increase in computational cost might not justify the small improvement in accuracy for larger models.
# Latency Impact: Depth and width variations both affect latency. Larger models take more time to train, and the relationship between latency and accuracy highlights the tradeoff between efficiency and performance.
# 4. Best Tradeoff:
# The best trade-off is often found with a moderate depth (e.g., depth 2), where the accuracy is reasonably high, but the computational cost (in terms of both FLOPs and latency) is not prohibitively expensive. For some applications, slight reductions in accuracy may be acceptable in exchange for significantly lower computational costs.
# Conclusion:
# The experiments and visualizations provide insights into how varying model depth affects the tradeoffs between accuracy and efficiency. The generated plots illustrate how FLOPs and latency scale with accuracy, giving a clear picture of the efficiency-performance tradeoff in neural networks.

## Data Loader & Training code integration

In [None]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from PIL import Image
import time
import matplotlib.pyplot as plt
import os
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from thop import profile  # For FLOPs calculation
import torch.nn as nn
import torch.nn.functional as F

# Define CustomMNISTDataset
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.labels = self.data.iloc[:, 0].values  # First column is the label
        self.images = self.data.iloc[:, 1:].values.astype('float32')  # Remaining columns are pixel values

        # Normalize using the calculated mean and std
        self.mean = np.mean(self.images)
        self.std_dev = np.std(self.images)
        self.images = (self.images - self.mean) / self.std_dev

        # Reshape the images to 28x28 initially
        self.images = self.images.reshape(-1, 28, 28)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # Convert the NumPy array to a PIL image
        image = Image.fromarray((image * 255).astype(np.uint8))  # Convert normalized image back to 0-255 for PIL

        if self.transform:
            image = self.transform(image)

        return image, label

# Define transformations for resizing, cropping, and no transformation
resize_transform_14 = transforms.Compose([
    transforms.Resize((14, 14)),  # Resize to 14x14
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

resize_transform_20 = transforms.Compose([
    transforms.Resize((20, 20)),  # Resize to 20x20
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

crop_transform = transforms.Compose([
    transforms.Resize((28, 28)),  # Ensure image is 28x28 before cropping
    transforms.CenterCrop(20),    # Center crop to 20x20
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

no_transform = transforms.Compose([
    transforms.ToTensor(),        # No resizing or cropping, just convert to tensor
    transforms.Normalize((0.5,), (0.5,))
])

# Prompt user to choose a transformation
print("Choose a transformation:")
print("1: Resize to 14x14")
print("2: Resize to 20x20")
print("3: Center Crop to 20x20")
print("4: No Transformation (28x28)")

transformation_choice = input("Enter the number of the transformation (1, 2, 3, or 4): ").strip()

# Dynamically set INPUT_DIMS based on transformation
if transformation_choice == "1":
    transform = resize_transform_14
    INPUT_DIMS = 14 * 14  # For 14x14 input, INPUT_DIMS is 196
elif transformation_choice == "2":
    transform = resize_transform_20
    INPUT_DIMS = 20 * 20  # For 20x20 input, INPUT_DIMS is 400
elif transformation_choice == "3":
    transform = crop_transform
    INPUT_DIMS = 20 * 20  # For 20x20 crop, INPUT_DIMS is 400
elif transformation_choice == "4":
    transform = no_transform
    INPUT_DIMS = 28 * 28  # For 28x28 input, INPUT_DIMS is 784

# Define constants
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 10

# Load datasets with selected transformation
mnist_dataset = CustomMNISTDataset(DATASET_TRAIN_PATH, transform=transform)
val_mnist_dataset = CustomMNISTDataset(DATASET_TEST_PATH, transform=transform)

# Use DataLoader for batching and shuffling
train_loader = DataLoader(mnist_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_mnist_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False)

# Function to create directories based on section input
def create_result_folders(section):
    base_dir = f'result/section{section}'
    os.makedirs(base_dir, exist_ok=True)
    
    result_dirs = []
    for i in range(1, 4):
        result_dir = f'{base_dir}_result{i}'
        os.makedirs(result_dir, exist_ok=True)
        result_dirs.append(result_dir)
    
    return result_dirs

# Constants for the training
EPOCHS = 2
learning_rate = 0.001
widths = [1024]  # Example widths (narrower and wider than input size)
depths = [1024]  # Example depths
DEPTH_LAYER = 2

# Function to get model FLOPs and parameter count
def get_model_metrics(model, input_size=(1, INPUT_DIMS)):
    input_tensor = torch.randn(1, *input_size)
    flops, params = profile(model, inputs=(input_tensor,), verbose=False)
    return flops, params

# Ask user for section number
section_number = input("Enter section number: ")

# Choose mode: depth or width variation
mode = input("Choose mode (depth/width): ").strip().lower()

# Create result folders based on the section number
result_dirs = create_result_folders(section_number)

# Lists to store results
flops_list = []
param_counts = []
train_acc_list = []
val_acc_list = []
train_latency_list = []
inference_latency_list = []
single_batch_latency_list = []
single_batch_flops_list = []

# Loop through different depths or widths based on the selected mode
if mode == "depth":
    variation = depths
    depth_layer = DEPTH_LAYER
elif mode == "width":
    variation = widths
else:
    raise ValueError("Invalid mode selected. Choose either 'depth' or 'width'.")

for var in variation:
    print(f'=== Training Model with {mode.capitalize()}: {var} ===')

    # Define the model based on depth or width
    class CustomGarmentClassifier(nn.Module):
        def __init__(self, var, mode):
            super(CustomGarmentClassifier, self).__init__()
            self.input_layer = nn.Linear(INPUT_DIMS, var if mode == 'width' else var)
            
            if mode == 'depth':
                # Vary depth: dynamically create hidden layers based on depth
                self.hidden_layers = nn.ModuleList([nn.Linear(var, var) for _ in range(depth_layer)])
                self.output_layer = nn.Linear(var, 10)
            else:
                # Vary width: keep the same number of hidden layers, but change the width
                self.hidden_layer = nn.Linear(var, var)
                self.output_layer = nn.Linear(var, 10)
                
        def forward(self, x):
            x = x.view(-1, INPUT_DIMS)
            x = F.relu(self.input_layer(x))

            if mode == 'depth':
                # Apply multiple hidden layers for depth variation
                for layer in self.hidden_layers:
                    x = F.relu(layer(x))
            else:
                # Apply a single hidden layer for width variation
                x = F.relu(self.hidden_layer(x))

            x = self.output_layer(x)
            return x

    # Create the model
    model = CustomGarmentClassifier(var, mode)

    # Get FLOPs and parameter count
    flops, params = get_model_metrics(model)
    flops_list.append(flops)
    param_counts.append(params)

    # Initialize optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Measure latency and accuracy
    running_corrects_train = 0
    total_samples_train = 0
    total_train_latency = 0
    total_inference_latency = 0

    # ---- Training and Validation ----
    for epoch in range(EPOCHS):
        model.train(True)
        start_train_time = time.time()

        # Train for one epoch
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            running_corrects_train += torch.sum(preds == labels).item()
            total_samples_train += labels.size(0)

        train_acc = running_corrects_train / total_samples_train
        total_train_latency += time.time() - start_train_time

        # Validation (Inference)
        model.eval()
        running_corrects_val = 0
        total_samples_val = 0

        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                start_inference_time = time.time()
                voutputs = model(vinputs)
                _, vpreds = torch.max(voutputs, 1)
                running_corrects_val += torch.sum(vpreds == vlabels).item()
                total_samples_val += vlabels.size(0)
                total_inference_latency += time.time() - start_inference_time

        val_acc = running_corrects_val / total_samples_val

    train_latency_list.append(total_train_latency / EPOCHS)  # Average training latency per epoch
    inference_latency_list.append(total_inference_latency / len(val_loader))  # Average inference latency per batch
    val_acc_list.append(val_acc)
    train_acc_list.append(train_acc)

    print(f'{mode.capitalize()}: {var}, FLOPs: {flops}, Params: {params}, Train Acc: {train_acc}, Val Acc: {val_acc}')

    # ---- Single-Batch Inference ----
    # Perform single-batch inference and measure FLOPs and latency
    model.eval()
    single_batch = next(iter(val_loader))
    inputs, labels = single_batch

    with torch.no_grad():
        start_single_batch_time = time.time()
        outputs = model(inputs)
        single_batch_latency = time.time() - start_single_batch_time

        # Calculate FLOPs for single-batch inference
        single_batch_flops, _ = profile(model, inputs=(inputs,), verbose=False)

    single_batch_latency_list.append(single_batch_latency)
    single_batch_flops_list.append(single_batch_flops)

    print(f'Single-Batch Inference for {mode.capitalize()} {var}: FLOPs: {single_batch_flops}, Latency: {single_batch_latency:.6f} seconds')

# ---- Plotting Results ----
# 1. FLOPs vs Accuracy
plt.figure()
plt.plot(flops_list, val_acc_list, marker='o')
plt.title('FLOPs vs Accuracy')
plt.xlabel('FLOPs')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(result_dirs[0], 'flops_vs_accuracy.png'))

# 2. FLOPs vs Latency (Single-Batch Inference)
plt.figure()
plt.plot(single_batch_flops_list, single_batch_latency_list, marker='o')
plt.title('FLOPs vs Single-Batch Inference Latency')
plt.xlabel('FLOPs')
plt.ylabel('Single-Batch Inference Latency (s)')
plt.savefig(os.path.join(result_dirs[1], 'flops_vs_latency.png'))

# 3. Latency vs Accuracy (Single-Batch Inference)
plt.figure()
plt.plot(single_batch_latency_list, val_acc_list, marker='o')
plt.title('Single-Batch Inference Latency vs Accuracy')
plt.xlabel('Single-Batch Inference Latency (s)')
plt.ylabel('Accuracy')
plt.savefig(os.path.join(result_dirs[2], 'latency_vs_accuracy.png'))

print(f'Plots saved in {result_dirs}')


## # of parameters

In [None]:
import torch

# Function to count trainable parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Example usage for GarmentClassifier model
model = GarmentClassifier()
total_params = count_parameters(model)
print(f'Total trainable parameters: {total_params}')


# 1. 파라미터 수 계산 함수 (Trainable Parameters Count)
# 모델의 파라미터 수는 각 계층의 파라미터 개수를 계산하여 더하는 방식으로 구할 수 있습니다. 각 Linear 계층에서의 파라미터 개수는 입력 뉴런 수 × 출력 뉴런 수 + 출력 뉴런 수(바이어스)로 계산됩니다.

# 설명:
# model.parameters()를 통해 모델의 모든 파라미터에 접근합니다.
# 각 파라미터의 numel() 함수를 사용해 그 파라미터 텐서의 요소 개수를 얻고, 이를 모두 더합니다.
# requires_grad=True인 파라미터만을 대상으로 하여 학습 가능한 파라미터의 수를 계산합니다.
# 수식으로 파라미터 계산 (Closed Form):
# fc1: 입력 뉴런 784 (28x28) × 은닉층 뉴런 1024 + 바이어스(1024) = 784 * 1024 + 1024 = 803840
# fc2: 은닉층 뉴런 1024 × 은닉층 뉴런 1024 + 바이어스(1024) = 1024 * 1024 + 1024 = 1049600
# fc3: 은닉층 뉴런 1024 × 출력층 뉴런 10 + 바이어스(10) = 1024 * 10 + 10 = 10250
# 총 파라미터 수: 803840 + 1049600 + 10250 = 1863690

## FLOPs 

In [None]:
# Function to compute FLOPs for GarmentClassifier
def compute_flops(model, input_size=(1, 28*28)):
    flops = 0
    
    # fc1 FLOPs: (input_size * hidden_size) + hidden_size
    flops += (input_size[1] * model.fc1.out_features) + model.fc1.out_features
    
    # fc2 FLOPs: (hidden_size * hidden_size) + hidden_size
    flops += (model.fc1.out_features * model.fc2.out_features) + model.fc2.out_features
    
    # fc3 FLOPs: (hidden_size * output_size) + output_size
    flops += (model.fc2.out_features * model.fc3.out_features) + model.fc3.out_features
    
    return flops

# Example usage for GarmentClassifier model
flops = compute_flops(model)
print(f'Total FLOPs for one forward pass: {flops}')

# 2. FLOPs 계산 함수 (Floating Point Operations)

# FLOPs는 신경망에서의 부동소수점 연산 횟수를 나타내며, 각 계층에서 발생하는 곱셈과 덧셈 연산의 개수를 고려합니다. 여기서는 모델의 각 Linear 계층에 대한 연산을 계산합니다. Linear 계층에서는 다음 연산이 수행됩니다:

# 곱셈 연산: 입력 뉴런 수 × 출력 뉴런 수
# 덧셈 연산: 출력 뉴런 수 (바이어스 덧셈)

# 설명:
# 각 Linear 계층에서의 곱셈 연산과 덧셈 연산을 더해 FLOPs를 계산합니다.
# 예를 들어, fc1 계층에서는 28*28 크기의 입력이 1024개의 은닉층 뉴런으로 전달되므로, 곱셈 연산은 784 * 1024개, 덧셈 연산은 1024개가 발생합니다.
# 이 계산을 모든 계층에 대해 수행하여 총 FLOPs를 구합니다.
# FLOPs 계산 방식:
# fc1: (784 × 1024) + 1024 = 803840 FLOPs
# fc2: (1024 × 1024) + 1024 = 1049600 FLOPs
# fc3: (1024 × 10) + 10 = 10250 FLOPs
