## CNN on tinyImageNet

In [1]:
# Import necessary libraries
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, Dataset
from torchvision import datasets
from torchvision.models import vgg19, resnet50
from torch import nn
import torch.optim as optim
import random
import timm  
import os
from shutil import copy2

# Load Tiny ImageNet dataset directory path
data_dir = 'tiny-imagenet-200'

# Organize validation data based on annotations
def organize_val_data(data_dir):
    val_annotations_path = os.path.join(data_dir, 'val', 'val_annotations.txt')
    val_images_path = os.path.join(data_dir, 'val', 'images')
    val_target_dir = os.path.join(data_dir, 'val', 'organized')

    os.makedirs(val_target_dir, exist_ok=True)

    with open(val_annotations_path, 'r') as file:
        for line in file:
            split_line = line.split('\t')
            img_file = split_line[0]
            label = split_line[1]

            label_dir = os.path.join(val_target_dir, label)
            os.makedirs(label_dir, exist_ok=True)
            img_path = os.path.join(val_images_path, img_file)
            copy2(img_path, os.path.join(label_dir, img_file))

    return val_target_dir

val_target_dir = organize_val_data(data_dir)

# Data preprocessing transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Reduce image size for faster processing
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization
])

# Load train and validation datasets
train_dataset = datasets.ImageFolder(root=f'{data_dir}/train', transform=transform)
val_dataset = datasets.ImageFolder(root=val_target_dir, transform=transform)

# Optional: Use a subset of data for faster experimentation
subset_indices = random.sample(range(len(train_dataset)), 2000)  # Adjust as needed for faster processing
train_dataset = Subset(train_dataset, subset_indices)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

# Function to evaluate model accuracy
def evaluate_model(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# Set device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Function to fine-tune and evaluate a model
def fine_tune_model(model, train_loader, val_loader, device, num_epochs=1):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001, momentum=0.9)

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    accuracy = evaluate_model(model, val_loader, device)
    return accuracy

# Evaluate VGG-19
print("Evaluating VGG-19...")
vgg19_model = vgg19(pretrained=True)
vgg19_model.classifier[6] = nn.Linear(4096, 200)  # Adjust output layer for 200 classes
vgg19_accuracy = fine_tune_model(vgg19_model, train_loader, val_loader, device)
print(f"VGG-19 Accuracy: {vgg19_accuracy:.2f}%")

# Evaluate ResNet-50
print("Evaluating ResNet-50...")
resnet50_model = resnet50(pretrained=True)
resnet50_model.fc = nn.Linear(resnet50_model.fc.in_features, 200)  # Adjust output layer
resnet50_accuracy = fine_tune_model(resnet50_model, train_loader, val_loader, device)
print(f"ResNet-50 Accuracy: {resnet50_accuracy:.2f}%")

# Load and fine-tune InceptionV4 using `timm`
print("Evaluating InceptionV4...")
inceptionv4_model = timm.create_model('inception_v4', pretrained=True)  # Load pre-trained InceptionV4
inceptionv4_model.reset_classifier(num_classes=200)  # Adjust output layer for 200 classes

# Only enable gradients for the classifier layer
for param in inceptionv4_model.parameters():
    param.requires_grad = False
for param in inceptionv4_model.get_classifier().parameters():
    param.requires_grad = True

# Fine-tune the model
inceptionv4_accuracy = fine_tune_model(inceptionv4_model, train_loader, val_loader, device)
print(f"InceptionV4 Accuracy: {inceptionv4_accuracy:.2f}%")

# Report results
print("\nComparison Report:")
print(f"VGG-19 Accuracy: {vgg19_accuracy:.2f}%")
print(f"ResNet-50 Accuracy: {resnet50_accuracy:.2f}%")
print(f"InceptionV4 Accuracy: {inceptionv4_accuracy:.2f}%")


Evaluating VGG-19...




VGG-19 Accuracy: 17.52%
Evaluating ResNet-50...




ResNet-50 Accuracy: 15.11%
Evaluating InceptionV4...
InceptionV4 Accuracy: 2.55%

Comparison Report:
VGG-19 Accuracy: 17.52%
ResNet-50 Accuracy: 15.11%
InceptionV4 Accuracy: 2.55%


## RNN on synthetic time series

In [39]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Generating synthetic time series data
def generate_synthetic_time_series(size=1000):
    x = np.linspace(0, 4 * np.pi, size)
    y = np.sin(x) + 0.1 * np.random.randn(size)
    return y

# Create dataset
def create_dataset(series, seq_length):
    X, y = [], []
    for i in range(len(series) - seq_length):
        X.append(series[i:i+seq_length])
        y.append(series[i+seq_length])
    return np.array(X), np.array(y)

# Prepare data for training
def prepare_data(series, seq_length):
    X, y = create_dataset(series, seq_length)
    X = X.reshape(-1, seq_length, 1)
    y = y.reshape(-1, 1)
    return X, y

# Normalize and prepare data
def prepare_time_series_data(size, seq_length):
    series = generate_synthetic_time_series(size=size)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    series = scaler.fit_transform(series.reshape(-1, 1)).flatten()
    X, y = prepare_data(series, seq_length)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = map(torch.tensor, (X_train, X_val, y_train, y_val))
    return X_train.float(), X_val.float(), y_train.float(), y_val.float()

# Define model architectures
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, model_type='lstm'):
        super(RNNModel, self).__init__()
        if model_type == 'lstm':
            self.rnn = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        elif model_type == 'gru':
            self.rnn = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        elif model_type == 'rnn':
            self.rnn = nn.RNN(input_dim, hidden_dim, num_layers, batch_first=True)
        elif model_type == 'bidir':
            self.rnn = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * (2 if model_type == 'bidir' else 1), output_dim)

    def forward(self, x):
        h, _ = self.rnn(x)
        return self.fc(h[:, -1, :])

# Training function
def train_model(model, X_train, y_train, num_epochs=10, lr=0.001):
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

# Evaluation function on testing data
def evaluate_model(model, X_val, y_val):
    criterion = nn.MSELoss()
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
    print(f"Validation Loss: {val_loss.item():.4f}")

# Models for training
models = {
    "LSTM": RNNModel(input_dim=1, hidden_dim=50, output_dim=1, model_type='lstm'),
    "GRU": RNNModel(input_dim=1, hidden_dim=50, output_dim=1, model_type='gru'),
    "Bidirectional RNN": RNNModel(input_dim=1, hidden_dim=50, output_dim=1, model_type='bidir'),
    "Deep RNN": RNNModel(input_dim=1, hidden_dim=50, output_dim=1, num_layers=2, model_type='rnn')
}

# Experiment parameters
seq_length = 20
sizes = [1000, 3000, 3000 * 3]  # Original, three times, and nine times sizes

# Training loop for each size
for size in sizes:
    print(f"\nRunning experiments for dataset size: {size}")
    X_train, X_val, y_train, y_val = prepare_time_series_data(size, seq_length)
    
    # Train and evaluate each model
    for name, model in models.items():
        print(f"\nTraining {name} on dataset of size {size}...")
        train_model(model, X_train, y_train)
        evaluate_model(model, X_val, y_val)



Running experiments for dataset size: 1000

Training LSTM on dataset of size 1000...
Epoch 1/10, Loss: 0.3432
Epoch 2/10, Loss: 0.3367
Epoch 3/10, Loss: 0.3304
Epoch 4/10, Loss: 0.3244
Epoch 5/10, Loss: 0.3185
Epoch 6/10, Loss: 0.3129
Epoch 7/10, Loss: 0.3073
Epoch 8/10, Loss: 0.3019
Epoch 9/10, Loss: 0.2964
Epoch 10/10, Loss: 0.2910
Validation Loss: 0.3131

Training GRU on dataset of size 1000...
Epoch 1/10, Loss: 0.3170
Epoch 2/10, Loss: 0.3040
Epoch 3/10, Loss: 0.2913
Epoch 4/10, Loss: 0.2789
Epoch 5/10, Loss: 0.2667
Epoch 6/10, Loss: 0.2548
Epoch 7/10, Loss: 0.2432
Epoch 8/10, Loss: 0.2318
Epoch 9/10, Loss: 0.2206
Epoch 10/10, Loss: 0.2096
Validation Loss: 0.2196

Training Bidirectional RNN on dataset of size 1000...
Epoch 1/10, Loss: 0.3312
Epoch 2/10, Loss: 0.3256
Epoch 3/10, Loss: 0.3200
Epoch 4/10, Loss: 0.3146
Epoch 5/10, Loss: 0.3092
Epoch 6/10, Loss: 0.3039
Epoch 7/10, Loss: 0.2986
Epoch 8/10, Loss: 0.2933
Epoch 9/10, Loss: 0.2880
Epoch 10/10, Loss: 0.2827
Validation Loss: 