In [1]:
import torch.nn as nn

# 2-1

## load, augment and struct the training dataset

In [2]:
import torch
from PIL import Image
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

device = "cuda" if torch.cuda.is_available else "cpu"
torch.set_default_device(device)
torch.manual_seed(42)

device

'cuda'

In [3]:
import os

classes = train_directories = os.listdir("./TRAIN/")
discretized_classes = [i for i in range(classes.__len__())]
classes, discretized_classes

X = torch.zeros((8000 + 8000, 3, 128, 128), dtype=torch.uint8)
X_finetune = torch.zeros((1600 + 1600, 3, 128, 128), dtype=torch.uint8)

In [4]:
common_transform = transforms.Compose([transforms.PILToTensor()])
augmented_transform = transforms.Compose(
    [
        transforms.PILToTensor(),
        transforms.RandomAffine(degrees=20, translate=(0.10, 0.10), shear=30),
    ]  # rotation from -20 to 20 , translate %10 along both axes, and rotation along the x-axis
)

# finetune = torch.zeros((1600 + 1600, 3, 128, 128))
some_transformed_random_images_for_showcasing = []

for process in ["TRAIN"]:
    x_iterator = 0
    for directory in classes:
        directory = f"./{process}/{directory}"
        for file in os.listdir(directory):
            image = Image.open(f"{directory}/{file}")
            if process == "TRAIN":
                X[x_iterator] = common_transform(image)
                X[x_iterator + 1] = augmented_transform(image)
            else:
                X_finetune[x_iterator] = common_transform(image)
                X_finetune[x_iterator + 1] = augmented_transform(image)
            x_iterator += 2

In [5]:
from sklearn.model_selection import train_test_split

y = torch.zeros(16000, dtype=torch.uint8)
y_finetune = torch.zeros(16000, dtype=torch.uint8)

for i in range(classes.__len__()):
    y[i * 2000 : i * 2000 + 2000] = i
    y_finetune[i * 400 : i * 400 + 400] = i
x_train, x_val, y_train, y_val = train_test_split(X, y)


x_train.shape, x_val.shape, y_train.shape, y_val.shape

(torch.Size([12000, 3, 128, 128]),
 torch.Size([4000, 3, 128, 128]),
 torch.Size([12000]),
 torch.Size([4000]))

In [6]:
X_train[0]

NameError: name 'X_train' is not defined

# Train dataset

In [32]:
device = "cuda" if torch.cuda.is_available else "cpu"
torch.set_default_device(device)
torch.manual_seed(42)

device

'cuda'

In [42]:
import random
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, ConcatDataset

generator = torch.Generator(device=device)
# Define augmentation transforms
rotation_degrees = 20
translation_percent = 0.1
validation_ratio = 0.2

transform_list = [
    transforms.RandomRotation(degrees=(-rotation_degrees, rotation_degrees)),
    transforms.RandomAffine(degrees=0, translate=(translation_percent, translation_percent)),
    transforms.RandomVerticalFlip(p=0.5),
    
]

# Path and configurations
data_dir = "./TRAIN/"
batch_size = 400

# Original dataset without augmentation
original_dataset = ImageFolder(root=data_dir, transform=transforms.ToTensor())

augmented = []
for index, (data, label) in enumerate(original_dataset):
    images = []
    for transform in transform_list:
        if random.random() > 0.15:
            transformed_data = transform(data)
            images.append(transformed_data)
            if len(images) >= 2 and random.random() > 0.50:
                transformed_data = transform(images[-2])
                images.append(transformed_data)
    else:
        for image in images:  augmented.append((image, label))
        
        
        

class AugmentedCustomDataset(Dataset):
    def __init__(self, augmented_data):
        self.augmented_data = augmented_data

    def __len__(self):
        return len(self.augmented_data)

    def __getitem__(self, idx):
        images, label = self.augmented_data[idx]
        tensor_images = [torch.Tensor(image) for image in images]
        combined_images = torch.stack(tensor_images)
        
        return combined_images, label

custom_augmented_dataset = AugmentedCustomDataset(augmented)
combined_dataset = torch.utils.data.ConcatDataset([original_dataset, custom_augmented_dataset])

val_size = int(len(combined_dataset) * validation_ratio)
train_size = int(len(combined_dataset) - val_size)



In [43]:
device

'cuda'

In [44]:
generator = torch.Generator(device=device)
train_dataset, val_dataset = torch.utils.data.random_split(combined_dataset, [train_size, val_size], generator=generator)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=400, shuffle=True,generator=torch.Generator(device='cuda'))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=400, shuffle=False,generator=torch.Generator(device='cuda'))

# Fine tune dataset

In [67]:


data_dir = './TUNE/'
data_transforms = transforms.Compose([
    transforms.ToTensor()
])
custom_dataset = ImageFolder(root=data_dir, transform=data_transforms)

batch_size = 400 
val_size = int(len(custom_dataset) * 0.1)
train_size = int(len(custom_dataset) - val_size)


generator = torch.Generator(device=device)
train_dataset_ft, val_dataset_ft = torch.utils.data.random_split(custom_dataset, [train_size, val_size],generator=generator)
train_loader_ft = torch.utils.data.DataLoader(train_dataset_ft, batch_size=1, shuffle=True,generator=torch.Generator(device='cuda'))
val_loader_ft = torch.utils.data.DataLoader(val_dataset_ft, batch_size=1, shuffle=False,generator=torch.Generator(device='cuda'))

# 3-1

In [55]:
import torch.nn as nn

class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=9, padding=4)
        self.bn1 = nn.BatchNorm2d(16, affine=False)
        self.conv_2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=7, padding=3)
        self.bn2 = nn.BatchNorm2d(32, affine=False)
        self.conv_3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.bn3 = nn.BatchNorm2d(64, affine=False)
        self.conv_4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(128, affine=False)
        self.conv_5 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(128, affine=False)
        self.dropout_dense = nn.Dropout(p=0.5)
        self.max_pool = nn.MaxPool2d(2)
        self.dropout_pool = nn.Dropout2d(p=0.2)
        
        self.flatten = nn.Flatten(1)
        self.dense_1 = nn.Linear(2048, 1024)
        self.dense_2 = nn.Linear(1024, 1024)
        
        self.output = nn.Linear(1024, 8)
        
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.conv_1(x)
        x = self.relu(x)
        x = self.bn1(x)
        x = self.max_pool(x)
       
#         x = self.dropout_pool(x)

        x = self.conv_2(x)
        x = self.relu(x)
        x = self.bn2(x)
        x = self.max_pool(x)
#         x = self.dropout_pool(x)
        
        x = self.conv_3(x)
        x = self.relu(x)
        x = self.bn3(x)
        x = self.max_pool(x)
#         x = self.dropout_pool(x)

        x = self.conv_4(x)
        x = self.relu(x)
        x = self.bn4(x)
        x = self.max_pool(x)
#         x = self.dropout_pool(x)

        x = self.conv_5(x)
        x = self.bn5(x)
        x = self.relu(x)
        x = self.max_pool(x)
#         x = self.dropout_pool(x)

        x = self.flatten(x)
        x = self.dense_1(x)
#         x = self.dropout_pool(x)
        
        x = self.dense_2(x)
#         x = self.dropout_pool(x)
        
        x = self.output(x)
        return x


# ---------------------------------------------------------

In [56]:
import torch.optim as optim

alex_net = AlexNet().to(device).float()
optimizer = optim.Adam(alex_net.parameters(), lr=0.001) 
criterion = nn.CrossEntropyLoss()

# Training

In [61]:
num_epochs = 24

for epoch in range(num_epochs):
    alex_net.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = alex_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_accuracy = 100 * correct_train / total_train
    alex_net.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = alex_net(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_accuracy = 100 * correct_val / total_val

    epoch_loss = running_loss / len(train_loader)
    epoch_val_loss = val_loss / len(val_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}] - Loss: {epoch_loss:.4f} - Val Loss: {epoch_val_loss:.4f} - Train Acc: {train_accuracy:.2f}% - Val Acc: {val_accuracy:.2f}%")

KeyboardInterrupt: 

# Fine-tuning

In [68]:
for epoch in range(16):
    alex_net.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    

    for inputs, labels in train_loader_ft:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = alex_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_accuracy = 100 * correct_train / total_train
    alex_net.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader_ft:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = alex_net(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_accuracy = 100 * correct_val / total_val

    epoch_loss = running_loss / len(train_loader_ft)
    epoch_val_loss = val_loss / len(val_loader_ft)
    print(f"Epoch [{epoch + 1}/{16}] - Loss: {epoch_loss:.4f} - Val Loss: {epoch_val_loss:.4f} - Train Acc: {train_accuracy:.2f}% - Val Acc: {val_accuracy:.2f}%")

Epoch [1/16] - Loss: 3.4538 - Val Loss: 3.4250 - Train Acc: 33.26% - Val Acc: 26.88%
Epoch [2/16] - Loss: 3.2084 - Val Loss: 2.7107 - Train Acc: 35.07% - Val Acc: 20.00%
Epoch [3/16] - Loss: 1.8088 - Val Loss: 7.7623 - Train Acc: 37.85% - Val Acc: 28.12%
Epoch [4/16] - Loss: 3.0428 - Val Loss: 8.0784 - Train Acc: 38.68% - Val Acc: 29.38%
Epoch [5/16] - Loss: 1.3501 - Val Loss: 4.5862 - Train Acc: 47.08% - Val Acc: 21.25%
Epoch [6/16] - Loss: 2.1784 - Val Loss: 4.2656 - Train Acc: 41.88% - Val Acc: 30.62%
Epoch [7/16] - Loss: 1.9735 - Val Loss: 3.6783 - Train Acc: 46.67% - Val Acc: 27.50%
Epoch [8/16] - Loss: 1.4923 - Val Loss: 3.2028 - Train Acc: 52.29% - Val Acc: 32.50%
Epoch [9/16] - Loss: 2.9876 - Val Loss: 3.5495 - Train Acc: 50.49% - Val Acc: 35.62%
Epoch [10/16] - Loss: 0.8578 - Val Loss: 5.2195 - Train Acc: 66.88% - Val Acc: 26.25%
Epoch [11/16] - Loss: 2.7040 - Val Loss: 2.9736 - Train Acc: 51.60% - Val Acc: 32.50%
Epoch [12/16] - Loss: 6.9231 - Val Loss: 4.0150 - Train Acc: 59