In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# import zipfile
# with zipfile.ZipFile('/notebooks/tiny-imagenet.zip', 'r') as zip_ref:
#     zip_ref.extractall('/notebooks/')

In [40]:
class Params:
    def __init__(self):
        self.batch_size = 200
        self.name = "resnet18_color_perception"
        self.lr = 0.1
        self.workers = 4
        self.moment = 0.9 ## Deleted bethas added moment
        self.weight_decay = 1e-4
        self.lr_step_size = 30
        self.lr_gamma = 0.1
        self.total_epochs = 500

    def __repr__(self):
        return str(self.__dict__)
    
    def __eq__(self, other):
        return self.__dict__ == other.__dict__

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using {device} device")

params = Params()
params, params.batch_size

Using mps device


({'batch_size': 200, 'name': 'resnet18_color_perception', 'lr': 0.1, 'workers': 4, 'moment': 0.9, 'weight_decay': 0.0001, 'lr_step_size': 30, 'lr_gamma': 0.1, 'total_epochs': 500},
 200)

In [41]:
training_folder = 'tiny-imagenet-200/train'
class_to_idx = {}
classes = []
n_images = 0
for folder in os.listdir(training_folder):
    if '.' not in folder:
        classes.append(folder)
        class_folder = "{}/{}".format(training_folder, folder)
        for sub_folder in os.listdir(class_folder):
            if '.' not in sub_folder:
                images_folder = "{}/{}".format(class_folder, sub_folder)
                break
        for image in os.listdir(images_folder):
            if '.' in image:
                n_images += 1
classes.sort()
for i, class_name in enumerate(classes):
    class_to_idx[class_name] = i

print(n_images)
print(class_to_idx)

100000
{'n01443537': 0, 'n01629819': 1, 'n01641577': 2, 'n01644900': 3, 'n01698640': 4, 'n01742172': 5, 'n01768244': 6, 'n01770393': 7, 'n01774384': 8, 'n01774750': 9, 'n01784675': 10, 'n01855672': 11, 'n01882714': 12, 'n01910747': 13, 'n01917289': 14, 'n01944390': 15, 'n01945685': 16, 'n01950731': 17, 'n01983481': 18, 'n01984695': 19, 'n02002724': 20, 'n02056570': 21, 'n02058221': 22, 'n02074367': 23, 'n02085620': 24, 'n02094433': 25, 'n02099601': 26, 'n02099712': 27, 'n02106662': 28, 'n02113799': 29, 'n02123045': 30, 'n02123394': 31, 'n02124075': 32, 'n02125311': 33, 'n02129165': 34, 'n02132136': 35, 'n02165456': 36, 'n02190166': 37, 'n02206856': 38, 'n02226429': 39, 'n02231487': 40, 'n02233338': 41, 'n02236044': 42, 'n02268443': 43, 'n02279972': 44, 'n02281406': 45, 'n02321529': 46, 'n02364673': 47, 'n02395406': 48, 'n02403003': 49, 'n02410509': 50, 'n02415577': 51, 'n02423022': 52, 'n02437312': 53, 'n02480495': 54, 'n02481823': 55, 'n02486410': 56, 'n02504458': 57, 'n02509815': 58,

In [42]:
class Class2Idx:
    def __init__(self, new_class_to_idx, original_class_to_idx=None):
        self.class_to_idx = new_class_to_idx
        self.original_class_to_idx = original_class_to_idx
        if self.original_class_to_idx:
            self.original_idx_to_class = {v: k for k, v in self.original_class_to_idx.items()}

    def __call__(self, target):
        if not self.original_class_to_idx:
            return self.class_to_idx[target]
        return self.class_to_idx[self.original_idx_to_class[target]]

In [43]:
from torchvision.datasets import ImageFolder

class CustomImageFolder(ImageFolder):
    def __init__(self, root, transform=None, custom_class_to_idx=None):
        super().__init__(root, transform=transform)

        # Override the class_to_idx and classes attributes
        if custom_class_to_idx:
            self.target_transform = Class2Idx(custom_class_to_idx, original_class_to_idx=self.class_to_idx)
            self.class_to_idx = custom_class_to_idx

In [44]:
# val_data = pd.read_csv('tiny-imagenet-200/val/val_annotations.txt', sep='\t', header= None)
# val_data[[0, 1]]
# counter = 0
# for path, name in zip(val_data[0], val_data[1]):
#     image_path = f'tiny-imagenet-200/val/images/{path}'
#     name = f'tiny-imagenet-200/val/images/{name}.jpg'
#     os.rename(image_path, name)
#     counter += 1
#     if (counter/100) == 0:
#         print(counter)

In [45]:
# source_directory = "/notebooks/tiny-imagenet-200/val/images"

# try:
#     # Get a list of all files in the directory
#     files = [f for f in os.listdir(source_directory) if os.path.isfile(os.path.join(source_directory, f))]

#     # Iterate through the files
#     for file_name in files:
#         # Create a folder name based on the file name (excluding the extension)
#         folder_name = os.path.splitext(file_name)[0]
#         folder_path = os.path.join(source_directory, folder_name)
        
#         # Create the folder
#         os.makedirs(folder_path, exist_ok=True)
        
#         # Move the file into the folder
#         source_file_path = os.path.join(source_directory, file_name)
#         destination_file_path = os.path.join(folder_path, file_name)
#         shutil.move(source_file_path, destination_file_path)
        
#         print(f"Moved {file_name} to {folder_path}")
# except Exception as e:
#     print(f"An error occurred: {e}")

In [46]:
def show_image(image, label):
    image = image.permute(1, 2, 0)
    plt.imshow(image.squeeze())
    plt.title(f'Label: {label}')
    plt.show()

In [47]:
import os
import shutil

## to get paths and names of each image

def images_get_paths(path):
    paths = {}
    names = {}
    rel_paths = ''
    for file in os.listdir(path):
        if '.' not in file:
            for img in os.listdir(path + '/' + file):
                rel_paths = path + '/' + file + '/' + os.path.relpath(img)
                if file not in paths.keys():
                    paths[file] = [rel_paths]
                    names[file] = [os.path.relpath(img)]
                else:
                    paths[file].append(rel_paths)
                    names[file].append(os.path.relpath(img))
    return paths, names

## to organizes files from the given root path into a new structured directory.

def organize_files(path):
    new_root = os.path.join("Dataset", os.path.basename(path) + "_organized")
    os.makedirs(new_root, mode=0o777, exist_ok=True)
    files_paths, files_names = images_get_paths(path)

    for month in files_paths.keys():
        month_folder = os.path.join(new_root, month)
        os.makedirs(month_folder, mode=0o777, exist_ok=True)
        
        for name, file_path in zip(files_names[month], files_paths[month]):
            category = name.split("_")[0]
            category_folder = os.path.join(month_folder, category)
            os.makedirs(category_folder, mode=0o777, exist_ok=True)
            shutil.move(file_path, os.path.join(category_folder, name))
            print(f"Moved: {file_path} -> {category_folder}")
# organize_files('tiny-imagenet-200/val_reorganized')

In [48]:
## loader for different datasets

## I deleted the convertion from BGR to RGB cause I redid dataset
def Loader_train(root_folder):
    train_transformation = transforms.Compose([
            # transforms.Resize((64,64)),
            transforms.RandomVerticalFlip(0.5),
            transforms.RandomHorizontalFlip(0.5),
            transforms.RandomRotation(degrees=(180)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.485, 0.406], std=[0.229, 0.224, 0.225])
        ])
    
    train_dataset = torchvision.datasets.ImageFolder(
        root = root_folder,
        transform = train_transformation
    )
    train_sampler = torch.utils.data.RandomSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params.batch_size,
        sampler=train_sampler,
        num_workers = params.workers,
        pin_memory=True,
    )
    return train_loader

In [49]:
val_transformation = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.ConvertImageDtype(torch.float32),
        # Normalize the pixel values (in R, G, and B channels)
        transforms.Normalize(mean=[0.485, 0.485, 0.406], std=[0.229, 0.224, 0.225])
    ])
# val_dataset = ValidationDataset('/tiny-imagenet-200/val/val_annotations.txt','/tiny-imagenet-200/val/images', transform=val_transformation, target_transform=Class2Idx(class_to_idx))
val_dataset = CustomImageFolder(
        root = "val_organized",
        transform = val_transformation,
        custom_class_to_idx=class_to_idx,
    )

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=16,
    num_workers=params.workers,
    shuffle=False,
    pin_memory=True
)

In [50]:
from math import sqrt
def train(dataloader, model, loss_fn, optimizer, epoch, writer, dataset_id):
    size = len(dataloader.dataset)
    model.train()
    start0 = time.time()
    start = time.time()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        batch_size = len(X)
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * batch_size
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}], {(current/size * 100):>4f}%")
            step=epoch+current/size
            writer.add_scalar('training loss/dataset_{}'.format(dataset_id), 
                            loss,
                            step)
            new_start = time.time()
            delta = new_start - start
            start = new_start
            if batch != 0:
                print("Done in ", delta, " seconds")
                remaining_steps = size - current
                speed = 100 * batch_size / delta
                remaining_time = remaining_steps / speed
                print("Remaining time (seconds): ", remaining_time)
        optimizer.zero_grad()
    print("Entire epoch done in ", time.time() - start0, " seconds")

In [51]:
def test(dataloader, model, loss_fn, epoch, writer, train_dataloader, dataset_id, calc_acc5=False):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct, correct_top5 = 0, 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            if calc_acc5:
                _, pred_top5 = pred.topk(5, 1, largest=True, sorted=True)
                correct_top5 += pred_top5.eq(y.view(-1, 1).expand_as(pred_top5)).sum().item()
    test_loss /= num_batches
    step = epoch
    if writer != None:
        writer.add_scalar('test loss/dataset_{}'.format(dataset_id),
                            test_loss,
                            step)
    correct /= size
    correct_top5 /= size
    if writer != None:
        writer.add_scalar('test accuracy/dataset_{}'.format(dataset_id),
                            100*correct,
                            step)
        if calc_acc5:
            writer.add_scalar('test accuracy5/dataset_{}'.format(dataset_id),
                            100*correct_top5,
                            step)
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    if calc_acc5:
        print(f"Test Error: \n Accuracy-5: {(100*correct_top5):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct

In [52]:
model = torchvision.models.resnet18()
model.fc=nn.Linear(model.fc.in_features, 200) ## Added this just to make sure that the output fits the labels
loss_fn = nn.CrossEntropyLoss()

## Changed the optimizer her for SGD with momentum
optimizer = torch.optim.SGD(model.parameters(), lr = params.lr, momentum=params.moment, weight_decay = params.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=params.lr_step_size, gamma=params.lr_gamma)

In [53]:
model = model.to(device)
resume_training = True

In [56]:
from torch.utils.tensorboard import SummaryWriter
from pathlib import Path

start_dataset_idx = 1
start_epoch = 1
early_stopping_patience = 10
no_improvement_count = 0
best_val_accuracy = float('-inf')

checkpoint_path = os.path.join("checkpoints", params.name, f"checkpoint.pth")

if resume_training and os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint["model"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
    start_epoch = checkpoint["epoch"] + 1
    start_dataset_idx = checkpoint["dataset_idx"]
    best_val_accuracy = checkpoint.get("best_val_accuracy", float('-inf')) 
    no_improvement_count = checkpoint.get("no_improvement_count", 0)
    assert params == checkpoint["params"]

Path(os.path.join("checkpoints", params.name)).mkdir(parents=True, exist_ok=True)
writer = SummaryWriter('runs/' + params.name)

dataset_root = 'Dataset/Color_organized'
dataset_folders = [os.path.join(dataset_root, f"Color_{i}_months") for i in range(0, 13)]

In [57]:
print(start_epoch, start_dataset_idx)

1 1


In [None]:
for dataset_idx, dataset_folder in enumerate(dataset_folders, start=1):
    if dataset_idx < start_dataset_idx:
        continue

    print(f"Training on dataset {dataset_idx} at {dataset_folder}")
    train_loader = Loader_train(dataset_folder)

    for epoch in range(start_epoch if dataset_idx == start_dataset_idx else 1, params.total_epochs):
        train(train_loader, model, loss_fn, optimizer, epoch, writer, dataset_idx)
        
        val_accuracy = test(val_loader, model, loss_fn, epoch, writer, train_dataloader=train_loader, dataset_id=dataset_idx, calc_acc5=True)
        
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            no_improvement_count = 0
            
            checkpoint = {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
            "epoch": epoch,
            "dataset_idx": dataset_idx,
            "params": params,
            "best_val_accuracy": best_val_accuracy,
            "no_improvement_count": no_improvement_count,
            }
            torch.save(checkpoint, checkpoint_path)
            print(f"Checkpoint successfully saved at {checkpoint_path}")
            print(f"New best validation accuracy: {val_accuracy:.4f}")
            
        else:
            no_improvement_count += 1
            print(f"No improvement for {no_improvement_count} epochs.")

        if no_improvement_count >= early_stopping_patience:
            print(f"Early stopping triggered after {epoch} epochs with no improvement.")
            break
    
    
    start_epoch = 1  
    no_improvement_count = 0
    print(f"Finished training on dataset {dataset_idx}.")
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint["model"])

writer.close()

Training on dataset 1 at Dataset/Color_organized/Color_0_months
loss: 5.363074  [  200/100000], 0.200000%
loss: 5.141621  [20200/100000], 20.200000%
Done in  243.4505491256714  seconds
Remaining time (seconds):  971.3676910114287
loss: 4.934868  [40200/100000], 40.200000%
Done in  243.27603673934937  seconds
Remaining time (seconds):  727.3953498506546
loss: 4.925238  [60200/100000], 60.200000%
Done in  246.60124802589417  seconds
Remaining time (seconds):  490.7364835715294
loss: 4.825046  [80200/100000], 80.200000%
Done in  242.1637098789215  seconds
Remaining time (seconds):  239.7420727801323
Entire epoch done in  1246.426864862442  seconds


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/sarbupintemirlan/miniforge3/envs/CVP/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/sarbupintemirlan/miniforge3/envs/CVP/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'CustomImageFolder' on <module '__main__' (built-in)>


In [20]:
test_accuracy = test(val_loader, model, loss_fn, 1, writer, train_dataloader=train_loader,dataset_id = dataset_idx, calc_acc5=True)
print(test_accuracy)

Test Error: 
 Accuracy: 0.2%, Avg loss: 13.236645 

Test Error: 
 Accuracy-5: 1.1%, Avg loss: 13.236645 

0.0019


In [None]:
# ## Standard Training
# train_loader = Loader_train(dataset_root)
# for epoch in range(start_epoch, params.total_epochs+1):
#     train(train_loader, model, loss_fn, optimizer, epoch=epoch, writer=writer)
#     checkpoint = {
#         "model": model.state_dict(),
#         "optimizer": optimizer.state_dict(),
#         "lr_scheduler": lr_scheduler.state_dict(),
#         "epoch": epoch,
#         "params": params
#      }
#     torch.save(checkpoint, checkpoint_path)
#     lr_scheduler.step()
#     test(val_loader, model, loss_fn, epoch + 1, writer, train_dataloader=train_loader, calc_acc5=True)

loss: 4.909402  [   64/100000], 0.064000%
loss: 5.078261  [ 6464/100000], 6.464000%
Done in  9.46090054512024  seconds
Remaining time (seconds):  138.2710614669323
loss: 4.922323  [12864/100000], 12.864000%
Done in  6.781798362731934  seconds
Remaining time (seconds):  92.33418470859527
loss: 4.507597  [19264/100000], 19.264000%
Done in  7.1010520458221436  seconds
Remaining time (seconds):  89.57977155804633
loss: 4.929873  [25664/100000], 25.664000%
Done in  7.03376317024231  seconds
Remaining time (seconds):  81.69715922236443
loss: 4.833933  [32064/100000], 32.064000%
Done in  6.653949499130249  seconds
Remaining time (seconds):  70.63167393326759
loss: 4.751972  [38464/100000], 38.464000%
Done in  6.292416334152222  seconds
Remaining time (seconds):  60.50158305287361
loss: 4.657359  [44864/100000], 44.864000%
Done in  7.519590616226196  seconds
Remaining time (seconds):  64.78127315878868
