# AN2DL Challenge 2 - Image Classification

In [37]:
# Manual environment switches, read by the environment-setup cell below:
#   isColab  - set to True to attempt mounting Google Drive via google.colab
#   isKaggle - set to True to force the Kaggle branch even when the Kaggle
#              environment is not auto-detected
isColab = False
isKaggle = False

# Set seed for reproducibility (used to seed Python, NumPy and PyTorch RNGs)
SEED = 42

## Loading Environment

In [38]:
import os

# Resolve the working directory for the current environment (Colab, Kaggle or
# local) and make it the process working directory.
# Reads the manual switches `isColab` / `isKaggle` and leaves them set to the
# environment that was actually selected.

# Default directory
current_dir = os.getcwd()
try:
    if not isColab:
        # Reuse the ImportError fallback below so that "not on Colab" and
        # "google.colab is not installed" are handled by the same code path.
        raise ImportError("We are not in google colab")
    from google.colab import drive

    drive.mount("/gdrive")
    # Plain path without shell-style "\ " escapes: this value is reused by
    # os.chdir / os.path.join / os.makedirs, which take backslashes literally.
    # NOTE(review): the path already ends in "dataset" while the download cell
    # appends another "dataset" component - confirm the intended Drive layout.
    current_dir = "/gdrive/My Drive/[2025-2026] AN2DL/Challenge 2/dataset"
    print("In esecuzione su Colab. Google Drive montato.")
    isColab = True

except ImportError:
    # Whatever the manual switch said, Colab is not usable here.
    isColab = False
    # Kaggle environment detection
    if os.environ.get("KAGGLE_KERNEL_RUN_TYPE") or os.path.exists("/kaggle/working") or isKaggle:
        isKaggle = True
        kaggle_work_dir = "/kaggle/working/AN2DL-challenge-2"
        os.makedirs(kaggle_work_dir, exist_ok=True)
        current_dir = kaggle_work_dir
        print("In esecuzione su Kaggle. Directory di lavoro impostata.")
    else:
        isKaggle = False
        print("Esecuzione locale. Salto mount Google Drive.")
        # Preferred local checkout; fall back to the current directory when it
        # does not exist on this machine.
        local_pref = r"G:\Il mio Drive\Colab Notebooks\[2025-2026] AN2DL\AN2DL-challenge-2"
        current_dir = local_pref if os.path.isdir(local_pref) else os.getcwd()
        print(f"Directory corrente impostata a: {current_dir}")

# One directory change for every environment (replaces the Colab-only %cd magic)
os.chdir(current_dir)

print(f"Changed directory to: {current_dir}")

Esecuzione locale. Salto mount Google Drive.
Directory corrente impostata a: G:\Il mio Drive\Colab Notebooks\[2025-2026] AN2DL\AN2DL-challenge-2
Changed directory to: G:\Il mio Drive\Colab Notebooks\[2025-2026] AN2DL\AN2DL-challenge-2


## Import Libraries

In [39]:
# Import necessary libraries
import os

# Set environment variables before importing modules
# NOTE(review): PYTHONHASHSEED is presumably read at interpreter start-up, so
# setting it here likely only affects child processes - confirm if hash
# determinism inside this kernel is required.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Matplotlib configuration directory, placed under the current working directory
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Warning is the base class of every warning category, so this filter silences
# all warnings (including the FutureWarning filter above).
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np

# Set seeds for random number generators in NumPy and Python
np.random.seed(SEED)
random.seed(SEED)

# Import PyTorch
import torch
# Seed PyTorch's default random number generator
torch.manual_seed(SEED)
from torch import nn
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
import torchvision
from torchvision.transforms import v2 as transforms
from torch.utils.data import TensorDataset, DataLoader
# torchview is installed on the fly (unpinned) right before it is imported
%pip install torchview
from torchview import draw_graph

# Configurazione di TensorBoard e directory
logs_dir = "tensorboard"
if isColab or isKaggle:
    !pkill -f tensorboard
    !mkdir -p models
    print("Killed existing TensorBoard instances and created models directory.")
else:
    os.makedirs("../models", exist_ok=True)
    
%load_ext tensorboard


# Select the compute device: CUDA when available, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Seed the RNG of every CUDA device
    torch.cuda.manual_seed_all(SEED)
    # NOTE(review): cuDNN benchmark mode auto-tunes kernels for speed; this may
    # work against the reproducibility goal of the seeding above - confirm.
    torch.backends.cudnn.benchmark = True

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

# Import other libraries
import cv2
import copy
import shutil
from itertools import product
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.gridspec as gridspec
import requests
from io import BytesIO

# Configure plot display settings
# Seaborn theme with enlarged fonts; the style is then switched to 'white'
sns.set(font_scale=1.4)
sns.set_style('white')
# Base Matplotlib font size
plt.rc('font', size=14)
# Render figures inline in the notebook output
%matplotlib inline

Note: you may need to restart the kernel to use updated packages.
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
PyTorch version: 2.9.1+cpu
Device: cpu


## Dataset Downloading

In [40]:
# Locate the dataset folders and, on a local machine only, download the
# competition archive when the dataset folder is still empty.
import subprocess

dataset_dir = os.path.join(current_dir, "dataset")
os.makedirs(dataset_dir, exist_ok=True)

train_set_dir = os.path.join(dataset_dir, "train_data")
test_set_dir = os.path.join(dataset_dir, "test_data")

if isColab:
    # Upload dataset to Google Drive manually or assume it's already there
    print("Running on Colab. Dataset is assumed to be already available.")
    print(f"Dataset directory: {dataset_dir}")
elif isKaggle:
    # Nothing to do, dataset is already available in Kaggle environment
    print("Running on Kaggle. Dataset is assumed to be already available.")
    print(f"Dataset directory: {dataset_dir}")
elif not os.listdir(dataset_dir):
    # An empty dataset directory means nothing has been downloaded yet.
    print("Downloading dataset from Kaggle in local environment...")
    # `%kaggle` is not an IPython magic; call the Kaggle CLI directly.
    # `-p` sets the download folder, so the working directory is never changed
    # and cannot be left pointing at the dataset folder if the download fails.
    subprocess.run(
        ["kaggle", "competitions", "download", "-c", "an2dl2526c2", "-p", dataset_dir],
        check=True,  # raise instead of continuing with a missing archive
    )
    zip_file = os.path.join(dataset_dir, "an2dl2526c2.zip")
    shutil.unpack_archive(zip_file, extract_dir=dataset_dir)
    os.remove(zip_file)
else:
    print("Dataset already present in local environment. Skipping download.")

Dataset already present in local environment. Skipping download.


### Loading images in memory

In [41]:
# Function that loads images from a given directory and returns them as an array
def load_images_from_directory(directory, img_size=(64, 64)):
    """Load every PNG/JPEG image found directly inside ``directory``.

    Each image is converted to RGB and resized to ``img_size`` before being
    turned into a NumPy array. Files are processed in sorted filename order so
    the result does not depend on the filesystem's listing order, and the
    extension check is case-insensitive (``.PNG`` / ``.JPG`` are accepted).

    Args:
        directory: Path of the folder to scan (sub-folders are not visited).
        img_size: Target ``(width, height)`` passed to ``Image.resize``.

    Returns:
        A tuple ``(images, filenames)`` of NumPy arrays, aligned by index.
        Both arrays are empty when ``directory`` does not exist or contains
        no matching files.
    """
    images = []
    filenames = []
    if os.path.isdir(directory):
        # os.listdir returns entries in arbitrary order; sort for reproducibility
        for filename in sorted(os.listdir(directory)):
            if not filename.lower().endswith((".png", ".jpg", ".jpeg")):
                continue
            img_path = os.path.join(directory, filename)
            # Context manager closes the underlying file once the pixel data
            # has been read by convert().
            with Image.open(img_path) as img:
                resized = img.convert("RGB").resize(img_size)
            images.append(np.array(resized))
            filenames.append(filename)

    return np.array(images), np.array(filenames)

In [42]:
# Read the whole training set into memory, resizing every image to 256x256
images, filenames = load_images_from_directory(
    directory=train_set_dir,
    img_size=(256, 256),
)

print(f"Loaded {len(images)} images from {train_set_dir}")

KeyboardInterrupt: 