In [None]:
import os
import csv
import yaml
from PIL import Image
import torch
import torch.nn as nn
import torchvision
import numpy as np
import wandb
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

from dataset import Dataset
from tools.test import test
from tools.train import train
from check_cuda import check_cuda
from models.ResnetFFN import ResnetFFN

from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is what provides CONFIG_DIR, which is read via
# os.getenv() when the YAML config is loaded further down — confirm.
load_dotenv()

In [None]:
# Locations of the label CSV and of the folder holding the image files
csv_file_path = "./data/CodingChallenge_v2/car_imgs_4000.csv"
images_dir = "./data/CodingChallenge_v2/imgs"

# Accumulators for the loaded images and their two perspective scores
images, scores_hood, scores_backdoor_left = [], [], []

# Spatial size (height, width) every image is resized to
IMG_H, IMG_W = 224, 224

In [None]:
# Predetermined per-channel statistics, given on the 0-255 scale and divided by
# 255 to match the value range of the tensors they are applied to
mean_images = np.array((122.09624237, 123.38567456, 120.75862292)) / 255.0
std_images = np.array((61.13438223, 62.09970917, 65.60647365)) / 255.0

# Preprocessing pipeline, applied in order:
#   1. resize to (IMG_H x IMG_W)
#   2. convert the image to a torch.Tensor
#   3. normalize each channel with the statistics above
preprocessing_steps = [
    torchvision.transforms.Resize((IMG_H, IMG_W)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean_images, std_images),
]
transform_norm = torchvision.transforms.Compose(preprocessing_steps)

In [None]:
# Load every image listed in the CSV together with its two perspective scores.
#
# The accumulators are (re)created here so that this cell can be re-run: further
# down, `images`, `scores_hood` and `scores_backdoor_left` are rebound to NumPy
# arrays, which have no .append().
images = []
scores_hood = []
scores_backdoor_left = []
missing_files = []

# newline="" is what the csv module expects for file objects passed to csv.reader
with open(csv_file_path, mode="r", newline="") as csv_file:
    csv_reader = csv.reader(csv_file)

    # Skip the header
    next(csv_reader)

    for row in csv_reader:
        if not row:
            continue  # tolerate blank lines

        filename, score_hood, score_backdoor_left = row
        image_path = os.path.join(images_dir, filename)

        # Rows without an image file are skipped, but counted and reported below
        if not os.path.exists(image_path):
            missing_files.append(filename)
            continue

        with Image.open(image_path) as img:
            # Force three channels: Normalize() is configured with 3-channel statistics
            images.append(
                np.array(transform_norm(img.convert("RGB")), dtype=np.float32)
            )

        scores_hood.append(np.array(float(score_hood), dtype=np.float32))
        scores_backdoor_left.append(
            np.array(float(score_backdoor_left), dtype=np.float32)
        )

if missing_files:
    print(f"Skipped {len(missing_files)} CSV rows whose image file was not found")

# Fail early with a readable message instead of an axis error in the statistics below
if not images:
    raise FileNotFoundError(
        f"None of the images listed in {csv_file_path} were found in {images_dir}"
    )

images = np.array(images)  # Shape: (N, 3, IMG_H, IMG_W)
scores_hood = np.array(scores_hood).reshape(-1, 1)  # Shape: (N, 1)
scores_backdoor_left = np.array(scores_backdoor_left).reshape(-1, 1)  # Shape: (N, 1)

# Check if some scores are NaN (only the first offending index is reported)
nan_rows = np.flatnonzero(
    np.isnan(scores_hood).ravel() | np.isnan(scores_backdoor_left).ravel()
)
if nan_rows.size:
    print(f"Found NaN scores at index {nan_rows[0]}")

# Report array shapes and the per-channel statistics after normalization
print(f"Images Shape: {images.shape}")
print(f"Scores Hood Shape: {scores_hood.shape}")
print(f"Scores Backdoor Left Shape: {scores_backdoor_left.shape}")
print("")
print(
    "Mean of Image Pixels: ",
    np.mean(images, axis=(0, 2, 3)),
    "   ->   [values should be near 0!]",
)
print(
    "Standard Deviation of Image Pixels: ",
    np.std(images, axis=(0, 2, 3)),
    "   ->   [values should be near 1!]",
)

In [None]:
# Histogram of all pixel values in the loaded image array
fig, ax = plt.subplots()
ax.hist(images.ravel(), bins=50, density=True)
ax.set_xlabel("pixel values")
ax.set_ylabel("relative frequency")
ax.set_title("distribution of pixels")

print(
    """\nIMPORTANT: Our dataset includes a lot of white pixels, which is due to the fact
            that the images are synthetically generated and have a lot of white backgrounds!"""
)

In [None]:
# Summarize the value distribution of both perspective scores


def _score_summary(label, scores):
    """Return a one-line summary of `scores`: min, max, mean, std, the number of
    exact zeros and the mean over the non-zero entries."""
    non_zero = scores[scores != 0]
    return (
        f"{label}: min={scores.min()}, max={scores.max()}, "
        f"mean={scores.mean()}, std={scores.std()}, "
        f"num_zeros={np.count_nonzero(scores == 0)}, "
        f"mean_non_zero={np.mean(non_zero)}"
    )


scores_hood = np.array(scores_hood)
scores_backdoor_left = np.array(scores_backdoor_left)

print(_score_summary("Score hood", scores_hood))

print(_score_summary("Score backdoor left", scores_backdoor_left))

In [None]:
# Model input: the stacked image array as a tensor, shape (N, 3, IMG_H, IMG_W)
X = torch.from_numpy(images)

# Model target: both (N, 1) score columns placed side by side, shape (N, 2)
y = torch.from_numpy(np.hstack((scores_hood, scores_backdoor_left)))

for label, tensor in (("X", X), ("y", y)):
    print(
        f"{label} size:",
        tensor.size(),
        f"{label} type:",
        type(tensor),
        f"{label}.dtype:",
        tensor.dtype,
    )

In [None]:
# Project helper (presumably prints CUDA diagnostics — see check_cuda.py)
check_cuda()

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"\nDevice: {device}")

In [None]:
# Fractions of the full dataset assigned to each subset
train_ratio, test_ratio, val_ratio = 0.8, 0.1, 0.1

# First cut: training data vs. a held-out pool.
# Samples are shuffled with a fixed seed; no stratification is applied.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=(1 - train_ratio), random_state=42, shuffle=True
)

# Second cut: divide the held-out pool into validation and test subsets
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout,
    y_holdout,
    test_size=test_ratio / (test_ratio + val_ratio),
    random_state=42,
    shuffle=True,
)

# The intermediate pool is no longer needed once it has been divided
del X_holdout, y_holdout

for label, subset in (("X_train", X_train), ("X_test", X_test), ("X_val", X_val)):
    print(f"{label} size: ", subset.size())

In [None]:
# Wrap each split in the project's Dataset class. The third argument is "cpu"
# (presumably the device the Dataset keeps its tensors on — see dataset.py).
train_dataset, test_dataset, val_dataset = (
    Dataset(features, targets, "cpu")
    for features, targets in (
        (X_train, y_train),
        (X_test, y_test),
        (X_val, y_val),
    )
)

# Drop the notebook-level names of the raw tensors; from here on only the
# Dataset objects are used
del X, y
del X_train, y_train
del X_test, y_test
del X_val, y_val

In [None]:
# Report how much GPU memory (VRAM) is currently allocated, in GB
BYTES_PER_GB = 1024**3

gpu_memory_allocated = torch.cuda.memory_allocated(device)  # bytes
gpu_memory_allocated_gb = gpu_memory_allocated / BYTES_PER_GB

print("GPU Memory Allocated:", gpu_memory_allocated_gb, "GB")

In [None]:
# Load the experiment configuration (YAML).
# The path is taken from the CONFIG_DIR environment variable; despite its name,
# the variable must hold the path of the config *file*, since it is passed to open().
config_path = os.getenv("CONFIG_DIR")
if not config_path:
    # Without this check open(None) fails with an unhelpful TypeError
    raise RuntimeError(
        "Environment variable CONFIG_DIR is not set; "
        "it must contain the path of the YAML configuration file"
    )

with open(config_path, "r") as config_file:
    config_model = yaml.safe_load(config_file)

CNN-FCN

In [None]:
# Start a W&B run for the ResnetFFN model and log the hyperparameters with it.

# Keys copied from each section of the YAML config into the run's config
LOGGED_KEYS = {
    "Project": ("project_name",),
    "Model": (
        "model_name",
        "num_outputs",
        "batch_first",
        "conv_channel",
        "fc_hidden_dims",
    ),
    "Dataloader": (
        "batch_size",
        "shuffle",
        "num_workers",
        "pin_memory",
        "drop_last",
    ),
    "Optimizer": (
        "optimizer",
        "backbone_lr",
        "other_lr",
        "beta1",
        "beta2",
        "eps",
        "weight_decay",
        "amsgrad",
        "maximize",
        "foreach",
        "capturable",
        "differentiable",
        "fused",
    ),
    "Scheduler": (
        "scheduler",
        "factor",
        "patience",
        "threshold",
        "threshold_mode",
        "cooldown",
        "min_lr",
        "verbose",
    ),
    "Loss": ("loss",),
    "Training": (
        "num_epochs",
        "save_dir",
        "save_period",
        "log_period",
        "log_dir",
        "log_file",
        "log_level",
        "seed",
    ),
}

# Values that are explicitly converted with float() before being logged
FLOAT_KEYS = ("backbone_lr", "other_lr", "eps")

hyperparameters = {"device": device}
hyperparameters.update(
    {
        key: (
            float(config_model[section][key])
            if key in FLOAT_KEYS
            else config_model[section][key]
        )
        for section, keys in LOGGED_KEYS.items()
        for key in keys
    }
)

wandb.init(
    # W&B project this run is logged to
    project=config_model["Project"]["project_name"],
    # hyperparameters and run metadata to track
    config=hyperparameters,
)

# From here on, settings are read from the run's config object
config = wandb.config

# Build the model from the logged settings and move it to the selected device
model = ResnetFFN(
    config.device,
    config.num_outputs,
    config.batch_first,
    config.conv_channel,
    config.fc_hidden_dims,
)
model = model.to(device)
print(model)

INITIALIZE DATA LOADERS

In [None]:
# Batch the three datasets.

# Options shared by all three loaders
loader_options = dict(
    batch_size=config.batch_size,
    num_workers=config.num_workers,
    pin_memory=config.pin_memory,
)

# Only the training loader uses the configured shuffle / drop_last behaviour
train_loader = DataLoader(
    train_dataset,
    shuffle=config.shuffle,
    drop_last=config.drop_last,
    **loader_options,
)

# Evaluation loaders must see every sample exactly once: reusing the training
# settings here could silently drop the last incomplete batch from the test and
# validation metrics, so shuffling and drop_last are disabled for them.
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    drop_last=False,
    **loader_options,
)
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    drop_last=False,
    **loader_options,
)

# Sanity check on the first training batch.
# Expected (assuming Dataset returns the stored tensors unchanged — confirm in dataset.py):
#   X_.shape: (batch, 3, IMG_H, IMG_W), y_.shape: (batch, 2)
for X_, y_ in train_loader:
    print("X.shape:", X_.shape, "   y.shape: ", y_.shape)
    break

TRAINING

In [None]:
# Freeze the first layers of the backbone: every parameter that comes before
# FIRST_TRAINABLE_PARAM in model.named_parameters() stops receiving gradients.
FIRST_TRAINABLE_PARAM = "backbone.6.0.conv1.weight"

# Guard: if the marker name does not exist (e.g. a different backbone layout),
# the loop below would never break and would freeze the entire model.
if FIRST_TRAINABLE_PARAM not in [name for name, _ in model.named_parameters()]:
    raise ValueError(
        f"Parameter {FIRST_TRAINABLE_PARAM!r} not found in the model; "
        "refusing to freeze every parameter"
    )

for name, param in model.named_parameters():
    if name == FIRST_TRAINABLE_PARAM:
        break
    param.requires_grad = False

In [None]:
# Baseline: run the project's test() helper on the test loader before any training,
# so later results can be compared against it.
# save_model=False presumably tells the helper not to persist the model — confirm in tools/test.py.
model.eval()
test(model, test_loader, device, config.project_name, save_model=False)
# Switch back to training mode; the trailing ";" suppresses the cell's output (the model repr)
model.train();

In [None]:
# Loss function: mean squared error
# (alternatives left for experimentation: nn.L1Loss(), nn.SmoothL1Loss())
criterion = nn.MSELoss()

# Two parameter groups, so the backbone and all remaining parameters can be
# trained with different learning rates
backbone_lr, other_lr = config.backbone_lr, config.other_lr

backbone_params = list(model.backbone.parameters())
other_params = [
    tensor
    for param_name, tensor in model.named_parameters()
    if "backbone" not in param_name
]

param_groups = [
    dict(params=backbone_params, lr=backbone_lr),
    dict(params=other_params, lr=other_lr),
]

# AdamW settings, all taken from the run config
adamw_options = dict(
    betas=(config.beta1, config.beta2),
    eps=config.eps,
    weight_decay=config.weight_decay,
    amsgrad=config.amsgrad,
    maximize=config.maximize,
    foreach=config.foreach,
    capturable=config.capturable,
    differentiable=config.differentiable,
    fused=config.fused,
)
optimizer = torch.optim.AdamW(param_groups, **adamw_options)

# Reduce the learning rates when the monitored quantity stops decreasing
# (mode="min"); the same lower bound is used for every parameter group
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=config.factor,
    patience=config.patience,
    threshold=config.threshold,
    threshold_mode=config.threshold_mode,
    cooldown=config.cooldown,
    min_lr=[config.min_lr] * len(param_groups),
    verbose=config.verbose,
)

# Show the configured components, each followed by an empty line
for label, component in (
    ("optimizer: ", optimizer),
    ("loss function: ", criterion),
    ("scheduler", scheduler),
):
    print(label, component)
    print("")

In [None]:
# Train the model: hand the model, the training and validation loaders, the loss,
# the optimizer, the LR scheduler and the run config to the project's train() helper
train(model, train_loader, val_loader, criterion, optimizer, scheduler, config)

In [None]:
# Final evaluation on the test loader after training.
# save_model=True presumably makes the helper persist the model — confirm in tools/test.py.
# NOTE(review): this cell neither calls model.eval() nor disables gradients itself;
# verify that test() (or train()) takes care of both.
test(model, test_loader, device, config.project_name, save_model=True)

In [None]:
# Print learnable parameter values after training
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)

In [None]:
# Close the W&B run that was started with wandb.init()
wandb.finish()

In [None]:
# Print every direct child module of the backbone together with its name/index.
# (No counter named `iter` is created here: it was unused and shadowed the builtin
# iter() for the rest of the kernel session.)
for layer_num, layer in model.backbone.named_children():
    print("Layer Number", layer_num, ":", layer)

In [None]:
# List the qualified name of every model parameter, one per line
for param_name, _param in model.named_parameters():
    print(param_name)

In [None]:
# Single-image inference with the saved weights.

# map_location lets a checkpoint saved on a GPU be restored on the current device.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model_state_dict = torch.load(
    "pretrained/model_state_dict_mse_0_0045.pth", map_location=device
)
model.load_state_dict(model_state_dict)

# Inference mode for layers that behave differently while training
# (e.g. batch normalization or dropout)
model.eval()

# Apply the same preprocessing as for the training data; the context manager
# closes the image file again
with Image.open(
    "data/CodingChallenge_v2/imgs/4d8d3780-e786-400f-b2fd-62eed728ba8c.jpg"
) as img:
    # unsqueeze(0) adds the batch dimension; the input must be on the model's device
    img_tensor = transform_norm(img.convert("RGB")).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(img_tensor)
    print(output)