In [29]:
import os
import json
from tqdm import tqdm
from scipy.fft import dctn, idctn

import mlflow
import mlflow.pytorch

import numpy as np
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor, Normalize
from torchvision.models import resnet18
from torchvision.models.vision_transformer import VisionTransformer
from transformers import get_cosine_with_hard_restarts_schedule_with_warmup


from preprocessing.transforms import CompressedToTensor, ZigZagOrder, ChooseAC, FlattenZigZag, ConvertToFrequencyDomain, ConvertToYcbcr, Quantize, ScaledPixels, LUMINANCE_QUANTIZATION_MATRIX, CHROMINANCE_QUANTIZATION_MATRIX
from model.init import init_kaiming_normal, set_seed, resume_from_checkpoint, init_truncated_normal
from model.vit import CompressedVisionTransformer
from model.baseline import ResNet18

# 1. Training parameters init:

## 1.1 Variables init

In [30]:
# Folder holding the CIFAR-10 files, relative to the working directory.
DOWNLOAD_PATH = os.path.join('data', 'cifar10')
# Backward-compatible alias: the original (misspelled) name is still referenced
# by other cells of this notebook.
DOWNLAOD_PATH = DOWNLOAD_PATH
SEED = 42  # handed to set_seed() around every random operation
VALIDATION_SET = 0.1  # intended fraction of the training data held out for validation
BATCH_SIZE = 128
AC = 5  # argument of ChooseAC in the preprocessing pipeline; also used as the model's patch_size
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

## 1.2 Preprocessing steps:

In [31]:
# Ordered as luminance, chrominance, chrominance
# (presumably one matrix per Y / Cb / Cr channel — confirm against Quantize).
quantization_matrices = [LUMINANCE_QUANTIZATION_MATRIX] + 2 * [CHROMINANCE_QUANTIZATION_MATRIX]

# Alternative pipeline: plain RGB image.
if_you_want_to_get_an_RGB_image = Compose([
    ToTensor(),  # pixels in range [0-1]
])

# Alternative pipeline: YCbCr image.
if_you_want_to_get_ycbcr_image = Compose([
    CompressedToTensor(),  # 3x32x32, pixels in range [0-255]
    ConvertToYcbcr(),      # 3x32x32, pixels in range [0-255]
    ScaledPixels(),        # pixels in range [0-1]
])

# Pipeline used by the datasets below: frequency-domain features.
# Shape notes are the ones recorded by the original author.
frequency_domain_steps = [
    CompressedToTensor(),                    # 3x32x32, pixels in range [0-255]
    ConvertToYcbcr(),                        # 3x32x32, pixels in range [0-255]
    ConvertToFrequencyDomain(norm='ortho'),  # 3x32x32
    Quantize(
        quantization_matrices=quantization_matrices,
        alpha=1.0,
        floor=True,
    ),                                       # 3x32x32
    ZigZagOrder(),                           # 3x16x64
    ChooseAC(AC),                            # 3x16x(AC+1)
    FlattenZigZag(),                         # 16x(3x(AC+1))
]
transform = Compose(frequency_domain_steps)

## 1.3 Model init:

In [32]:
# Keyword arguments forwarded to the model constructor.
MODEL_PARAMETERS = dict(
    image_size=32,
    patch_size=AC,  # for CompressedVisionTransformer set as AC, otherwise set to 8
    num_layers=4,
    num_heads=8,
    hidden_dim=248,
    mlp_dim=1024,
    dropout=0.1,
    attention_dropout=0.1,
    num_classes=10,
)

# Parameter initialization happens inside __init__, so the model is constructed
# under the seed. To try another architecture (e.g. VisionTransformer), swap the
# class on the line below.
with set_seed(SEED):
    model = CompressedVisionTransformer(**MODEL_PARAMETERS).to(DEVICE)

## 1.4 Training init:

In [33]:
# --- Run identity and checkpoint location ---
experiment_name = 'vit_full_frequency'
checkpoint_folder = 'checkpoints'
os.makedirs(checkpoint_folder, exist_ok=True)
# Prefix of every checkpoint file; epoch / "final" suffixes are appended when saving.
checkpoint_path = os.path.join(checkpoint_folder, experiment_name)
checkpoint_every_nth_epoch = 1  # Set None for no checkpointing
start_epoch = 0

# --- Loss and optimizer ---
criterion = nn.CrossEntropyLoss()
lr = 1e-4
num_epochs = 10
weight_decay = 0.01
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

# --- Gradient clipping ---
gradient_clipping = nn.utils.clip_grad_norm_
max_grad_norm = 1.0

# --- Learning-rate schedule (measured in optimizer steps) ---
# Hard-coded number of optimizer steps per epoch; presumably ceil(45000 / 128).
# Update it whenever the batch size or the train/validation split changes.
steps_per_epoch = 352
warmup_steps = 5 * steps_per_epoch  # 5 epochs -> 1760 steps
scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=num_epochs * steps_per_epoch,
)

## 1.5 Resuming from a checkpoint

In [34]:
# load_checkpoint_path = 'change to .pth file path'
# model, optimizer, start_epoch = resume_from_checkpoint(checkpoint_path=load_checkpoint_path, model=model, optimizer=optimizer)

## 1.6 Collecting all parameters for logging:

In [35]:
# Class name of every preprocessing step, keyed by its position in the pipeline.
PREPROCESSING_PARAMETERS = dict(
    (f'step_{position}', type(step).__name__)
    for position, step in enumerate(transform.transforms)
)

# Training configuration; the optimizer's default hyper-parameters are merged in
# last, so they win on any key collision.
TRAINING_PARAMETERS = {
    "criterion": type(criterion).__name__,
    "optimizer_type": type(optimizer).__name__,
    "seed": SEED,
    "batch_size": BATCH_SIZE,
    "validation": VALIDATION_SET,
    "gradient_clipping": max_grad_norm,
    "scheduler": type(scheduler).__name__,
    "warmup_steps": warmup_steps,
}
TRAINING_PARAMETERS.update(optimizer.defaults)

# Extend the constructor arguments with the model's class name for logging.
model_class_name = type(model).__name__
MODEL_PARAMETERS = {"model": model_class_name, **MODEL_PARAMETERS}

# 2. Extract, Transform and Load

In [36]:
# Both splits share the storage location, the preprocessing pipeline and the
# no-download policy (the files must already be present under DOWNLAOD_PATH).
cifar_kwargs = dict(root=DOWNLAOD_PATH, transform=transform, target_transform=None, download=False)
cifar = CIFAR10(train=True, **cifar_kwargs)
cifar_test = CIFAR10(train=False, **cifar_kwargs)

In [37]:
with set_seed(SEED):  # For reproducible results run any random operations with set_seed()
    # Size the validation split from VALIDATION_SET, so the split that is used
    # always matches the value logged with the training parameters.
    num_val = int(VALIDATION_SET * len(cifar))
    num_train = len(cifar) - num_val

    cifar_train, cifar_val = random_split(cifar, [num_train, num_val])

In [38]:
# Mini-batch loaders for training and validation; both are configured to shuffle.
loader_kwargs = dict(batch_size=BATCH_SIZE, shuffle=True)
train = DataLoader(cifar_train, **loader_kwargs)
val = DataLoader(cifar_val, **loader_kwargs)
# The test loader serves the whole test split as one un-shuffled batch.
test = DataLoader(cifar_test, batch_size=len(cifar_test), shuffle=False)

# 3. Training with MLflow parameter and metric tracking

In [None]:
def _save_checkpoint(path, epoch):
    """Write the current model and optimizer state, tagged with `epoch`, to `path`."""
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, path)


mlflow.set_experiment(experiment_name)
with mlflow.start_run():
    # Store the complete run configuration as JSON-encoded parameters.
    mlflow.log_param("preprocessing_steps", json.dumps(PREPROCESSING_PARAMETERS))
    mlflow.log_param("training_parameters", json.dumps(TRAINING_PARAMETERS))
    mlflow.log_param("model_parameters", json.dumps(MODEL_PARAMETERS))

    with set_seed(SEED):
        # Index of the last finished epoch. It stays at start_epoch - 1 when the
        # loop body never runs (start_epoch >= num_epochs), so the final
        # checkpoint below never touches an undefined loop variable.
        last_epoch = start_epoch - 1

        for epoch in range(start_epoch, num_epochs):
            # ---- Training pass ----
            model.train()
            train_loss = 0.0
            for images, labels in tqdm(train):
                images, labels = images.to(torch.float32).to(DEVICE), labels.to(DEVICE)

                optimizer.zero_grad()

                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                gradient_clipping(model.parameters(), max_grad_norm)
                optimizer.step()
                # The schedule is configured in optimizer steps, so it advances per batch.
                scheduler.step()

                # Weight the batch loss by the batch size; divided by the dataset size below.
                train_loss += loss.item() * images.size(0)

            train_loss /= len(train.dataset)

            mlflow.log_metric("train_loss", train_loss, step=epoch)

            # ---- Validation pass ----
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0
            total_top2 = 0

            with torch.no_grad():
                for images, labels in val:
                    images, labels = images.to(torch.float32).to(DEVICE), labels.to(DEVICE)

                    outputs = model(images)
                    loss = criterion(outputs, labels)

                    val_loss += loss.item() * images.size(0)

                    predicted = outputs.argmax(dim=1)
                    top2_pred = outputs.topk(2, dim=1).indices

                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    # A sample is a top-2 hit when its label equals either of its two
                    # highest-scoring classes; compared for the whole batch at once.
                    total_top2 += (top2_pred == labels.unsqueeze(1)).any(dim=1).sum().item()

            val_loss /= len(val.dataset)
            val_accuracy = correct / total
            top2_accuracy = total_top2 / total

            mlflow.log_metric("val_loss", val_loss, step=epoch)
            mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)
            mlflow.log_metric("val_top2accuracy", top2_accuracy, step=epoch)

            print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}, Top 2 Validation Accuracy: {top2_accuracy:.4f}')

            # ---- Periodic checkpoint (disabled when checkpoint_every_nth_epoch is None) ----
            if checkpoint_every_nth_epoch and (epoch + 1) % checkpoint_every_nth_epoch == 0:
                save_path = f'{checkpoint_path}_epoch_{epoch + 1}.pth'
                _save_checkpoint(save_path, epoch)
                print(f'Checkpoint saved to {save_path}')

            last_epoch = epoch

        # ---- Final checkpoint, written once the epoch loop is over ----
        save_path = f'{checkpoint_path}_final.pth'
        _save_checkpoint(save_path, last_epoch)
        # mlflow.log_artifact(save_path)
        #TODO: Add more metrics
            #TODO: Add more metrics

# 4. Saving results:

In [None]:
import shutil
import os

# Pack the contents of the local 'saved_results' directory into a zip archive
# named 'saved_results' (make_archive appends the '.zip' extension).
shutil.make_archive(base_name='saved_results', format='zip', root_dir='saved_results')

In [None]:
import os
import boto3
from botocore.exceptions import NoCredentialsError

# Upload settings
bucket_name = '#########'  # placeholder value: set the destination bucket before running
folder_path = 'saved_results'  # local directory whose files are uploaded
s3_folder_path = 'run_one/saved_results'  # The path in S3 where the folder will be uploaded

# S3 client used by the upload helper (no explicit credentials are passed here)
s3 = boto3.client('s3')



def upload_directory_to_s3(folder_path, bucket_name, s3_folder_path, s3_client=None):
    """Upload every file under ``folder_path`` to an S3 bucket, keeping the folder layout.

    Args:
        folder_path: Local directory that is walked recursively.
        bucket_name: Name of the destination bucket.
        s3_folder_path: Key prefix under which the files are stored.
        s3_client: Object exposing ``upload_file(local_path, bucket, key)``.
            Defaults to the module-level ``s3`` client.

    Returns:
        None. Progress and failures are reported with ``print`` instead of being
        raised: a file that cannot be found is skipped, while missing credentials
        stop the remaining uploads, since each of them would fail the same way.
    """
    client = s3 if s3_client is None else s3_client

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            local_path = os.path.join(root, file)
            relative_path = os.path.relpath(local_path, folder_path)
            # S3 keys use '/'. Only the platform separator is translated, so a
            # literal backslash inside a POSIX file name is left untouched.
            s3_path = os.path.join(s3_folder_path, relative_path).replace(os.sep, "/")

            try:
                client.upload_file(local_path, bucket_name, s3_path)
                print(f'Successfully uploaded {local_path} to s3://{bucket_name}/{s3_path}')
            except FileNotFoundError:
                print(f'The file {local_path} was not found')
            except NoCredentialsError:
                print('Credentials not available')
                return

# Push the local results folder to the configured bucket and key prefix
upload_directory_to_s3(
    folder_path=folder_path,
    bucket_name=bucket_name,
    s3_folder_path=s3_folder_path,
)