In [None]:
import numpy as np
import sys
sys.path.append("../")

In [None]:
from Utils.CustomFunctions import Train_Classifiers

# Classic ML

In [None]:
# Features are loaded from the image-embedding folder (EfficientNet.npy), while the
# labels are loaded from the TF-IDF text folder.
# NOTE(review): the two arrays come from different directories — presumably they are
# row-aligned (same samples, same order); confirm before trusting the scores below.
x = np.load("../Data/Images/Image Embeddings/EfficientNet.npy")
y = np.load("../Data/Text/TF-IDF/labels.npy")

In [None]:
# Hand features and labels to the project helper imported from Utils.CustomFunctions.
# Its internals (which classifiers, how it splits the data) are not visible in this notebook.
models = Train_Classifiers(x, y)

In [None]:
# Presumably trains every classifier managed by the helper — confirm in Utils.CustomFunctions.
models.fit()

In [None]:
# Presumably reports an evaluation score per fitted classifier — TODO confirm which
# metric and which data split the helper uses.
models.score()

In [None]:
# Side-by-side confusion matrices for the fitted classifiers (judging by the method
# name; implementation lives in Utils.CustomFunctions).
models.Compare_ConfusionMatrices()

In [None]:
# Overall performance comparison across the fitted classifiers (judging by the method
# name; implementation lives in Utils.CustomFunctions).
models.Compare_Performance()

In [None]:
# Pull the model stored under the name 'LightGBM' out of the helper so it can be
# used directly in the next cell.
lgbm = models.get_single_model('LightGBM')

In [None]:
# Shape check on the probability output.
# NOTE(review): this predicts on the full feature matrix `x`, which presumably includes
# the rows the model was fitted on — fine as a shape check, not as an evaluation.
lgbm.predict_proba(x).shape

# CNN

In [1]:
from torchvision.models.efficientnet import efficientnet_b1, EfficientNet_B1_Weights, efficientnet_v2_s, EfficientNet_V2_S_Weights
import torch
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# Image metadata table; later cells read its 'File Name' and 'LABEL' columns.
data = pd.read_csv("../Data/Images/ImageLabelsSequenced.csv")

In [3]:
# Start from torchvision's EfficientNetV2-S initialised with the IMAGENET1K_V1 weights.
# `weights` is kept as its own variable because the dataset cell reuses
# `weights.transforms()` as the image preprocessing.
weights = EfficientNet_V2_S_Weights.IMAGENET1K_V1
model = efficientnet_v2_s(weights=weights)

In [4]:
# Number of target classes the new head must emit logits for.
num_classes = 3

# Read the feature width from the Linear layer that currently ends the classifier
# instead of hard-coding 1280, so the head stays correct if the backbone is swapped
# for another EfficientNet variant. Re-running this cell is safe: the replacement
# head also ends in a Linear layer with the same in_features.
head_in_features = model.classifier[-1].in_features

# Replace the ImageNet head with a fresh one: Dropout -> ReLU -> Linear(num_classes).
model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=head_in_features, out_features=num_classes)
)

In [5]:
# Training objective: multi-class cross-entropy over the raw logits.
loss_fn = torch.nn.CrossEntropyLoss()

# NAdam with its default hyper-parameters over every model parameter.
optimizer = torch.optim.NAdam(params=model.parameters())

# Multiply the learning rate by 0.4 once the monitored value (minimised) has not
# improved for 8 consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.4,
    patience=8,
)

In [6]:
# Fixed seed so the same rows land in train / val / test on every run; without it the
# held-out test set changes each time the notebook is re-executed.
SPLIT_SEED = 42

# First hold out 20% for testing, then carve 20% of the remainder off for validation.
# Both splits are stratified on LABEL to keep class proportions equal across subsets.
train_val, test = train_test_split(
    data, test_size=0.2, shuffle=True, stratify=data['LABEL'], random_state=SPLIT_SEED
)
train, val = train_test_split(
    train_val, test_size=0.2, shuffle=True, stratify=train_val['LABEL'], random_state=SPLIT_SEED
)

In [7]:
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
class CustomImageDataset(Dataset):
    """Map-style dataset that reads one image from disk per item.

    Args:
        img_dir: Directory that the entries of ``filename`` are relative to.
        filename: Positionally indexable sequence of image file names.
        labels: Positionally indexable sequence of labels, parallel to ``filename``.
        transform: Callable applied to the RGB PIL image before it is returned.

    Each item is an ``(image, label)`` tuple where ``image`` is whatever
    ``transform`` returns and ``label`` is ``torch.tensor(labels[index])``.
    """

    def __init__(self, img_dir, filename, labels, transform) -> None:
        super().__init__()
        self.img_dir = img_dir
        self.filename = filename
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the length of ``labels``."""
        return len(self.labels)

    def __getitem__(self, index):
        """Load, convert to RGB and transform the image at ``index``."""
        img_path = os.path.join(self.img_dir, self.filename[index])
        # Image.open is lazy and holds the file handle; the context manager releases
        # it deterministically. convert() returns a new image, so the result remains
        # usable after the source file has been closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[index])
        return (image, label)
    
def _make_image_dataset(frame):
    """Build a CustomImageDataset from a split of the metadata table."""
    # np.array(...) drops the pandas index, so the dataset can index positionally
    # even though the split frames keep their original row labels.
    file_names = np.array(frame['File Name'])
    targets = np.array(frame['LABEL'])
    return CustomImageDataset("../Data/Images", file_names, targets, weights.transforms())

train_set = _make_image_dataset(train)
val_set = _make_image_dataset(val)

In [8]:
BATCH_SIZE = 16

# Reshuffle the training samples every epoch; without shuffle=True each epoch replays
# the exact same batches in the exact same order.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
# Validation order does not affect the summed loss, so it is left unshuffled.
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
from tqdm.notebook import tqdm
def TrainLoopCompact(
    model,
    optimizer:torch.optim.Optimizer,
    criterion:torch.nn.Module,
    train_dataloader:torch.utils.data.DataLoader,
    val_dataloader:torch.utils.data.DataLoader,
    scheduler:torch.optim.lr_scheduler.ReduceLROnPlateau,
    num_epochs:int=20,
    early_stopping_rounds:int=5,
    return_best_model:bool=True,
    device:str='cpu'
):
    """Train ``model`` in place with validation-based early stopping.

    Each epoch runs one optimisation pass over ``train_dataloader`` and then sums the
    per-batch ``criterion`` values over ``val_dataloader``. That summed validation loss
    drives the LR scheduler, the best-checkpoint tracking and early stopping.

    Args:
        model: Network to train; moved to ``device`` and updated in place.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        train_dataloader: Iterable of ``(images, labels)`` training batches.
        val_dataloader: Iterable of ``(images, labels)`` validation batches.
        scheduler: Stepped once per epoch with the summed validation loss.
        num_epochs: Maximum number of epochs.
        early_stopping_rounds: Stop once this many consecutive epochs have passed
            without a new best validation loss.
        return_best_model: If true, reload the weights of the best epoch into
            ``model`` before returning.
        device: Device the model and every batch are moved to.

    Returns:
        None. ``model`` is modified in place.
    """
    import copy  # local import keeps this cell self-contained

    model.to(device)
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    # state_dict() returns references to the live parameter tensors, which the
    # optimizer keeps updating in place. A deep copy is required for a real snapshot.
    best_model_weights = copy.deepcopy(model.state_dict())

    for epoch in tqdm(range(num_epochs)):
        model.train()
        print("\nEpoch {}\n----------".format(epoch))
        train_loss = 0.0
        for i, (images, labels) in enumerate(train_dataloader):
            images = images.to(device)
            labels = labels.to(device, dtype=torch.long)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # .item() yields a plain float, so the running total does not hold on to
            # the autograd history of every batch.
            batch_loss = loss.item()
            train_loss += batch_loss
            print("Loss for batch {} = {}".format(i, batch_loss))

        print("\nTraining Loss for epoch {} = {}\n".format(epoch, train_loss))

        model.eval()
        validation_loss = 0.0
        with torch.inference_mode():
            for (images, labels) in val_dataloader:
                images = images.to(device)
                labels = labels.to(device, dtype=torch.long)
                outputs = model(images)
                validation_loss += criterion(outputs, labels).item()

        # Checkpointing happens outside inference_mode so the snapshot consists of
        # ordinary tensors.
        if validation_loss < best_val_loss:
            best_val_loss = validation_loss
            epochs_without_improvement = 0
            best_model_weights = copy.deepcopy(model.state_dict())
        else:
            epochs_without_improvement += 1

        print(f"Current Validation Loss = {validation_loss}")
        print(f"Best Validation Loss = {best_val_loss}")
        print(f"Epochs without Improvement = {epochs_without_improvement}")

        scheduler.step(validation_loss)
        # >= rather than ==: also stops for early_stopping_rounds <= 0 instead of
        # silently running the full num_epochs.
        if epochs_without_improvement >= early_stopping_rounds:
            break

    if return_best_model:
        model.load_state_dict(best_model_weights)

In [None]:
# Use the GPU when one is available, otherwise fall back to CPU so the notebook still
# runs end-to-end on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Up to 100 epochs, stopping after 15 consecutive epochs without a better validation loss.
TrainLoopCompact(
    model,
    optimizer,
    loss_fn,
    train_loader,
    val_loader,
    scheduler,
    num_epochs=100,
    early_stopping_rounds=15,
    device=device,
)