# DL PROJECT

In [None]:
# all installs
#pip install torch torchvision torchaudio

# all imports
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from torch.optim import Adam
import torch.nn as nn
import torchvision.models as models
from sklearn.model_selection import KFold, train_test_split
import numpy as np
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import copy
from torchvision.transforms import InterpolationMode
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import timm

Check for GPU

In [None]:
# Select the compute device once: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "CUDA is available. Using GPU."
    if device.type == "cuda"
    else "CUDA is not available. Using CPU."
)

In [None]:
#################################################################################################

In [None]:
import gc

# Ask PyTorch to release the CUDA memory its caching allocator is holding but not using.
torch.cuda.empty_cache()

# Then run a full Python garbage-collection pass.
gc.collect()

In [None]:
# Read the competition's train and test tables from the Kaggle input directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/plant-pathology-2020-fgvc7/train.csv",
        "/kaggle/input/plant-pathology-2020-fgvc7/test.csv",
    )
)

# Directory holding the .jpg files; the dataset class appends "<image_id>.jpg".
images_path = "/kaggle/input/plant-pathology-2020-fgvc7/images/"

class PlantDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The first column of ``df`` is read as the image file stem (``".jpg"`` is
    appended); every remaining column of the row is converted to float and
    returned as the label vector. A frame with only the id column therefore
    yields an empty label tensor.

    Args:
        df: Table with the image id in column 0 and label values after it.
        images_path: Directory that contains the ``<image_id>.jpg`` files.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, df, images_path, transform=None):
        self.df = df
        self.images_path = images_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, float32 label tensor)``."""
        # Fetch the row once instead of indexing the frame twice.
        row = self.df.iloc[idx]
        img_name = os.path.join(self.images_path, row.iloc[0] + ".jpg")

        # The context manager closes the underlying file deterministically;
        # convert() returns an independent image that stays valid afterwards.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        label = row.iloc[1:].values.astype("float")

        return image, torch.tensor(label, dtype=torch.float32)

# train_df and images_path are already defined at the top of this cell, so the
# CSV is not read a second time here.

# Each backbone is instantiated so timm can resolve the preprocessing it expects
# (the resolved data config is what create_transform is driven by).
model_eff = timm.create_model('efficientnet_b4.ra2_in1k', pretrained=True)
data_config_eff = timm.data.resolve_model_data_config(model_eff)
transforms_eff = timm.data.create_transform(**data_config_eff, is_training=True)

model_vit = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
data_config_vit = timm.data.resolve_model_data_config(model_vit)
transforms_vit = timm.data.create_transform(**data_config_vit, is_training=True)

model_res = timm.create_model('resnet50.a1_in1k', pretrained=True)
data_config_res = timm.data.resolve_model_data_config(model_res)
transforms_res = timm.data.create_transform(**data_config_res, is_training=True)

# Evaluation transforms are built with is_training=False. Previously these names
# were plain aliases of the training transforms, so validation/test images went
# through the training-time pipeline as well.
transforms_eff_val = timm.data.create_transform(**data_config_eff, is_training=False)
transforms_vit_val = timm.data.create_transform(**data_config_vit, is_training=False)
transforms_res_val = timm.data.create_transform(**data_config_res, is_training=False)

In [None]:
# One dataset per backbone: same rows and images, but each model's own training transforms.
train_dataset_eff = PlantDataset(train_df, images_path, transform=transforms_eff)
train_dataset_vit = PlantDataset(train_df, images_path, transform=transforms_vit)
train_dataset_res = PlantDataset(train_df, images_path, transform=transforms_res)

# Shuffled mini-batch loaders (16 samples per batch) over those datasets.
train_loader_eff = DataLoader(train_dataset_eff, batch_size=16, shuffle=True)
train_loader_vit = DataLoader(train_dataset_vit, batch_size=16, shuffle=True)
train_loader_res = DataLoader(train_dataset_res, batch_size=16, shuffle=True)

# efficientnet b4
class EfficientNetModel(nn.Module):
    """timm EfficientNet-B4 backbone with its classifier replaced for this task.

    Args:
        num_classes: Number of logits produced by the replacement classifier.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model('efficientnet_b4.ra2_in1k', pretrained=pretrained)
        # Replace the backbone's classifier with a linear layer sized to num_classes.
        self.model.classifier = nn.Linear(self.model.classifier.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# vision transformer
class VisionTransformerModel(nn.Module):
    """timm ViT-Base/16 (224) backbone with its head replaced for this task.

    Args:
        num_classes: Number of logits produced by the replacement head.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            'vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=pretrained
        )
        # Replace the backbone's head with a linear layer sized to num_classes.
        self.model.head = nn.Linear(self.model.head.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# resnet50
class ResNetModel(nn.Module):
    """timm ResNet-50 backbone with its final fully connected layer replaced.

    Args:
        num_classes: Number of logits produced by the replacement ``fc`` layer.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model('resnet50.a1_in1k', pretrained=pretrained)
        # Replace the backbone's fc layer with a linear layer sized to num_classes.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# vitb
class VisionTransformerBModel(nn.Module):
    """timm DINOv2 ViT-Base/14 (reg4) backbone with a new linear head attached.

    Unlike the other wrappers, the head's input width is taken from the
    backbone's ``embed_dim`` rather than from an existing head layer.

    Args:
        num_classes: Number of logits produced by the new head.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            'vit_base_patch14_reg4_dinov2.lvd142m', pretrained=pretrained
        )
        # Attach a linear head mapping the embedding width to num_classes.
        self.model.head = nn.Linear(self.model.embed_dim, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# TRAIN
def train_epoch(model, dataloader, optimizer, criterion):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    The model is put in training mode, every batch is moved to the module-level
    ``device``, and one optimizer step is taken per batch. The returned value is
    the sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    loss_total = 0.0

    bar = tqdm(dataloader, desc="Training", leave=False)
    for batch_images, batch_labels in bar:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Clear gradients left from the previous step before the new backward pass.
        optimizer.zero_grad()
        loss = criterion(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        loss_total += batch_loss
        bar.set_postfix({'loss': batch_loss})

    return loss_total / len(dataloader)


# VALIDATION
def val_epoch(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

    The model is put in eval mode and gradients are disabled; no parameters are
    updated. The returned value is the sum of the per-batch loss values divided
    by the number of batches.
    """
    model.eval()
    loss_total = 0.0

    bar = tqdm(dataloader, desc="Validating", leave=False)
    with torch.no_grad():
        for batch_images, batch_labels in bar:
            batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

            batch_loss = criterion(model(batch_images), batch_labels).item()
            loss_total += batch_loss
            bar.set_postfix({'loss': batch_loss})

    return loss_total / len(dataloader)

In [None]:
# Number of batches per epoch in the EfficientNet training loader.
# The validation-loader size is not printed here: val_loader_eff is only created
# in the later train/validation section, so referencing it at this point raised
# NameError when the notebook was run top to bottom.
print(len(train_loader_eff))

In [None]:
# Fine-tune EfficientNet using train_loader_eff only (no validation pass in this cell).
model_eff = EfficientNetModel().to(device)

criterion = nn.CrossEntropyLoss()
optimizer_eff = Adam(model_eff.parameters(), lr=1e-4)

num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss_eff = train_epoch(model_eff, train_loader_eff, optimizer_eff, criterion)
    print(f"EfficientNet: Train Loss: {train_loss_eff:.4f}")

    # Intermediate checkpoint whenever the zero-based epoch index is a multiple
    # of 5 (so after epochs 1, 6, ...); the file name carries the one-based epoch.
    if not epoch % 5:
        torch.save(model_eff.state_dict(), f'eff_tmp_{epoch+1}')

# Weights after the last epoch.
torch.save(model_eff.state_dict(), "eff_f_all.pth")

In [None]:
# Number of batches the EfficientNet training loader yields per epoch.
print(len(train_loader_eff))

In [None]:
# Fine-tune ResNet using train_loader_res only (no validation pass in this cell).
model_res = ResNetModel().to(device)

criterion = nn.CrossEntropyLoss()
optimizer_res = Adam(model_res.parameters(), lr=1e-4)

num_epochs = 30

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss_res = train_epoch(model_res, train_loader_res, optimizer_res, criterion)
    print(f"ResNet: Train Loss: {train_loss_res:.4f}")

    # Intermediate checkpoint whenever the zero-based epoch index is a multiple
    # of 5 (so after epochs 1, 6, ...); the file name carries the one-based epoch.
    if not epoch % 5:
        torch.save(model_res.state_dict(), f'res_tmp_{epoch+1}')

# Weights after the last epoch.
torch.save(model_res.state_dict(), "res_f_all.pth")

In [None]:
# Fine-tune the Vision Transformer using train_loader_vit only (no validation pass in this cell).
model_vit = VisionTransformerModel().to(device)

criterion = nn.CrossEntropyLoss()
optimizer_vit = Adam(model_vit.parameters(), lr=1e-4)

num_epochs = 15

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss_vit = train_epoch(model_vit, train_loader_vit, optimizer_vit, criterion)
    print(f"VIT: Train Loss: {train_loss_vit:.4f}")

    # Intermediate checkpoint whenever the zero-based epoch index is a multiple
    # of 5 (so after epochs 1, 6, ...); the file name carries the one-based epoch.
    if not epoch % 5:
        torch.save(model_vit.state_dict(), f'vit_tmp_{epoch+1}')

# Weights after the last epoch.
torch.save(model_vit.state_dict(), "vit_f_all.pth")

In [None]:
#################################################################################################

All paths

In [None]:
# Reload both competition tables so this section starts from the full, unsplit data.
train_df, test_df = map(
    pd.read_csv,
    (
        "/kaggle/input/plant-pathology-2020-fgvc7/train.csv",
        "/kaggle/input/plant-pathology-2020-fgvc7/test.csv",
    ),
)

# Directory holding the .jpg files; the dataset class appends "<image_id>.jpg".
images_path = "/kaggle/input/plant-pathology-2020-fgvc7/images/"

Split the train data into train and validation

In [None]:
# Hold out 20% of the labelled rows for validation; random_state fixes the shuffle
# so the split is reproducible. train_df is rebound to the remaining 80%.
# NOTE(review): because train_df is overwritten, re-running this cell without
# reloading the CSV first would split the already-reduced frame again.
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

## Dataset preparation
Dataset class for the plant dataset

In [None]:
class PlantDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The first column of ``df`` is read as the image file stem (``".jpg"`` is
    appended); every remaining column of the row is converted to float and
    returned as the label vector. A frame with only the id column therefore
    yields an empty label tensor.

    Args:
        df: Table with the image id in column 0 and label values after it.
        images_path: Directory that contains the ``<image_id>.jpg`` files.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, df, images_path, transform=None):
        self.df = df
        self.images_path = images_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, float32 label tensor)``."""
        # Fetch the row once instead of indexing the frame twice.
        row = self.df.iloc[idx]
        img_name = os.path.join(self.images_path, row.iloc[0] + ".jpg")

        # The context manager closes the underlying file deterministically;
        # convert() returns an independent image that stays valid afterwards.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        label = row.iloc[1:].values.astype("float")

        return image, torch.tensor(label, dtype=torch.float32)

Augmentations

In [None]:
# Instantiate each pretrained backbone so timm can resolve its data configuration.
model_eff = timm.create_model('efficientnet_b4.ra2_in1k', pretrained=True)
model_vit = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
model_res = timm.create_model('resnet50.a1_in1k', pretrained=True)

# Data configuration resolved from each backbone.
data_config_eff = timm.data.resolve_model_data_config(model_eff)
data_config_vit = timm.data.resolve_model_data_config(model_vit)
data_config_res = timm.data.resolve_model_data_config(model_res)

# Training-mode transform pipelines generated from those configurations.
transforms_eff = timm.data.create_transform(**data_config_eff, is_training=True)
transforms_vit = timm.data.create_transform(**data_config_vit, is_training=True)
transforms_res = timm.data.create_transform(**data_config_res, is_training=True)

# DINOv2 ViT-B pipeline, currently disabled.
# NOTE(review): later cells still reference the vitb loaders/transforms - confirm
# whether this should be re-enabled.
#model_vitb = timm.create_model('vit_base_patch14_reg4_dinov2.lvd142m', pretrained=True)
#data_config_vitb = timm.data.resolve_model_data_config(model_vitb)
#transforms_vitb = timm.data.create_transform(**data_config_vitb, is_training=True)

In [None]:
# Show the training pipelines timm generated for each backbone.
print(transforms_eff)
print(transforms_vit)
print(transforms_res)
#print(transforms_vitb)

# Evaluation transforms are built with is_training=False. Previously these names
# were plain aliases of the training transforms, so validation and test images
# went through the training-time pipeline as well.
transforms_eff_val = timm.data.create_transform(**data_config_eff, is_training=False)
transforms_vit_val = timm.data.create_transform(**data_config_vit, is_training=False)
transforms_res_val = timm.data.create_transform(**data_config_res, is_training=False)
#transforms_vitb_val = timm.data.create_transform(**data_config_vitb, is_training=False)

In [None]:
# Disabled alternative: hand-written torchvision pipelines for EfficientNet and ViT.
# The whole block is wrapped in a string literal, so none of it executes; the
# transforms in use are the timm-generated ones defined in the cells above.
'''
transforms_eff = transforms.Compose([
    transforms.Resize(345, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(320),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    #transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    #transforms.ColorJitter(brightness=(0.9, 1.05), contrast=(0.88, 1.2), saturation=(0.9, 1.1), hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transforms_vit = transforms.Compose([
    transforms.Resize(232, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    #transforms.ColorJitter(brightness=(0.9, 1.2), contrast=(0.9, 1.1), saturation=(0.9, 1.1), hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


transforms_eff_val = transforms.Compose([
    transforms.Resize(345, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(320),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transforms_vit_val = transforms.Compose([
    transforms.Resize(232, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
'''

Create the datasets, dataloaders

In [None]:
# Training datasets: the training split paired with each backbone's training transforms.
train_dataset_eff = PlantDataset(train_df, images_path, transform=transforms_eff)
train_dataset_vit = PlantDataset(train_df, images_path, transform=transforms_vit)
train_dataset_res = PlantDataset(train_df, images_path, transform=transforms_res)

# Validation datasets: the held-out split paired with each backbone's *_val transforms.
val_dataset_eff = PlantDataset(val_df, images_path, transform=transforms_eff_val)
val_dataset_vit = PlantDataset(val_df, images_path, transform=transforms_vit_val)
val_dataset_res = PlantDataset(val_df, images_path, transform=transforms_res_val)

# Loaders: training batches are shuffled, validation batches keep dataset order.
train_loader_eff = DataLoader(train_dataset_eff, batch_size=16, shuffle=True)
train_loader_vit = DataLoader(train_dataset_vit, batch_size=16, shuffle=True)
train_loader_res = DataLoader(train_dataset_res, batch_size=16, shuffle=True)

val_loader_eff = DataLoader(val_dataset_eff, batch_size=16, shuffle=False)
val_loader_vit = DataLoader(val_dataset_vit, batch_size=16, shuffle=False)
val_loader_res = DataLoader(val_dataset_res, batch_size=16, shuffle=False)

# DINOv2 ViT-B datasets/loaders, currently disabled.
# NOTE(review): later cells still use train_loader_vitb / val_loader_vitb - confirm
# whether these should be re-enabled.
#train_dataset_vitb = PlantDataset(train_df, images_path, transform=transforms_vitb)
#val_dataset_vitb = PlantDataset(val_df, images_path, transform=transforms_vitb_val)
#train_loader_vitb = DataLoader(train_dataset_vitb, batch_size=8, shuffle=True)
#val_loader_vitb = DataLoader(val_dataset_vitb, batch_size=8, shuffle=False)


#test_dataset = PlantDataset(test_df, images_path, transform=val_transform)

In [None]:
# Preview a 2x4 grid of random samples drawn from the EfficientNet training dataset.
fig = plt.figure(figsize=(24, 12))
columns = 4
rows = 2

for i in range(columns * rows):
    # Named image_tensor rather than `input`, which would shadow the builtin
    # input() for the rest of the notebook session.
    image_tensor, label = train_dataset_eff[np.random.randint(len(train_dataset_eff))]
    print(label)
    # Move axis 0 to the end (assumes a channel-first tensor - TODO confirm).
    img = image_tensor.detach().numpy().transpose((1, 2, 0))

    ax = fig.add_subplot(rows, columns, i + 1)
    label_names = str(label).replace("'", "").replace("[", "").replace("]", "")
    ax.set_title(label_names, fontstyle='italic')
    plt.imshow(img)
plt.show()


# Preview a 2x4 grid of random samples drawn from the ViT training dataset.
fig = plt.figure(figsize=(24, 12))
columns = 4
rows = 2

for i in range(columns * rows):
    # Named image_tensor rather than `input`, which would shadow the builtin
    # input() for the rest of the notebook session.
    image_tensor, label = train_dataset_vit[np.random.randint(len(train_dataset_vit))]
    print(label)
    # Move axis 0 to the end (assumes a channel-first tensor - TODO confirm).
    img = image_tensor.detach().numpy().transpose((1, 2, 0))

    ax = fig.add_subplot(rows, columns, i + 1)
    label_names = str(label).replace("'", "").replace("[", "").replace("]", "")
    ax.set_title(label_names, fontstyle='italic')
    plt.imshow(img)
plt.show()

# Preview a 2x4 grid of random samples drawn from the ResNet training dataset.
fig = plt.figure(figsize=(24, 12))
columns = 4
rows = 2

for i in range(columns * rows):
    # Named image_tensor rather than `input`, which would shadow the builtin
    # input() for the rest of the notebook session.
    image_tensor, label = train_dataset_res[np.random.randint(len(train_dataset_res))]
    print(label)
    # Move axis 0 to the end (assumes a channel-first tensor - TODO confirm).
    img = image_tensor.detach().numpy().transpose((1, 2, 0))

    ax = fig.add_subplot(rows, columns, i + 1)
    label_names = str(label).replace("'", "").replace("[", "").replace("]", "")
    ax.set_title(label_names, fontstyle='italic')
    plt.imshow(img)
plt.show()

# Preview a 2x4 grid of random samples drawn from the DINOv2 ViT-B training dataset.
# NOTE(review): train_dataset_vitb is only created by a line that is commented out
# in the dataset cell, so this cell fails unless that line is re-enabled - confirm.
fig = plt.figure(figsize=(24, 12))
columns = 4
rows = 2

for i in range(columns * rows):
    # Named image_tensor rather than `input`, which would shadow the builtin
    # input() for the rest of the notebook session.
    image_tensor, label = train_dataset_vitb[np.random.randint(len(train_dataset_vitb))]
    print(label)
    # Move axis 0 to the end (assumes a channel-first tensor - TODO confirm).
    img = image_tensor.detach().numpy().transpose((1, 2, 0))

    ax = fig.add_subplot(rows, columns, i + 1)
    label_names = str(label).replace("'", "").replace("[", "").replace("]", "")
    ax.set_title(label_names, fontstyle='italic')
    plt.imshow(img)
plt.show()

## Models
Idea:
- train several separate models: EfficientNet, Vision Transformer and ResNet (a DINOv2 ViT-B wrapper is defined as well)
- finally, average the outputs of the models -> this is the final output

In [None]:
# efficientnet b4
class EfficientNetModel(nn.Module):
    """timm EfficientNet-B4 backbone with its classifier replaced for this task.

    Args:
        num_classes: Number of logits produced by the replacement classifier.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model('efficientnet_b4.ra2_in1k', pretrained=pretrained)
        # Replace the backbone's classifier with a linear layer sized to num_classes.
        self.model.classifier = nn.Linear(self.model.classifier.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# vision transformer
class VisionTransformerModel(nn.Module):
    """timm ViT-Base/16 (224) backbone with its head replaced for this task.

    Args:
        num_classes: Number of logits produced by the replacement head.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            'vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=pretrained
        )
        # Replace the backbone's head with a linear layer sized to num_classes.
        self.model.head = nn.Linear(self.model.head.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# resnet50
class ResNetModel(nn.Module):
    """timm ResNet-50 backbone with its final fully connected layer replaced.

    Args:
        num_classes: Number of logits produced by the replacement ``fc`` layer.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model('resnet50.a1_in1k', pretrained=pretrained)
        # Replace the backbone's fc layer with a linear layer sized to num_classes.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)
    
    
# vitb
class VisionTransformerBModel(nn.Module):
    """timm DINOv2 ViT-Base/14 (reg4) backbone with a new linear head attached.

    Unlike the other wrappers, the head's input width is taken from the
    backbone's ``embed_dim`` rather than from an existing head layer.

    Args:
        num_classes: Number of logits produced by the new head.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` when a saved
            state dict is loaded right after construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            'vit_base_patch14_reg4_dinov2.lvd142m', pretrained=pretrained
        )
        # Attach a linear head mapping the embedding width to num_classes.
        self.model.head = nn.Linear(self.model.embed_dim, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)

Training loop, validation loop

In [None]:
# TRAIN
def train_epoch(model, dataloader, optimizer, criterion):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    The model is put in training mode, every batch is moved to the module-level
    ``device``, and one optimizer step is taken per batch. The returned value is
    the sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    loss_total = 0.0

    bar = tqdm(dataloader, desc="Training", leave=False)
    for batch_images, batch_labels in bar:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Clear gradients left from the previous step before the new backward pass.
        optimizer.zero_grad()
        loss = criterion(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        loss_total += batch_loss
        bar.set_postfix({'loss': batch_loss})

    return loss_total / len(dataloader)


# VALIDATION
def val_epoch(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

    The model is put in eval mode and gradients are disabled; no parameters are
    updated. The returned value is the sum of the per-batch loss values divided
    by the number of batches.
    """
    model.eval()
    loss_total = 0.0

    bar = tqdm(dataloader, desc="Validating", leave=False)
    with torch.no_grad():
        for batch_images, batch_labels in bar:
            batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

            batch_loss = criterion(model(batch_images), batch_labels).item()
            loss_total += batch_loss
            bar.set_postfix({'loss': batch_loss})

    return loss_total / len(dataloader)

Training

In [None]:
# Build the four classifiers (in this order) and move each onto the selected device.
model_eff = EfficientNetModel().to(device)
model_vit = VisionTransformerModel().to(device)
model_res = ResNetModel().to(device)
model_vitb = VisionTransformerBModel().to(device)

# One loss object shared by every model (BCEWithLogitsLoss was the alternative tried).
criterion = nn.CrossEntropyLoss()

# Per-model Adam optimizers, all with the same learning rate.
optimizer_eff = Adam(model_eff.parameters(), lr=1e-4)
optimizer_vit = Adam(model_vit.parameters(), lr=1e-4)
optimizer_res = Adam(model_res.parameters(), lr=1e-4)
optimizer_vitb = Adam(model_vitb.parameters(), lr=1e-4)

# Per-model schedulers: halve the learning rate when the monitored loss has not
# improved for more than `patience` epochs.
scheduler_eff = ReduceLROnPlateau(optimizer_eff, mode='min', factor=0.5, patience=3, verbose=True)
scheduler_vit = ReduceLROnPlateau(optimizer_vit, mode='min', factor=0.5, patience=3, verbose=True)
scheduler_res = ReduceLROnPlateau(optimizer_res, mode='min', factor=0.5, patience=3, verbose=True)
scheduler_vitb = ReduceLROnPlateau(optimizer_vitb, mode='min', factor=0.5, patience=3, verbose=True)

num_epochs = 40

# Loss histories, one (train, val) pair of lists per model.
train_losses_eff, val_losses_eff = [], []
train_losses_vit, val_losses_vit = [], []
train_losses_res, val_losses_res = [], []
train_losses_vitb, val_losses_vitb = [], []

# Best-so-far snapshot and validation loss for each model.
best_model_eff, best_val_loss_eff = None, np.inf
best_model_vit, best_val_loss_vit = None, np.inf
best_model_res, best_val_loss_res = None, np.inf
best_model_vitb, best_val_loss_vitb = None, np.inf

# Train all models side by side: each epoch runs one training pass and one
# validation pass per model, steps that model's LR scheduler on its own
# validation loss, and checkpoints the model whenever its validation loss improves.
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # efficientnet
    train_loss_eff = train_epoch(model_eff, train_loader_eff, optimizer_eff, criterion)
    val_loss_eff = val_epoch(model_eff, val_loader_eff, criterion)
    train_losses_eff.append(train_loss_eff)
    val_losses_eff.append(val_loss_eff)

    # vit
    train_loss_vit = train_epoch(model_vit, train_loader_vit, optimizer_vit, criterion)
    val_loss_vit = val_epoch(model_vit, val_loader_vit, criterion)
    train_losses_vit.append(train_loss_vit)
    val_losses_vit.append(val_loss_vit)

    # res
    train_loss_res = train_epoch(model_res, train_loader_res, optimizer_res, criterion)
    val_loss_res = val_epoch(model_res, val_loader_res, criterion)
    train_losses_res.append(train_loss_res)
    val_losses_res.append(val_loss_res)

    # vitb
    # NOTE(review): train_loader_vitb / val_loader_vitb are only created by lines
    # that are commented out in the dataset cell - confirm they are re-enabled.
    train_loss_vitb = train_epoch(model_vitb, train_loader_vitb, optimizer_vitb, criterion)
    val_loss_vitb = val_epoch(model_vitb, val_loader_vitb, criterion)
    train_losses_vitb.append(train_loss_vitb)
    val_losses_vitb.append(val_loss_vitb)

    print(f"EfficientNet: Train Loss: {train_loss_eff:.4f}, Val Loss: {val_loss_eff:.4f}")
    print(f"Vision Transformer: Train Loss: {train_loss_vit:.4f}, Val Loss: {val_loss_vit:.4f}")
    print(f"ResNet: Train Loss: {train_loss_res:.4f}, Val Loss: {val_loss_res:.4f}")
    print(f"Vision Transformer Better: Train Loss: {train_loss_vitb:.4f}, Val Loss: {val_loss_vitb:.4f}")

    # Each scheduler monitors its own model's validation loss. Previously the
    # ResNet and ViT-B schedulers were stepped with the ViT loss (copy-paste bug).
    scheduler_eff.step(val_loss_eff)
    scheduler_vit.step(val_loss_vit)
    scheduler_res.step(val_loss_res)
    scheduler_vitb.step(val_loss_vitb)

    if val_loss_eff < best_val_loss_eff:
        best_val_loss_eff = val_loss_eff
        best_model_eff = copy.deepcopy(model_eff)
        # save the model
        torch.save(best_model_eff.state_dict(), "best_model_eff.pth")
        print("eff saved")

    if val_loss_vit < best_val_loss_vit:
        best_val_loss_vit = val_loss_vit
        best_model_vit = copy.deepcopy(model_vit)
        # save the model
        torch.save(best_model_vit.state_dict(), "best_model_vit.pth")
        print("vit saved")

    if val_loss_res < best_val_loss_res:
        best_val_loss_res = val_loss_res
        best_model_res = copy.deepcopy(model_res)
        # save the model
        torch.save(best_model_res.state_dict(), "best_model_res.pth")
        print("res saved")

    if val_loss_vitb < best_val_loss_vitb:
        best_val_loss_vitb = val_loss_vitb
        best_model_vitb = copy.deepcopy(model_vitb)
        # save the model
        torch.save(best_model_vitb.state_dict(), "best_model_vitb.pth")
        print("vitb saved")

# save the final models (weights after the last epoch, regardless of validation loss)
torch.save(model_eff.state_dict(), "final_model_eff.pth")
torch.save(model_vit.state_dict(), "final_model_vit.pth")
torch.save(model_res.state_dict(), "final_model_res.pth")
torch.save(model_vitb.state_dict(), "final_model_vitb.pth")

Plot the training and validation losses

In [None]:
# One figure per model, showing its training and validation loss curves.
for train_curve, val_curve, train_label, val_label in (
    (train_losses_eff, val_losses_eff, "Train Loss EfficientNet", "Val Loss EfficientNet"),
    (train_losses_vit, val_losses_vit, "Train Loss VIT", "Val Loss VIT"),
    (train_losses_res, val_losses_res, "Train Loss ResNet", "Val Loss ResNet"),
    (train_losses_vitb, val_losses_vitb, "Train Loss VIT-B", "Val Loss VIT-B"),
):
    plt.plot(train_curve, label=train_label)
    plt.plot(val_curve, label=val_label)
    plt.legend()
    plt.show()

# Raw loss histories for EfficientNet, ResNet and ViT (ViT-B is not printed).
for loss_history in (
    train_losses_eff,
    val_losses_eff,
    train_losses_res,
    val_losses_res,
    train_losses_vit,
    val_losses_vit,
):
    print(loss_history)

Test the model, predict and save the results

3 approaches:
- only efficientnet
- only vision transformer
- avg of both

In [None]:
import torch.nn.functional as F

# predict
def predict(model, dataloader):
    """Return softmax class probabilities for every sample in ``dataloader``.

    The model is put in eval mode and run without gradients on the module-level
    ``device``. The second element of each batch is ignored. Per-batch
    probability arrays are concatenated in iteration order into one NumPy array.
    """
    model.eval()
    batch_probs = []

    bar = tqdm(dataloader, desc="Predicting", leave=False)
    with torch.no_grad():
        for images, _ in bar:
            logits = model(images.to(device))
            # Softmax across dim 1 turns each row of outputs into probabilities.
            batch_probs.append(F.softmax(logits, dim=1).cpu().numpy())

    return np.concatenate(batch_probs)

In [None]:
# Rebuild the architectures and load trained weights: the "*_f_all.pth" files for
# EfficientNet / ViT / ResNet and the best-validation checkpoint for ViT-B.
model_eff = EfficientNetModel()
model_vit = VisionTransformerModel()
model_res = ResNetModel()
model_vitb = VisionTransformerBModel()

# map_location remaps the stored tensors onto the device selected for this
# session, so a checkpoint saved from a GPU run also loads on a CPU-only one.
model_eff.load_state_dict(torch.load("eff_f_all.pth", map_location=device))
model_vit.load_state_dict(torch.load("vit_f_all.pth", map_location=device))
model_res.load_state_dict(torch.load("res_f_all.pth", map_location=device))
model_vitb.load_state_dict(torch.load("best_model_vitb.pth", map_location=device))

model_eff.to(device)
model_vit.to(device)
model_res.to(device)
model_vitb.to(device)

print(device)

In [None]:
# Test datasets: the test table paired with each backbone's *_val transforms.
# NOTE(review): transforms_vitb_val is only assigned by a commented-out line in the
# transforms cell - confirm it is defined before running this cell.
test_dataset_eff = PlantDataset(test_df, images_path, transform=transforms_eff_val)
test_dataset_vit = PlantDataset(test_df, images_path, transform=transforms_vit_val)
test_dataset_res = PlantDataset(test_df, images_path, transform=transforms_res_val)
test_dataset_vitb = PlantDataset(test_df, images_path, transform=transforms_vitb_val)

# Unshuffled loaders so prediction rows stay in test_df order.
test_loader_eff = DataLoader(test_dataset_eff, batch_size=8, shuffle=False)
test_loader_vit = DataLoader(test_dataset_vit, batch_size=8, shuffle=False)
test_loader_res = DataLoader(test_dataset_res, batch_size=8, shuffle=False)
test_loader_vitb = DataLoader(test_dataset_vitb, batch_size=8, shuffle=False)

# Class probabilities from each model.
predictions_eff = predict(model_eff, test_loader_eff)
predictions_vit = predict(model_vit, test_loader_vit)
predictions_res = predict(model_res, test_loader_res)
predictions_vitb = predict(model_vitb, test_loader_vitb)

# Ensemble by averaging EfficientNet, ViT and ResNet (ViT-B is not part of the average).
predictions_avg = (predictions_eff + predictions_vit + predictions_res) / 3

# Write one submission CSV per model plus one for the averaged ensemble
# (five files in total).
_SUBMISSION_LABELS = ["healthy", "multiple_diseases", "rust", "scab"]


def _write_submission(predictions, filename):
    """Build the submission frame for ``predictions``, write it to ``filename``, return it."""
    submission = pd.DataFrame(predictions, columns=_SUBMISSION_LABELS)
    submission["image_id"] = test_df["image_id"]
    # Put image_id first, followed by the label columns.
    submission = submission[["image_id", *_SUBMISSION_LABELS]]
    submission.to_csv(filename, index=False)
    return submission


submissions_eff = _write_submission(predictions_eff, "submission_eff.csv")
submissions_vit = _write_submission(predictions_vit, "submission_vit.csv")
submissions_res = _write_submission(predictions_res, "submission_res.csv")
submissions_vitb = _write_submission(predictions_vitb, "submission_vitb.csv")
submissions_avg = _write_submission(predictions_avg, "submission_avg.csv")

In [None]:
# Average the EfficientNet, ViT and ResNet probabilities and write them as a submission.
predictions_avg_3 = (predictions_eff + predictions_vit + predictions_res) / 3
predictions_avg_3 = pd.DataFrame(predictions_avg_3, columns=["healthy", "multiple_diseases", "rust", "scab"])
predictions_avg_3["image_id"] = test_df["image_id"]
# Reorder the columns of this frame. Previously the selection was taken from
# submissions_avg, so the frame built above was discarded (copy-paste bug).
predictions_avg_3 = predictions_avg_3[["image_id", "healthy", "multiple_diseases", "rust", "scab"]]
predictions_avg_3.to_csv("submission_avg3.csv", index=False)

In [None]:
# Stacking head: a separate model that takes the concatenated backbone embeddings
# as input and predicts the final class logits.

class EnsembleModel(nn.Module):
    """Four-layer MLP (input_dim -> 1024 -> 512 -> 256 -> num_classes).

    Each of the first three linear layers is followed by ReLU and dropout
    (p=0.5); the last linear layer returns raw logits.
    """

    def __init__(self, num_classes=4, input_dim=4096):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 1024)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(1024, 512)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)

        self.fc3 = nn.Linear(512, 256)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.5)

        self.fc4 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Apply the three hidden stages in order, then the output layer."""
        hidden_stages = (
            (self.fc1, self.relu1, self.dropout1),
            (self.fc2, self.relu2, self.dropout2),
            (self.fc3, self.relu3, self.dropout3),
        )
        for linear, activation, dropout in hidden_stages:
            x = dropout(activation(linear(x)))

        return self.fc4(x)

In [None]:
def get_all_embeddings(model, dataloader):
    """Run ``model`` over every batch of ``dataloader`` and collect its outputs.

    The model is put in eval mode and evaluated without gradient tracking.
    Relies on the notebook-level ``device``.

    Args:
        model: Module called on each image batch.
        dataloader: Iterable yielding ``(images, targets)`` pairs of tensors.

    Returns:
        Tuple ``(embeddings, labels)`` of numpy arrays, each the per-batch
        results concatenated along the first axis in iteration order.
    """
    model.eval()
    feature_chunks, label_chunks = [], []

    with torch.no_grad():
        for batch_images, batch_targets in tqdm(dataloader, desc="Getting Embeddings", leave=False):
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            batch_features = model(batch_images)

            # move everything back to host memory so it can be stacked with numpy
            feature_chunks.append(batch_features.cpu().numpy())
            label_chunks.append(batch_targets.cpu().numpy())

    return np.concatenate(feature_chunks), np.concatenate(label_chunks)

In [None]:
# load the best models
model_eff = EfficientNetModel()
model_vit = VisionTransformerModel()
model_res = ResNetModel()
model_vitb = VisionTransformerBModel()

# map_location keeps the checkpoints loadable when the notebook falls back to CPU
model_eff.load_state_dict(torch.load("best_model_eff.pth", map_location=device))
model_vit.load_state_dict(torch.load("best_model_vit.pth", map_location=device))
model_res.load_state_dict(torch.load("best_model_res.pth", map_location=device))
model_vitb.load_state_dict(torch.load("best_model_vitb.pth", map_location=device))

# for each model remove the last layer, so a forward pass returns the
# features that used to feed the classification layer
model_eff.model.classifier = nn.Identity()
model_vit.model.head = nn.Identity()
model_res.model.fc = nn.Identity()
model_vitb.model.head = nn.Identity()

model_eff.to(device)
model_vit.to(device)
model_res.to(device)
model_vitb.to(device)

# get the embeddings
# NOTE(review): the per-model features are concatenated row by row below, which
# assumes all train loaders yield the samples in the same order (no shuffling) - confirm.
train_embeddings_eff, train_labels_eff = get_all_embeddings(model_eff, train_loader_eff)
train_embeddings_vit, train_labels_vit = get_all_embeddings(model_vit, train_loader_vit)
train_embeddings_res, train_labels_res = get_all_embeddings(model_res, train_loader_res)
train_embeddings_vitb, train_labels_vitb = get_all_embeddings(model_vitb, train_loader_vitb)

print(f'sizes: {train_embeddings_eff.shape}, {train_embeddings_vit.shape}, {train_embeddings_res.shape}')

# combine the embeddings (feature axis). The list must be closed before axis=1;
# the same three backbones are used as for val_embeddings, so ViT-B is computed
# above but not part of the ensemble input.
train_embeddings = np.concatenate(
    [train_embeddings_eff, train_embeddings_vit, train_embeddings_res],
    axis=1,
)

# width of the concatenated feature vector = input size of the ensemble head
size = train_embeddings.shape[1]

# create the ensemble model
ensemble_model = EnsembleModel(num_classes=4, input_dim=size)
ensemble_model.to(device)

In [None]:
# sanity check: shape of the concatenated training embeddings
print(train_embeddings.shape)

In [None]:
# Features and labels of the validation split, one pass per backbone
val_embeddings_eff, val_labels_eff = get_all_embeddings(model_eff, val_loader_eff)
val_embeddings_vit, val_labels_vit = get_all_embeddings(model_vit, val_loader_vit)
val_embeddings_res, val_labels_res = get_all_embeddings(model_res, val_loader_res)
val_embeddings_vitb, val_labels_vitb = get_all_embeddings(model_vitb, val_loader_vitb)

# Join the EfficientNet / ViT / ResNet features along the feature axis
val_feature_blocks = [val_embeddings_eff, val_embeddings_vit, val_embeddings_res]
val_embeddings = np.concatenate(val_feature_blocks, axis=1)

# Labels as integer tensors on the active device
val_labels_eff = torch.tensor(val_labels_eff, dtype=torch.long, device=device)
val_labels_vit = torch.tensor(val_labels_vit, dtype=torch.long, device=device)
val_labels_res = torch.tensor(val_labels_res, dtype=torch.long, device=device)
val_labels_vitb = torch.tensor(val_labels_vitb, dtype=torch.long, device=device)

print(val_embeddings.shape)

In [None]:
# normalize the embeddings
# Each split is standardized per feature column with its own mean / std.
# A column with zero spread would divide by zero and put NaN/inf into the
# ensemble inputs, so its divisor is replaced by 1 (the column becomes all zeros).
# NOTE(review): re-using the training mean/std for the other splits is the usual
# practice; per-split statistics are kept here to preserve the existing behaviour.
train_std = np.std(train_embeddings, axis=0)
train_embeddings = (train_embeddings - np.mean(train_embeddings, axis=0)) / np.where(train_std == 0, 1.0, train_std)

val_std = np.std(val_embeddings, axis=0)
val_embeddings = (val_embeddings - np.mean(val_embeddings, axis=0)) / np.where(val_std == 0, 1.0, val_std)

In [None]:
# TRAIN
# Fresh ensemble head sized to the concatenated embedding width
ensemble_model = EnsembleModel(num_classes=4, input_dim=size)
ensemble_model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(ensemble_model.parameters(), lr=0.0001)

num_epochs = 5
best_model_ensamble = None
best_val_loss_ensamble = np.inf

train_losses_ensamble = []
val_losses_ensamble = []

# Build the tensors once instead of re-creating them for every sample in every epoch.
# as_tensor accepts numpy arrays as well as tensors; the labels are cast to float32
# because the loss is fed per-class target vectors with the same shape as the logits.
# NOTE(review): the *_eff labels are paired with the concatenated features, which
# assumes every loader of a split yields its samples in the same order - confirm.
train_inputs = torch.as_tensor(train_embeddings, dtype=torch.float32, device=device)
train_targets = torch.as_tensor(train_labels_eff, dtype=torch.float32, device=device)
val_inputs = torch.as_tensor(val_embeddings, dtype=torch.float32, device=device)
val_targets = torch.as_tensor(val_labels_eff, dtype=torch.float32, device=device)

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    ensemble_model.train()
    running_loss = 0.0

    # train (one optimizer step per sample)
    for embeddings, labels in zip(train_inputs, train_targets):
        optimizer.zero_grad()

        outputs = ensemble_model(embeddings)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss_train = running_loss / len(train_inputs)
    train_losses_ensamble.append(epoch_loss_train)

    # validate on the held-out embeddings (not the training ones), otherwise
    # the "best model" selection below would just track the training loss
    ensemble_model.eval()
    running_loss = 0.0

    with torch.no_grad():  # no graph needed for evaluation
        for embeddings, labels in zip(val_inputs, val_targets):
            outputs = ensemble_model(embeddings)
            loss = criterion(outputs, labels)

            running_loss += loss.item()

    epoch_loss_val = running_loss / len(val_inputs)
    val_losses_ensamble.append(epoch_loss_val)

    print(f"Train Loss: {epoch_loss_train:.4f}, Val Loss: {epoch_loss_val:.4f}")

    # keep a snapshot of the weights with the lowest validation loss so far
    if epoch_loss_val < best_val_loss_ensamble:
        best_val_loss_ensamble = epoch_loss_val
        best_model_ensamble = copy.deepcopy(ensemble_model)
        torch.save(best_model_ensamble.state_dict(), "best_model_ensamble.pth")
        print("ensamble saved")


# save the final model (weights after the last epoch, regardless of validation loss)
torch.save(ensemble_model.state_dict(), "final_model_ensamble.pth")

In [None]:
# PREDICT
# load the best model (map_location keeps this working on a CPU-only session)
ensemble_model = EnsembleModel(num_classes=4, input_dim=size)
ensemble_model.load_state_dict(torch.load("best_model_ensamble.pth", map_location=device))
ensemble_model.to(device)

# get the embeddings (the label half of each result is not needed for the test set)
test_embeddings_eff, _ = get_all_embeddings(model_eff, test_loader_eff)
test_embeddings_vit, _ = get_all_embeddings(model_vit, test_loader_vit)
test_embeddings_res, _ = get_all_embeddings(model_res, test_loader_res)
test_embeddings_vitb, _ = get_all_embeddings(model_vitb, test_loader_vitb)

# combine the embeddings
# Only the three backbones that make up val_embeddings are concatenated; adding the
# ViT-B features as well would make the width differ from the one the head expects.
test_embeddings = np.concatenate([test_embeddings_eff, test_embeddings_vit, test_embeddings_res], axis=1)

# Standardize per feature column like the train / validation embeddings; the head is
# fitted on standardized inputs, so raw features would be far off its input range.
# Columns with zero spread keep a divisor of 1 to avoid NaN/inf.
test_std = np.std(test_embeddings, axis=0)
test_embeddings = (test_embeddings - np.mean(test_embeddings, axis=0)) / np.where(test_std == 0, 1.0, test_std)



In [None]:
def predict_ensemble(model, test_embeddings, batch_size=256):
    """Return softmax class probabilities for a matrix of embeddings.

    Args:
        model: Module mapping a ``(batch, features)`` float tensor to logits.
        test_embeddings: 2-D array-like with one row of features per sample.
        batch_size: Number of rows sent through the model per forward pass.

    Returns:
        ``np.ndarray`` of shape ``(num_samples, num_classes)``; softmax is taken
        over the class axis, so every row sums to 1.
    """
    model.eval()

    # Run on the device the model already lives on, so inputs and weights
    # cannot end up on different devices (falls back to CPU for parameter-free models).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    inputs = torch.as_tensor(np.asarray(test_embeddings), dtype=torch.float32)

    predictions = []
    with torch.no_grad():
        # Batched forward passes instead of one call per sample
        for chunk in torch.split(inputs, batch_size):
            logits = model(chunk.to(target_device))
            # torch.softmax needs nothing beyond the torch import itself
            probabilities = torch.softmax(logits, dim=1)
            predictions.append(probabilities.cpu().numpy())

    return np.concatenate(predictions)

In [None]:
# Class probabilities of the learned ensemble for every test row
predictions_ensemble = predict_ensemble(ensemble_model, test_embeddings)

# save the predictions: probabilities first, then image_id placed in the leading column
submissions_ensemble = pd.DataFrame(
    predictions_ensemble,
    columns=["healthy", "multiple_diseases", "rust", "scab"],
)
submissions_ensemble.insert(0, "image_id", test_df["image_id"])
submissions_ensemble.to_csv("submission_ensemble.csv", index=False)