### Load train_df and test_df

In [24]:
import pickle
import pandas as pd

# Load the cached train/test frames; pd.read_pickle opens and closes each file itself.
# NOTE: unpickling can execute arbitrary code, so only load pickle files you created yourself.
train_df = pd.read_pickle('train_df.pkl')
test_df = pd.read_pickle('test_df.pkl')

# Quick look at the first rows of the training frame.
train_df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab,image_path,label,target
0,Train_0,0,0,0,1,data/images/Train_0.jpg,scab,3
1,Train_1,0,1,0,0,data/images/Train_1.jpg,multiple_diseases,1
2,Train_2,1,0,0,0,data/images/Train_2.jpg,healthy,0
3,Train_3,0,0,1,0,data/images/Train_3.jpg,rust,2
4,Train_4,1,0,0,0,data/images/Train_4.jpg,healthy,0


#### Get tr_loader, val_loader and test_loader

In [25]:
import importlib
import util_functions
# Re-import util_functions so edits made to util_functions.py are picked up
# without restarting the kernel.
importlib.reload(util_functions)



<module 'util_functions' from '/Users/jungchulwoo/Desktop/personal /Self_Project/Plant_Pathology/util_functions.py'>

In [34]:
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations.pytorch import ToTensorV2
import util_functions as uf 

# Hold out 20% of the rows for validation, stratified on the integer class id so
# both splits keep the same class balance. The result is stored in tr_df/val_df
# rather than written back into train_df, so re-running this cell always splits
# the full frame instead of re-splitting an already reduced one.
tr_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df['target'], random_state=2025)

# Training pipeline: resize, random flips / brightness-contrast / blur, then
# normalisation (the usual ImageNet mean/std) and conversion to a tensor.
tr_transform = A.Compose([
    A.Resize(224, 224),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.Blur(p=0.3),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Validation pipeline: no random augmentation, only resize, normalise and
# tensor conversion.
val_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# The loaders are built by the project helper in util_functions.py (imported as uf).
tr_loader, val_loader = uf.create_tr_val_loader(tr_df, val_df, tr_transform, val_transform)

#### Create EfficientNet Model

In [2]:
from timm import create_model

# Build the timm 'efficientnetv2_rw_m' model with pretrained weights and
# num_classes=4 (one output per label column: healthy, multiple_diseases, rust, scab).
model = create_model('efficientnetv2_rw_m', pretrained=True, num_classes=4)

### Execute trainer

In [3]:
# Print the module tree to inspect the layer names (e.g. which ones belong to the head).
print(model)

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): EdgeResidual(
        (conv_exp): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (se): Identity()
        (conv_pwl): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (1): EdgeResidual(
        (conv_exp)

In [38]:
import trainer, predictor
# Reload both project modules so local edits to trainer.py / predictor.py take
# effect without restarting the kernel.
importlib.reload(trainer)
importlib.reload(predictor)

<module 'predictor' from '/Users/jungchulwoo/Desktop/personal /Self_Project/Plant_Pathology/predictor.py'>

#### Fine Tuning 

In [None]:
def freeze_feature_extractor(model):
    """Leave only the classifier parameters trainable.

    Every parameter whose name contains 'classifier' gets requires_grad=True;
    every other parameter gets requires_grad=False. The model is modified in
    place and the same object is returned.
    """
    for param_name, weight in model.named_parameters():
        # The membership test already yields the boolean we want to store.
        weight.requires_grad = 'classifier' in param_name
    return model

# Freeze everything except the classifier parameters before the first training stage.
model = freeze_feature_extractor(model)


- First training stage: only the classifier layer is trained

In [None]:
from torch.optim import Adam 
import torch.nn as nn 
import torch.optim 
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchmetrics.classification import AUROC
from trainer import Trainer

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
optimizer = Adam(model.parameters(), lr=1e-3)
# Multiply the learning rate by 0.2 when the monitored quantity has stopped
# decreasing (mode='min', patience=3).
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.2)
# Multiclass AUROC over the 4 classes.
metric = AUROC(task='multiclass', num_classes=4)

# Value handed to Trainer as cutmix_prob; how it is applied is defined in trainer.py.
cutmix_prob = 0.3

trainer = Trainer(model, optimizer, tr_loader, val_loader,
                 scheduler, device, metric, cutmix_prob)

# Run 30 epochs and keep whatever fit() returns for later inspection.
history = trainer.fit(epochs=30)

Epoch 1 [Training..]:   3%|▎         | 2/73 [01:00<35:08, 29.69s/it, loss=1.78, metric=0.309]

- Second training stage: all layers are trained

In [None]:
# Unfreeze every parameter so the whole network is trained in this stage.
for param in model.parameters():
    param.requires_grad = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh optimizer and scheduler for this stage, with the same settings as the first stage.
# NOTE(review): lr=1e-3 is reused for the fully unfrozen network — confirm this is intended.
optimizer = Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.2)
metric = AUROC(task='multiclass', num_classes=4)

# Value handed to Trainer as cutmix_prob; how it is applied is defined in trainer.py.
cutmix_prob = 0.3

trainer = Trainer(model, optimizer, tr_loader, val_loader,
                 scheduler, device, metric, cutmix_prob)

history = trainer.fit(epochs=30)


### Predict with test_data 

In [26]:
from torch.utils.data import DataLoader

# Build the on-disk path of every test image from its id.
IMAGE_DIR = 'data/images'
test_df['image_path'] = IMAGE_DIR + '/' + test_df['image_id'] + '.jpg'

# Deterministic preprocessing only (no random augmentation at inference time).
# NOTE(review): images are resized to 416x416 here while the train/validation
# transforms above use 224x224 — confirm the mismatch is intended.
test_transform = A.Compose([
    A.Resize(416,416),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Read the paths from the 'image_path' column created above; test_df has no
# 'path' column at this point, so indexing it would raise KeyError.
# The dataset class lives in util_functions (imported as uf).
test_dataset = uf.PlantPathologyDataset(image_paths=test_df['image_path'], labels=None, transform=test_transform)
# shuffle=False keeps predictions in the same order as the rows of test_df.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

Unnamed: 0,image_id,image_path
0,Test_0,data/images/Test_0.jpg
1,Test_1,data/images/Test_1.jpg
2,Test_2,data/images/Test_2.jpg
3,Test_3,data/images/Test_3.jpg
4,Test_4,data/images/Test_4.jpg


In [None]:
from predictor import Predictor
import numpy as np

# Wrap the model returned by the trainer in a Predictor bound to the available device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = trainer.get_trained_model()
predictor = Predictor(model, device)

# For each batch, move the predicted probabilities to the CPU as a NumPy array
# and collect its entries (iterating along the first axis) into one flat list.
prob_all_list = [
    row
    for image in test_loader
    for row in predictor.predict_proba(image).cpu().numpy()
]

# Stack the collected entries into a single array.
prob_all = np.array(prob_all_list)

### Make submission file

In [None]:
# One probability column per class, in the same order as the label columns of the training frame.
pred_df = pd.DataFrame(prob_all, columns=['healthy', 'multiple_diseases', 'rust', 'scab'])
# Put image_id next to the predictions; concat with axis=1 aligns rows on the index.
# NOTE(review): assumes test_df has a default 0..n-1 index matching pred_df — confirm.
submit_df = pd.concat([test_df['image_id'], pred_df], axis=1)
submit_df.to_csv('submission.csv', index=False)

In [None]:
# Upload submission.csv to the plant-pathology-2020-fgvc7 competition via the Kaggle CLI.
!kaggle competitions submit -c plant-pathology-2020-fgvc7 -f submission.csv -m "My_Plant_Pathology_Model"

### Overfitting occurred: the private score is 0.78
#### To reduce overfitting, I will retrain on all of the training data (no validation split)

In [16]:
import pandas as pd

# Start again from the raw CSV files instead of the pickled frames.
test_df = pd.read_csv("data/test.csv")
train_df = pd.read_csv("data/train.csv")

# NOTE(review): IMAGE_DIR is the absolute path '/data/images' here, while the
# CSVs are read from the relative 'data/' folder and the earlier test cell uses
# the relative 'data/images' — confirm which location is correct.
IMAGE_DIR = '/data/images'
# Full image path for every row, stored in a 'path' column.
train_df['path'] = IMAGE_DIR + '/' + train_df['image_id'] + '.jpg'
test_df['path'] = IMAGE_DIR + '/' + test_df['image_id'] + '.jpg'


def get_label(x):
    """Return the name of the first label column of row ``x`` that equals 1.

    Columns are checked in the order healthy, multiple_diseases, rust, scab.
    If none of them equals 1 the string 'None' is returned.
    """
    for column in ('healthy', 'multiple_diseases', 'rust', 'scab'):
        if x[column] == 1:
            return column
    return 'None'

# Collapse the four one-hot label columns into a single text label per row.
train_df['label'] = train_df.apply(get_label, axis=1)

# Integer class ids follow the column order: healthy=0, multiple_diseases=1, rust=2, scab=3.
label_mapping = {
    name: class_id
    for class_id, name in enumerate(['healthy', 'multiple_diseases', 'rust', 'scab'])
}
train_df['target'] = train_df['label'].map(label_mapping)

# Show the first rows with the new columns.
train_df.head()


Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab,path,label,target
0,Train_0,0,0,0,1,/data/images/Train_0.jpg,scab,3
1,Train_1,0,1,0,0,/data/images/Train_1.jpg,multiple_diseases,1
2,Train_2,1,0,0,0,/data/images/Train_2.jpg,healthy,0
3,Train_3,0,0,1,0,/data/images/Train_3.jpg,rust,2
4,Train_4,1,0,0,0,/data/images/Train_4.jpg,healthy,0


- 1. Training only with classifier layer

In [None]:
import pandas as pd 
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
import timm
import util_functions as uf



# Training pipeline for this run: 320x320 inputs and lower augmentation
# probabilities than the first experiment.
tr_transform = A.Compose([
    A.Resize(320, 320),
    A.HorizontalFlip(p=0.3),
    A.VerticalFlip(p=0.3),
    A.RandomBrightnessContrast(p=0.2),
    A.Blur(p=0.2),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Every row of train_df is used for training; no validation split is made here.
train_set = uf.PlantPathologyDataset(train_df['path'], train_df['target'], tr_transform)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)

# NOTE(review): Adam, ReduceLROnPlateau, AUROC, Trainer, device and
# freeze_feature_extractor are not defined in this cell; it relies on earlier
# cells having been run in the same kernel.
model = timm.create_model('efficientnetv2_rw_m', pretrained=True, num_classes=4)
model = freeze_feature_extractor(model=model)
optimizer = Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.2)
metric = AUROC(task='multiclass', num_classes=4)
cutmix_prob = 0.2

# None is passed in the position where the earlier cells pass val_loader.
# NOTE(review): confirm in trainer.py what the scheduler monitors when no
# validation loader is given.
trainer = Trainer(model, optimizer, train_loader, None, scheduler, device, metric, cutmix_prob)
history = trainer.fit(epochs=30)

- 2. Training with all layers

In [None]:
# Unfreeze every parameter so the whole network is trained in this stage.
for param in model.parameters():
    param.requires_grad = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh optimizer and scheduler for this stage, with the same settings as the classifier-only stage.
optimizer = Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.2)
metric = AUROC(task='multiclass', num_classes=4)
cutmix_prob = 0.2

# As in the previous stage, None is passed in the validation-loader position.
trainer = Trainer(model, optimizer, train_loader, None, scheduler, device, metric, cutmix_prob)
history = trainer.fit(epochs=30)


In [None]:
from predictor import Predictor
import numpy as np

# Wrap the model returned by the trainer in a Predictor bound to the available device.
model = trainer.get_trained_model()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
predictor = Predictor(model, device)

prob_all_list= []

# NOTE(review): test_loader is not rebuilt in this section; it is whatever an
# earlier cell left in the kernel. Confirm that its transform and row order
# match the test_df that was re-read from CSV.
for image in test_loader:
    # Move each batch of predicted probabilities to the CPU as a NumPy array
    # and append its entries to the running list.
    probas = predictor.predict_proba(image).cpu().numpy()
    prob_all_list.extend(probas)

prob_all = np.array(prob_all_list)

# One probability column per class, joined with image_id and written as the submission file.
pred_df = pd.DataFrame(prob_all, columns=['healthy', 'multiple_diseases', 'rust', 'scab'])
submit_df = pd.concat([test_df['image_id'], pred_df], axis=1)
submit_df.to_csv('submission.csv', index=False)

### The last submission got a private score of 0.92