# Corn classification 3

In [1]:
import torch
import torch.nn as nn
from skimage import io, transform

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self,image_path,features,targets,augmentations=None):
        self.image_path = image_path
        self.features = features
        self.targets = targets
        self.augmentations = augmentations
        
    def __len__(self):
        return len(self.image_path)
    
    def __getitem__(self,item):
        image = io.imread(self.image_path[item])
        features = self.features[item,:]
        targets = self.targets[item]
        
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
            
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.long),
            "targets": torch.tensor(targets, dtype=torch.long),
        }

In [2]:
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def train(model,train_loader,device,optimizer):
    model.train()
    running_train_loss = 0.0
    for data in train_loader:
        inputs = data['image']
        features = data['features']
        targets = data['targets']

        inputs = inputs.to(device, dtype=torch.float)
        features = features.to(device,dtype=torch.float)
        targets = targets.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(inputs,)
        loss = nn.CrossEntropyLoss()(outputs, targets)
        loss.backward()
        optimizer.step()
        running_train_loss +=loss.item()
        
    train_loss_value = running_train_loss/len(train_loader)
    print(f'train Cross Entropy loss is {train_loss_value}')
    
def eval(model,valid_loader,device,optimizer):
    model.eval()
    final_targets = []
    final_outputs = []
    running_val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs = data['image']
            features = data['features']
            targets = data['targets']
            inputs = inputs.to(device, dtype=torch.float)
            features = features.to(device,dtype=torch.float)
            targets = targets.to(device, dtype=torch.long)

            output = model(inputs)
            running_val_loss += nn.CrossEntropyLoss()(output, targets)
            _,output = torch.max(nn.Softmax(dim=1)(output), 1)
            targets = (targets.detach().cpu().numpy()).tolist()
            output = output.detach().cpu().numpy().tolist()
            final_outputs.extend(output)
            final_targets.extend(targets)
        val_loss = running_val_loss/len(valid_loader)    
        print(f'valid Cross Entropy loss is {val_loss}')
    return final_outputs,final_targets      

In [4]:
import torch
import torch.nn as nn

In [5]:
def debug_model():
    return timm.create_model("resnet18",num_classes=4)

In [6]:
import pandas as pd
import numpy as np
from sklearn import model_selection
df = pd.read_csv('C:/Users/BIBER/Desktop/kaggle-pog-series-s01e03/corn/train.csv')
df["kfold"] = -1

df = df.sample(frac=1).reset_index(drop=True)

kf = model_selection.StratifiedKFold(n_splits=5, shuffle=False)

for fold, (train_idx, val_idx) in enumerate(kf.split(X=df,y=df.label.values)):
    print(len(train_idx), len(val_idx))
    df.loc[val_idx, 'kfold'] = fold

11457 2865
11457 2865
11458 2864
11458 2864
11458 2864


In [7]:
view_map = {
    "top":0,
    "bottom":1
}

target_map = {
    "pure": 0,
    "broken":1,
    "silkcut":2,
    "discolored":3,
}

In [8]:
df["label"].replace(target_map, inplace=True)
df["view"].replace(view_map, inplace=True)

In [18]:
import os
from sklearn import metrics
import albumentations
device = 'cpu'
epochs = 16

train_aug = albumentations.Compose(
    [
        albumentations.Resize(224, 224, p=1),
        albumentations.HueSaturationValue(
            hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5
        ),
        albumentations.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
        ),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)

valid_aug = albumentations.Compose(
    [
        albumentations.Resize(224, 224, p=1),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)

In [19]:
feats = ['view']

In [20]:
from itertools import chain
scores = []
root = "C:/Users/BIBER/Desktop/kaggle-pog-series-s01e03/corn/"
for fold in range(5):
        max_accuracy = 0.0
        model = debug_model()
        model.to(device)
        df_train = df[df.kfold != fold].reset_index(drop=True)
        df_valid = df[df.kfold == fold].reset_index(drop=True)

        df_train = df_train.drop(columns = 'kfold')
        df_valid = df_valid.drop(columns = 'kfold')

        train_images = df_train.image.values.tolist()
        train_images = [root + i for i in train_images]
        
        valid_images = df_valid.image.values.tolist()
        valid_images = [root + i for i in valid_images]
        
        train_targets = df_train.label.values
        valid_targets = df_valid.label.values

        train_dataset = CustomDataset(image_path = train_images,features=df_train[feats].values,targets = train_targets,augmentations=train_aug)
        train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=8,shuffle=True,pin_memory=True) 
        valid_dataset = CustomDataset(image_path = valid_images,features=df_valid[feats].values,targets =valid_targets,augmentations=valid_aug)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,batch_size=8,shuffle=False,pin_memory=True) 

        optimizer = torch.optim.Adam(model.parameters(),lr=1e-6)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=20, T_mult=1, eta_min=1e-4, last_epoch=- 1, verbose=True)
        print(f'============================== FOLD -- {fold} ==============================')
        for epoch in range(epochs):
            print(f'==================== Epoch -- {epoch} ====================')
            train(model=model,train_loader=train_loader,device=device,optimizer=optimizer)
            
            final_outputs,final_targets = eval(model=model,valid_loader=valid_loader,device=device,optimizer=optimizer)
    
            accuracy = np.sqrt(metrics.accuracy_score(final_targets,final_outputs))
            scheduler.step()
            
            print(f'valid accuracy={accuracy}')
            if max_accuracy < accuracy:
                torch.save(model.state_dict(),'model-'+str(fold)+'.pth')
                max_accuracy = accuracy
        scores.append(accuracy)




train Cross Entropy loss is 1.3434841399914887
valid Cross Entropy loss is 1.3012676239013672
valid accuracy=0.5761398899354818
train Cross Entropy loss is 1.2689494649097792
valid Cross Entropy loss is 1.232853651046753
valid accuracy=0.7074768972973069


KeyboardInterrupt: 

In [None]:
scores