In [None]:

!cp -r /kaggle/input/trainer /kaggle/working/trainer

In [2]:


import torch
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
#from torchvision.transforms import functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.sampler import WeightedRandomSampler
from torch.optim.lr_scheduler import MultiStepLR
from operator import itemgetter
from trainer import Trainer, hooks, configuration
from trainer.utils import setup_system, patch_configs
from trainer.metrics import AccuracyEstimator
from trainer.tensorboard_visualizer import TensorBoardVisualizer
from dataclasses import dataclass
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pandas as pd
from skimage import io
import numpy as np

from PIL import Image


%matplotlib inline



DATA_ROOT = "/kaggle/input/pytorch-opencv-course-classification/"
IMAGES_ROOT = DATA_ROOT + '/images/images/'



ModuleNotFoundError: ignored

In [None]:
def image_preprocess_transforms():
    
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()
        ])
    
    return preprocess

In [None]:
def image_common_transforms(mean=(0.4611, 0.4359, 0.3905), std=(0.2193, 0.2150, 0.2109)):
    preprocess = image_preprocess_transforms()
    
    common_transforms = transforms.Compose([
        preprocess,
        transforms.Normalize(mean, std)
    ])
    
    return common_transforms
    

In [None]:
def image_training_transforms():
    common_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], 
                             [0.229, 0.224, 0.225])
        ])        
    return common_transforms

In [None]:
class KenyanFood13Dataset(Dataset):
    def __init__(self,csv_file,root_dir,transform=None):
        self.annotations = pd.read_csv(csv_file)
        #print(self.annotations)
        self.root_dir = root_dir
        self.transform = transform

        num_classes = self.annotations['class'].unique()
        self.classes_list = num_classes.tolist()
        #print(num_classes)
        #print(len(num_classes))
        #print(self.classes_list)
        
        self.idx2class = {i: key for i, key in enumerate(self.classes_list)}
        self.class_to_idx = {key: i for i, key in enumerate(self.classes_list)}
        #print(self.class_to_idx)
        self.data_dict = {
            'image_path': [],
            'label': [],
            'is_train':[]
        }
        
        img_dir = os.path.join(root_dir, 'images', 'images')

        for index, row in self.annotations.iterrows():
            img_path = os.path.join(img_dir, '{}.jpg'.format(row[0]))
            self.data_dict['image_path'].append(img_path)
            self.data_dict['label'].append(self.class_to_idx[row[1]])
            self.data_dict['is_train'].append(True)
       


    def __len__(self):
        return len(self.annotations)

    def __getitem__(self,index):
        img_path = os.path.join(self.root_dir,'images','images',str(self.annotations.iloc[index,0]))
        img_path = img_path + '.jpg'
        #image = io.imread(img_path)
        #image = Image.fromarray(image)
        image = Image.open(img_path).convert("RGB")
        target = torch.from_numpy(np.array(self.data_dict['label'][index]))
        if self.transform:
            #image1 = self.transform(image1)
            image = self.transform(image)
        return (image,target)

In [None]:
def get_mean_std(dataset,num_workers=4):
    
    transform = image_preprocess_transforms()
    
    #loader = data_loader(data_root, transform)
    loader = data_loader(dataset, transform)

    mean = 0.
    std = 0.
    
    for images,_ in loader:
        batch_samples = images.size(0)
        images = images.view(batch_samples,images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)

    mean /= len(loader.dataset)
    std /= len(loader.dataset)
    
    print('mean: {}, std: {}'.format(mean, std))
    
    return mean, std

In [None]:
def  data_augmentation_preprocess(mean, std):
    preprocess = image_preprocess_transforms()
    train_transforms = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.CenterCrop(224),
        transforms.RandomChoice([
                
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.RandomRotation(20, fill=(0,0,0)),
                #transforms.RandomCrop(28, padding=4)
                #transforms.RandomAffine(30, translate=None, scale=None, shear=None, resample=False, fillcolor=0)
                # -30,45
                transforms.RandomAffine(
                degrees=(-10,30),
                translate=(0.25, 0.5),
                scale=(1.2, 2.0),
                shear=0,
                resample=False)
                
                
            ]),
            
            transforms.ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4),
            transforms.ToTensor(),
            transforms.RandomErasing(scale=(0.02, 0.16), ratio=(0.3, 1.6)),
            transforms.Normalize(mean,std)
    ])
    return train_transforms

In [None]:
def data_loader(dataset, transform, batch_size=16, shuffle=False, num_workers=2):
    #dataset = KenyanFood13Dataset(data_root, transform=transform)
    
    loader = torch.utils.data.DataLoader(dataset, 
                                         batch_size=batch_size,
                                         num_workers=num_workers,
                                         shuffle=shuffle)
    
    return loader

In [None]:
def get_data(batch_size, data_root, num_workers=1,transform=None,data_augmentation=False):
    csv_file =  os.path.join(DATA_ROOT, 'train.csv')
    dataset =  KenyanFood13Dataset(csv_file,data_root, transform=transform)
    
    dataset_size = len(dataset)
    train_len = int(0.8*len(dataset))
    valid_len = len(dataset) - train_len    
    
    train_dataset, val_dataset = torch.utils.data.random_split(dataset,[train_len, valid_len])
    
    mean,std = get_mean_std(train_dataset,num_workers=num_workers)
    
    common_transforms = image_common_transforms(mean, std)
        
    if data_augmentation:    
        train_transforms = data_augmentation_preprocess(mean, std)
    else:
        train_transforms = common_transforms
    
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, 
                                                shuffle=True, num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size,
                                                    shuffle=False, num_workers=num_workers)
    return train_loader, test_loader

In [1]:
def pretrained_resnet50(transfer_learning=True, num_class=13):
    resnet = models.resnet50(pretrained=True)
    vgg = models.
    
    if transfer_learning:
        for param in resnet.parameters():
            param.requires_grad = False
            
    last_layer_in = resnet.fc.in_features
    resnet.fc = nn.Linear(last_layer_in, num_class)
    
    return resnet

In [None]:
class Experiment:
    def __init__(
        self,
        system_config: configuration.SystemConfig = configuration.SystemConfig(),
        dataset_config: configuration.DatasetConfig = configuration.DatasetConfig(),
        dataloader_config: configuration.DataloaderConfig = configuration.DataloaderConfig(),
        optimizer_config: configuration.OptimizerConfig = configuration.OptimizerConfig()
    ):
        self.loader_train, self.loader_test = get_data(
            #batch_size=dataloader_config.batch_size,
            batch_size=32,
            #num_workers=dataloader_config.num_workers,
            num_workers=8,
            data_root=dataset_config.root_dir,
            transform=image_training_transforms(),
            data_augmentation=True
        )
        
        setup_system(system_config)

        #self.model = get_model()
        
        self.model = pretrained_resnet50(transfer_learning=False)
        
        self.loss_fn = nn.CrossEntropyLoss()
        self.metric_fn = AccuracyEstimator(topk=(1, ))
        self.optimizer = optim.SGD(
            self.model.parameters(),
            #lr=optimizer_config.learning_rate,
            lr=0.001,
            #weight_decay=optimizer_config.weight_decay,
            weight_decay=0.0001,
            momentum=optimizer_config.momentum
        )
        self.lr_scheduler = MultiStepLR(
            self.optimizer, milestones=optimizer_config.lr_step_milestones, gamma=optimizer_config.lr_gamma
        )
        self.visualizer = TensorBoardVisualizer()

    def run(self, trainer_config: configuration.TrainerConfig) -> dict:

        device = torch.device(trainer_config.device)
        self.model = self.model.to(device)
        self.loss_fn = self.loss_fn.to(device)
       
        model_trainer = Trainer(
            model=self.model,
            loader_train=self.loader_train,
            loader_test=self.loader_test,
            loss_fn=self.loss_fn,
            metric_fn=self.metric_fn,
            optimizer=self.optimizer,
            lr_scheduler=self.lr_scheduler,
            device=device,
            data_getter=itemgetter(0),
            target_getter=itemgetter(1),
            stage_progress=trainer_config.progress_bar,
            get_key_metric=itemgetter("top1"),
            visualizer=self.visualizer,
            model_saving_frequency=trainer_config.model_saving_frequency,
            save_dir=trainer_config.model_dir
        )
        
        model_trainer.register_hook("end_epoch", hooks.end_epoch_hook_classification)
        self.metrics = model_trainer.fit(trainer_config.epoch_num)
        return self.metrics

In [None]:
def main():
    '''Run the experiment
    '''
    # patch configs depending on cuda availability
    dataloader_config, trainer_config = patch_configs(epoch_num_to_set=5)
   
    dataset_config = configuration.DatasetConfig(root_dir=DATA_ROOT)
    experiment = Experiment(dataset_config=dataset_config, dataloader_config=dataloader_config)
    results = experiment.run(trainer_config)

    return results

In [None]:
if __name__ == '__main__':
    main()

mean: tensor([ 0.3991,  0.0255, -0.2683]), std: tensor([1.0464, 1.1029, 1.0955])


Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=5.0), HTML(value='')), layout=Layout(disp…

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=164.0), HTML(value='')), layout=Layout(di…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=41.0), HTML(value='')), layout=Layout(dis…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=164.0), HTML(value='')), layout=Layout(di…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=41.0), HTML(value='')), layout=Layout(dis…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=164.0), HTML(value='')), layout=Layout(di…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=41.0), HTML(value='')), layout=Layout(dis…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=164.0), HTML(value='')), layout=Layout(di…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=41.0), HTML(value='')), layout=Layout(dis…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=164.0), HTML(value='')), layout=Layout(di…




HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=41.0), HTML(value='')), layout=Layout(dis…





In [None]:

#!cp -r /kaggle/runs/. /kaggle/working/runs
#!tensorboard dev upload --logdir /kaggle/runs/. \
#  --name "Project2Classification Experiment" \
#  --description "https://www.kaggle.com/dnyandeobharambe/project2-classification" \
#  --one_shot
#!tar -zcvf outputname.tar.gz /kaggle/working
#%load_ext tensorboard
#%tensorboard dev upload --logdir /kaggle/runs/. --name "Project2Classification Experiment" --description "https://www.kaggle.com/dnyandeobharambe/project2-classification" --one_shot

In [None]:
## shared notebook https://www.kaggle.com/dnyandeobharambe/project2-classification
## profile : https://www.kaggle.com/dnyandeobharambe

In [None]:

#1) DataLoader   KenyanFood13Dataset class in this notebook
#2) Configuration  /kaggle/working/trainer/configuration.py or configuration.ipynb in this implenetation 
#no values set in configuration all corresponding values set/parameters in main method and 
#Experiment class in above notebook.
#3) Evalution Matric is /kaggle/working/trainer/metrics.py or matrics.ipynb
#4) Train and Validation is /kaggle/working/trainer/trainer.py or trainer.ipynb
#5) Model is pretrained_resnet50 function in above notebook
#6) Utils is /kaggle/working/trainer/utils.py or utils.ipynb
#7) Experiement  this implemented in above notebook in class Experiment
#8) Tensorboard log under /kaggle/working/runs as zip or tar
#9) Kaggle profile link https://www.kaggle.com/dnyandeobharambe


