# Our baseline model is ResNet-50
### Score: 0.71629

### Data Loading

In [None]:
# Library

import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import torchvision
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import itertools

In [None]:
conda install pillow=6.2.1

In [None]:
# Directory for generated artifacts.
OUTPUT_DIR = './'
# exist_ok=True makes the call idempotent and avoids a check-then-create race.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Train and test images are stored in the same folder of the dataset.
TRAIN_PATH = '../input/rice-disease-classification/source/Images'
TEST_PATH = '../input/rice-disease-classification/source/Images'

In [None]:
# List the entries of the dataset's source folder to see what is available.
os.listdir('../input/rice-disease-classification/source')

In [None]:
# Folder that holds the competition CSV files.
BASE_DIR = '../input/rice-disease-classification/source/'

# Read the train and test tables (in that order) from BASE_DIR.
train, test = (
    pd.read_csv(os.path.join(BASE_DIR, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

# Preview the first rows of each table.
display(train.head())
display(test.head())

In [None]:
# Keep only the rows whose Image_id does not contain '_rgn', and renumber
# the index from 0.
train_rgb = train[~train['Image_id'].str.contains('_rgn')].reset_index(drop=True)
train_rgb.head()

In [None]:
# Bind only `ss` here: `train` must keep pointing at the training table that
# was loaded from train.csv, not at the submission template.
ss = pd.read_csv(os.path.join(BASE_DIR, 'sample_submission.csv'))
ss.head()

### class mapping

In [None]:
# class mapping
'''
class_mapping = {label: idx for idx, label in enumerate(np.unique(train['Label']))}
print(class_mapping)
train['Label'] = train['Label'].map(class_mapping)
display(train.head())
'''

### Img Dataset

In [None]:
# Load one sample image and its `_rgn` companion as NumPy arrays.
IMG_DIR = '../input/rice-disease-classification/source/Images'
# train_rgb only holds ids without '_rgn', so the companion file name can be
# derived by replacing the '.jpg' suffix.
sample_id_ = train_rgb.iloc[0]['Image_id']
img_path_ = os.path.join(IMG_DIR, sample_id_)
img_path_rgn_ = os.path.join(IMG_DIR, sample_id_.replace('.jpg', '_rgn.jpg'))
# np.array() copies the pixel data, so each file can be closed right away.
with Image.open(img_path_) as img_file_:
    img_ = np.array(img_file_)
with Image.open(img_path_rgn_) as img_file_:
    img_rgn_ = np.array(img_file_)

In [None]:
class Img_Dataset(Dataset):
    """In-memory image dataset that loads and transforms every image up front.

    Every image named in ``table['Image_id']`` is opened, passed through
    ``transform`` and cached in ``self.img_list`` at construction time, so
    ``__getitem__`` is a plain list lookup.

    Args:
        file_path: Directory that contains the image files.
        transform: Callable applied to each opened PIL image.
        table: DataFrame with an ``Image_id`` column and, when ``is_train``
            is true, a ``Label`` column.
        is_rgn: If true, load the ``*_rgn.jpg`` companion of each image
            instead of the image itself.
        is_train: If true, labels are encoded and returned with each image.
    """

    # Integer encoding of the string labels. Any label not listed here is
    # encoded as DEFAULT_LABEL.
    LABEL_TO_INDEX = {'blast': 0, 'brown': 1}
    DEFAULT_LABEL = 2

    def __init__(self, file_path, transform, table, is_rgn=False, is_train=True):
        self.file_path = file_path
        self.transform = transform
        self.table = table
        self.is_train = is_train

        self.img_name_list = self.table['Image_id'].tolist()
        self.img_list = []

        for img_name in self.img_name_list:
            if is_rgn:
                img_name = img_name.replace('.jpg', '_rgn.jpg')
            with Image.open(os.path.join(self.file_path, img_name)) as img:
                # Read the pixel data now so the transformed result stays
                # valid after the file is closed.
                img.load()
                self.img_list.append(self.transform(img))

        if self.is_train:
            self.label_list = [
                self.LABEL_TO_INDEX.get(label, self.DEFAULT_LABEL)
                for label in self.table['Label'].tolist()
            ]

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.table)

    def __getitem__(self, index):
        """Return ``(image, label)`` when training, otherwise only the image."""
        if self.is_train:
            return self.img_list[index], self.label_list[index]
        return self.img_list[index]

In [None]:
def make_data_loader(batch_size = 128, split=0.8):
    """Build the train, validation and test DataLoaders.

    The images listed in ``train_rgb`` are divided at random into a training
    and a validation subset; the images listed in ``test`` form the test set.

    Args:
        batch_size: Batch size of the train and validation loaders.
        split: Fraction of the training images assigned to the training subset.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. The test loader
        yields one unshuffled image per batch.
    """
    pipeline = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    full_train = Img_Dataset(IMG_DIR, pipeline, train_rgb)
    n_train = int(len(full_train) * split)
    n_val = len(full_train) - n_train
    train_subset, val_subset = random_split(full_train, [n_train, n_val])

    train_loader = DataLoader(
        train_subset, batch_size=batch_size, shuffle=True, num_workers=2
    )
    val_loader = DataLoader(val_subset, batch_size=batch_size, num_workers=2)

    test_subset = Img_Dataset(IMG_DIR, pipeline, test, is_train=False)
    test_loader = DataLoader(test_subset, batch_size=1, shuffle=False)

    return train_loader, val_loader, test_loader

In [None]:
def train_model(device, model, train_loader, val_loader, criterion, optimizer, num_epochs=5):
    """Train ``model`` and evaluate it on the validation loader every epoch.

    Args:
        device: Device the model and every batch are moved to.
        model: Network to optimise.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of passes over the training data.

    Returns:
        Tuple ``(model, val_label, val_pred)``: the model on ``device`` plus
        the validation labels and predicted class indices collected during
        the final epoch. Both lists are empty when ``num_epochs`` is 0.
    """
    model = model.to(device)
    loaders = {'train': train_loader, 'val': val_loader}

    # Initialised up front so the return value is defined even with zero epochs.
    val_label = []
    val_pred = []

    for epoch in range(num_epochs):
        is_last_epoch = epoch == num_epochs - 1

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()
                    elif is_last_epoch:
                        # Only the final epoch's validation results are returned.
                        val_label += labels.tolist()
                        val_pred += preds.tolist()

                # loss is a batch mean; weight it by batch size so the epoch
                # figure is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            n_samples = len(loaders[phase].dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

    return model, val_label, val_pred

In [None]:
def test_model(device, model, test_loader):
    """Predict a class index for every sample yielded by ``test_loader``.

    Works with any batch size: one prediction is produced per row of the
    model output.

    Args:
        device: Device the model and every batch are moved to.
        model: Trained network returning one score per class for each sample.
        test_loader: Iterable of feature batches (no labels).

    Returns:
        List of predicted class indices, in loader order.
    """
    model = model.to(device)
    model.eval()

    test_pred = []
    with torch.no_grad():
        for features in test_loader:
            features = features.to(device)
            outputs = model(features.to(torch.float))
            # softmax is monotonic, so the argmax over the raw scores is the
            # same class the softmax probabilities would select.
            test_pred.extend(outputs.argmax(dim=1).tolist())

    return test_pred


# The name matches pytest's ``test_*`` pattern; opt out so this inference
# helper is never collected as a test.
test_model.__test__ = False

### Model

In [None]:
class myResNet50(nn.Module):
    """Pretrained ResNet-50 whose final layer is resized to ``num_classes``."""

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet50(pretrained=True)
        # Replace the stock classifier with a new linear layer that has
        # num_classes outputs.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.model_ft = backbone

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.model_ft(x)

In [None]:
# Instantiate the network with 3 output classes.
resnet50_tf = myResNet50(3)

In [None]:
# Show the installed PyTorch version.
torch.__version__  # should be 0.4.1

In [None]:
conda install pytorch=0.4.1 -c pytorch

In [None]:
# Show the installed torchvision version.
torchvision.__version__ # should be 0.2.1

In [None]:
conda install torchvision=0.2.1 -c pytorch

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Adam over all parameters of the network, paired with a cross-entropy loss.
optimizer = optim.Adam(params=resnet50_tf.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
# Build the three loaders; batch_size applies to the train and validation
# loaders, while the test loader always yields one image per batch.
train_loader, val_loader, test_loader = make_data_loader(batch_size=128)

In [None]:
# Number of samples in the training subset.
len(train_loader.dataset)

In [None]:
# Number of samples in the validation subset.
len(val_loader.dataset)

In [None]:
# Number of samples in the test set.
len(test_loader.dataset)

In [None]:
# Train for 30 epochs and keep the final epoch's validation labels and
# predictions for later use.
resnet50_1, val_label, val_pred = train_model(
    device=device,
    model=resnet50_tf,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=30,
)

In [None]:
def plot_confusion_matrix(cm, target_names=None, labels=True):
    """Draw a confusion matrix as a heat map and show it.

    Args:
        cm: 2-D array of counts; rows are drawn along the "True label" axis
            and columns along the "Predicted label" axis.
        target_names: Optional class names used as tick labels on both axes.
        labels: If true, write each cell's count inside the cell.
    """
    cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(9, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names)
        plt.yticks(tick_marks, target_names)

    if labels:
        # Cells darker than half the maximum get white text for contrast.
        thresh = cm.max() / 2
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, "{:,}".format(cm[i, j]), horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass after the axis labels exist so they are included.
    plt.tight_layout()
    plt.show()

In [None]:
# Class names in the order of their integer encoding (0, 1, 2).
classes = ['blast', 'brown', 'healthy']
# Pin the label set so the matrix always has one row and column per class,
# even if a class is missing from the validation labels and predictions.
plot_confusion_matrix(
    confusion_matrix(val_label, val_pred, labels=list(range(len(classes)))),
    target_names=classes,
)

In [None]:
# Predict a class index for every test image with the trained network.
test_pred = test_model(device, resnet50_tf, test_loader)

In [None]:
# Store the predicted class indices in the submission table and save it as CSV.
# NOTE(review): this writes integer class ids into a 'Label' column; confirm
# that this matches the column layout the competition expects.
ss.loc[:,'Label'] = test_pred
ss.to_csv('result_resnet50.csv', index=False)
ss.head()

### Score: 0.71629

In [None]:
!