# Imports

#### Timm

In [1]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

#### Settings

In [2]:
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

#### General

In [3]:
from collections import defaultdict
import pandas as pd
import numpy as np
import os
import random
import gc
import cv2
import glob
gc.enable()
pd.set_option('display.max_columns', None)

#### Image Augmentation

In [4]:
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

#### Deep Learning

In [5]:
from torch.utils.data import Dataset, DataLoader
import torch
import torchvision
import timm
import torch.nn as nn
import torch.nn.functional as F

#### Seeds

In [6]:
RANDOM_SEED = 42

def seed_everything(seed=RANDOM_SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything()

#### Device

In [7]:
device = torch.device('cpu')

# Data

#### Input

In [8]:
data_dir = './input'   # image folder
models_dir = './models' # model folder
test_file_path = os.path.join(data_dir, 'sample_submission.csv') # csv file with name of images
train_file_path = './input/train_5_folds.csv' # dont need to care abt dis

In [9]:
image_ids = os.listdir('./upload')
folder_len = len(image_ids)
folder_len

3469

In [28]:
image_ids

['202919.jpg',
 '200868.jpg',
 '200698.jpg',
 '200840.jpg',
 '201586.jpg',
 '203391.jpg',
 '202931.jpg',
 '202925.jpg',
 '203385.jpg',
 '200854.jpg',
 '201592.jpg',
 '201579.jpg',
 '200667.jpg',
 '202070.jpg',
 '202716.jpg',
 '203408.jpg',
 '200101.jpg',
 '200115.jpg',
 '202702.jpg',
 '202064.jpg',
 '200673.jpg',
 '203352.jpg',
 '200883.jpg',
 '201545.jpg',
 '201223.jpg',
 '203434.jpg',
 '203420.jpg',
 '200129.jpg',
 '201237.jpg',
 '200897.jpg',
 '201551.jpg',
 '203346.jpg',
 '202058.jpg',
 '201960.jpg',
 '201974.jpg',
 '203193.jpg',
 '201784.jpg',
 '201790.jpg',
 '201948.jpg',
 '203187.jpg',
 '202299.jpg',
 '200303.jpg',
 '202514.jpg',
 '202272.jpg',
 '200465.jpg',
 '200471.jpg',
 '202266.jpg',
 '203178.jpg',
 '202500.jpg',
 '201009.jpg',
 '200317.jpg',
 '202528.jpg',
 '201021.jpg',
 '200459.jpg',
 '201747.jpg',
 '203150.jpg',
 '203144.jpg',
 '201753.jpg',
 '201035.jpg',
 '201814.jpg',
 '201800.jpg',
 '201196.jpg',
 '200288.jpg',
 '201828.jpg',
 '201182.jpg',
 '200277.jpg',
 '201169.j

In [33]:
test_df = pd.read_csv(test_file_path)
train_df = pd.read_csv(train_file_path)

In [34]:
test_df = test_df[(test_df.index < folder_len)]
test_df['image_id'] = image_ids

In [35]:
test_df

Unnamed: 0,image_id,label
0,202919.jpg,
1,200868.jpg,
2,200698.jpg,
3,200840.jpg,
4,201586.jpg,
...,...,...
3464,200681.jpg,
3465,200871.jpg,
3466,202900.jpg,
3467,202914.jpg,


In [36]:
test_df['image_path'] = test_df.apply(lambda row: './upload/' + row['image_id'], axis=1)

In [37]:
test_df

Unnamed: 0,image_id,label,image_path
0,202919.jpg,,./upload/202919.jpg
1,200868.jpg,,./upload/200868.jpg
2,200698.jpg,,./upload/200698.jpg
3,200840.jpg,,./upload/200840.jpg
4,201586.jpg,,./upload/201586.jpg
...,...,...,...
3464,200681.jpg,,./upload/200681.jpg
3465,200871.jpg,,./upload/200871.jpg
3466,202900.jpg,,./upload/202900.jpg
3467,202914.jpg,,./upload/202914.jpg


#### Labels

In [15]:
label2id = {'bacterial_leaf_blight': 0,
            'bacterial_leaf_streak': 1,
            'bacterial_panicle_blight': 2,
            'blast': 3,
            'brown_spot': 4,
            'dead_heart': 5,
            'downy_mildew': 6,
            'hispa': 7,
            'normal': 8,
            'tungro': 9}

id2label = {v: k for k, v in label2id.items()}

#### Params

In [16]:
params = {
    'model': 'efficientnet_b3',
    'pretrained': False,
    'inp_channels': 3,
    'im_size': 300,
    'device': device,
    'batch_size': 85,
    'num_workers' : 0,
    'out_features': train_df['label'].nunique(),
    'dropout': 0.2,
    'num_fold': train_df['kfold'].nunique(),
    'debug': False,
}

#### Transform

In [17]:
# TRANSFORMS 
def get_test_transforms(DIM = params['im_size']):
    return albumentations.Compose(
        [
          albumentations.Resize(DIM,DIM),
          albumentations.Normalize(
              mean=[0.485, 0.456, 0.406],
              std=[0.229, 0.224, 0.225],
          ),
          ToTensorV2(p=1.0)
        ]
    )

#### Dataset

In [24]:
class PaddyDataset(Dataset):
    def __init__(self, images_filepaths, transform=None):
        self.images_filepaths = images_filepaths
        self.transform = transform

    def __len__(self):
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']
        
        return image

# Deep Learning

#### Neural Net

In [19]:
class PaddyNet(nn.Module):
    def __init__(self, model_name=params['model'], out_features=params['out_features'], inp_channels=params['inp_channels'],
                 pretrained=params['pretrained']):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        out_channels = self.model.conv_stem.out_channels
        kernel_size = self.model.conv_stem.kernel_size
        stride = self.model.conv_stem.stride
        padding = self.model.conv_stem.padding
        bias = self.model.conv_stem.bias
        self.model.conv_stem = nn.Conv2d(inp_channels, out_channels,
                                          kernel_size=kernel_size, stride=stride,
                                          padding=padding, bias=bias)
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.dropout = nn.Dropout(params['dropout'])
        self.fc = nn.Linear(n_features, out_features)
    
    def forward(self, image):
        embeddings = self.model(image)
        x = self.dropout(embeddings)
        output = self.fc(x)
        return output

#### Prediction

In [40]:
pred_cols = []

for i, model_name in enumerate(glob.glob(models_dir + '/*.pth')):
    model = PaddyNet()
    model.load_state_dict(torch.load(model_name, map_location=torch.device('cpu')))
    model = model.to(params['device'])
    model.eval()
    
    X_test = test_df['image_path']

    test_dataset = PaddyDataset(
        images_filepaths=X_test.values,
        transform = get_test_transforms()
    )
    
    test_loader = DataLoader(
        test_dataset, batch_size=params['batch_size'],
        shuffle=False, num_workers=params['num_workers'],
        pin_memory=True
    )

    temp_preds = None
    with torch.no_grad():
        for images in test_loader:
            images = images.to(params['device'], non_blocking=True)
            predictions = model(images).softmax(dim=1).argmax(dim=1).to('cpu').numpy()
            
            if temp_preds is None:
                temp_preds = predictions
            else:
                temp_preds = np.hstack((temp_preds, predictions))

    test_df[f'model_{i}_preds'] = temp_preds
    pred_cols.append(f'model_{i}_preds')

In [41]:
test_df['label'] = test_df[pred_cols].mode(axis=1)[0]
test_df = test_df[['image_id', 'label']]
test_df['label'] = test_df['label'].map(id2label)

test_df.to_csv('./output/submission.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['label'] = test_df['label'].map(id2label)
