In [None]:
# Colab's file access feature: mount Google Drive at /content/gdrive so that
# files stored on the Drive can be reached from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Work from the competition folder on the mounted Drive; the relative data
# paths defined further down ('train/', 'test/test') are resolved against it.
%cd /content/gdrive/My Drive/Cassava-Competition/


In [None]:
# Provides the `EfficientNet` class imported below.
!pip install efficientnet_pytorch -q

In [None]:
# Provides `pretrainedmodels` (imported below as `pm`), used for the SE-ResNeXt backbone.
!pip3 install pretrainedmodels -q

In [None]:
# Run once per session: installs fastai2, whose data and training API the rest
# of the notebook relies on.
!pip install fastai2 -q

In [None]:
import os
import random

import numpy as np
from sklearn.model_selection import StratifiedKFold

In [None]:
from fastai2.basics import *
from fastai2.vision.all import *
from fastai2.callback.all import *
from efficientnet_pytorch import EfficientNet
import pretrainedmodels as pm

In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports `PYTHONHASHSEED`, and configures cuDNN for deterministic execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Hash randomisation of the running interpreter is fixed at start-up, so
    # this only takes effect in processes spawned from here on.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU (a no-op when CUDA is unavailable).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly select different
    # kernels from run to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False
seed_everything(0)

In [None]:
#defaults.device = torch.device('cuda')

In [None]:
# Dataset locations, relative to the current working directory.
path = 'train/'  # training images; labels are later taken from each file's parent folder
#val_path = 'data/val'
test_path = 'test/test'  # images to predict for the submission

In [None]:
# Item-level transforms: passed as `after_item` when the DataLoaders are built.
item_tfms = [ToTensor(), RandomResizedCrop(320)]

# Batch-level transforms: passed as `after_batch` — int-to-float conversion,
# augmentations producing 300x300 images, then normalisation with imagenet_stats.
batch_tfms = [
    IntToFloatTensor(),
    *aug_transforms(size=(300, 300)),
    Normalize.from_stats(*imagenet_stats),
]

# Batch size
bs = 16

In [None]:
# List every image file under the training and test folders.
train_imgs = get_image_files(path)
#tst_imgs = get_image_files(val_path)
test_imgs = get_image_files(test_path)
# Shuffle the training files in place; the hold-out split built below takes
# the tail of this list, so the order matters.
random.shuffle(train_imgs)

In [None]:
# Number of training image files found.
len(train_imgs)

In [None]:
# Hold out the last 20% of the (already shuffled) training files for validation.
n_train = len(train_imgs)
start_val = n_train - int(n_train*.2)
idxs = list(range(start_val, n_train))

# Build the splitter from the validation indices and apply it to get the
# train and validation index lists.
splits = IndexSplitter(idxs)
split = splits(train_imgs)
train_idx, valid_idx = split[0], split[1]

# Third split: positions n_train .. n_train+n_test-1, i.e. where the test
# files land once they are appended after the training files.
n_test = len(test_imgs)
split_list = [train_idx, valid_idx, L(range(n_train, n_train + n_test))]
#split_list.append(L(range(len(tst_imgs)+len(train_imgs), len(tst_imgs)+len(train_imgs)+len(test_imgs))))

In [None]:
# Inspect the three index lists (train, validation, test).
split_list

Let's check that the splits work by building the `Datasets` and viewing a sample:

In [None]:
#train_imgs+tst_imgs+test_imgs
# Inputs: open each file as an image. Targets: parent folder name, categorised.
x_tfms = [PILImage.create]
y_tfms = [parent_label, Categorize]
all_imgs = train_imgs+test_imgs
dsrc = Datasets(all_imgs, tfms=[x_tfms, y_tfms], splits=split_list)

In [None]:
# Display item 3 of the training split.
show_at(dsrc.train, 3)

In [None]:
# Wrap the datasets in DataLoaders using the batch size and transforms defined above.
dls = dsrc.dataloaders(bs=bs, after_item=item_tfms, after_batch=batch_tfms)

In [None]:
# Visual sanity check of one batch.
dls.show_batch()

In [None]:
class Head(nn.Module):
    """Classification head: global average pool -> dropout -> linear layer.

    Args:
        f_in: Number of input features of the linear layer (channels of the
            incoming feature map).
        num_classes: Number of outputs of the linear layer.
        p: Dropout probability applied before the linear layer.
    """

    def __init__(self, f_in, num_classes=1000, p=0.0):
        super().__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p)
        self.fc = nn.Linear(f_in, num_classes)

    def forward(self, x):
        """Pool `x` to 1x1, flatten per sample, apply dropout and the linear layer."""
        pooled = self.avgpool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(self.dropout(flat))

In [None]:
# Load the pretrained EfficientNet-B5 and replace its final layer with one
# that has `dls.c` outputs.
model = EfficientNet.from_pretrained('efficientnet-b5')
# Take the input width from the layer being replaced rather than hard-coding
# 2048, so switching to another EfficientNet variant cannot leave a
# mismatched classifier.
model._fc = nn.Linear(model._fc.in_features, dls.c)

Cross-Validation

In [None]:
from sklearn.model_selection import StratifiedKFold
import random

Now let's make our K-Fold

In [None]:
# 5-fold stratified splitter with shuffling.
# NOTE(review): `kf` is rebound with n_splits=6 in a later cell before any
# training loop iterates over it.
kf = StratifiedKFold(n_splits=5, shuffle=True)

First let's grab all the labels from our dataset

In [None]:
# Collect the target of every item: training subset first, validation subset second.
# NOTE(review): indexing a subset presumably runs the whole item pipeline
# (image opening included) just to read the label — confirm; deriving the
# labels from the file paths would avoid that cost.
train_labels = L()
for subset in (dsrc.train, dsrc.valid):
  for item_idx in range(len(subset)):
    train_labels.append(subset[item_idx][1])
train_labels

In [None]:
def get_sresnext(pretrained=True,**kwargs):
  """Build an SE-ResNeXt101 (32x4d) network from `pretrainedmodels`.

  Args:
    pretrained: When truthy, request the 'imagenet' weights; otherwise
      request no pretrained weights.
    **kwargs: Accepted so callers may pass extra options; ignored.

  Returns:
    The network created by `pm.se_resnext101_32x4d` with 1000 output classes.
  """
  # The flag used to be ignored: 'imagenet' weights were requested regardless.
  weights = 'imagenet' if pretrained else None
  return pm.se_resnext101_32x4d(num_classes=1000, pretrained=weights)

In [None]:
#kf = StratifiedKFold(n_splits=5, shuffle=True)
# Higher-resolution settings for the cross-validation runs: 448px images and
# a batch size of 4.
item_tfms = [ToTensor(), Resize(448)]
batch_tfms = [
    IntToFloatTensor(),
    *aug_transforms(size=(448, 448)),
    Normalize.from_stats(*imagenet_stats),
]
bs = 4

In [None]:
# pct_start: Percentage of total number of iterations when the learning rate rises during one cycle

In [None]:
# 6-fold stratified splitter with shuffling; this is the `kf` that the
# training loops below iterate over.
kf = StratifiedKFold(n_splits=6, shuffle=True)

Finally, we need to define a training loop that goes over all our folds and gathers each fold's predictions on the test set

In [None]:
# Cross-validated training of EfficientNet-B5: one model per fold, each
# contributing a set of test-set predictions to `tst_preds`.
val_pct = []
tst_preds = []
# NOTE(review): `skf` and `opt_func` are defined but never used — the folds
# come from `kf` and the Learner keeps its default optimizer. Left as-is so
# the training configuration does not change silently.
skf = StratifiedKFold(n_splits=10, shuffle=True)
opt_func = partial(Adam, wd=0.01, eps=1e-3)
save_c = SaveModelCallback(monitor='error_rate')
mixup = MixUp()
#loss_func=LabelSmoothingCrossEntropy()
#loss_func=CrossEntropyLossFlat()
for _, val_idx in kf.split(np.array(train_imgs), train_labels):
  # Rebuild the datasets with this fold's validation indices.
  splits = IndexSplitter(val_idx)
  split = splits(train_imgs)
  split_list = [split[0], split[1]]
  dsrc = Datasets(train_imgs, tfms=[[PILImage.create], [parent_label, Categorize]],
                  splits=split_list)
  dls = dsrc.dataloaders(bs=bs, after_item=item_tfms, after_batch=batch_tfms)
  test_dls = dls.test_dl(test_imgs)

  # Start every fold from a fresh pretrained network. Reusing one model object
  # across folds keeps training the same weights, so each fold's validation
  # images would already have been trained on in earlier folds and the
  # per-fold predictions would not come from independent models.
  model = EfficientNet.from_pretrained('efficientnet-b5')
  model._fc = nn.Linear(model._fc.in_features, dls.c)

  learn = Learner(dls, model, metrics=[error_rate, accuracy], cbs=[mixup, save_c, ShowGraphCallback])

  learn.unfreeze()
  learn.fit_one_cycle(5, 1e-4)

  # Predictions on the test images for this fold.
  preds, _= learn.get_preds(dl=test_dls)
  tst_preds.append(preds)
  print('********************************************************************************************************')

In [None]:
# One entry per fold is appended by the loop above.
len(tst_preds)

In [None]:
# Average the per-fold test predictions.
# Stacking and taking the mean builds a new tensor, so `tst_preds[0]` is not
# modified in place and re-running this cell gives the same result every time
# (the in-place `+=` / `/=` form accumulated into `tst_preds[0]` itself).
hat1 = torch.stack(list(tst_preds)).mean(dim=0)

In [None]:
# Inspect the fold-averaged predictions.
hat1

In [None]:
import pandas as pd
import os

In [None]:
#y = torch.from_numpy(hat)

In [None]:
# Column index of the largest averaged score in each row.
predicted_class = hat1.max(axis=1).indices

# NOTE(review): this hard-coded order is assumed to match the category order
# used during training — confirm against the DataLoaders' vocab.
class_labels = np.array(['cbb', 'cbsd', 'cgm', 'cmd', 'healthy'])
predicted_class_label = class_labels[predicted_class]

# File names of the test images, in the same order as `test_imgs`.
names = np.array([img_file.name for img_file in test_imgs])

In [None]:
# Two-column submission table: predicted label and image file name.
submission = dict(Category=predicted_class_label, Id=names)
submission_df = pd.DataFrame(submission)
submission_df.head()

In [None]:
# NOTE(review): this cell rebuilds the same submission table as the cell before it.
submission = dict(Category=predicted_class_label, Id=names)
submission_df = pd.DataFrame(submission)
submission_df.head()

In [None]:
# Write the submission without the index column.
# NOTE(review): the file name says "eff3" while the model trained above is efficientnet-b5.
submission_df.to_csv('b-eff3-cv.csv', columns=["Category","Id"], index = False)

In [None]:
def get_sresnext(pretrained=True,**kwargs):
  """Build an SE-ResNeXt101 (32x4d) network from `pretrainedmodels`.

  Args:
    pretrained: When truthy, request the 'imagenet' weights; otherwise
      request no pretrained weights.
    **kwargs: Accepted so callers may pass extra options; ignored.

  Returns:
    The network created by `pm.se_resnext101_32x4d` with 1000 output classes.
  """
  # The flag used to be ignored, so the library's default weights were always used.
  weights = 'imagenet' if pretrained else None
  return pm.se_resnext101_32x4d(num_classes=1000, pretrained=weights)

In [None]:
# Cross-validated training of SE-ResNeXt101 with the custom `Head`: one model
# per fold, each contributing a set of test-set predictions to `tst_preds`.
val_pct = []
tst_preds = []
# NOTE(review): `skf` is defined but unused; the folds come from `kf`.
skf = StratifiedKFold(n_splits=10, shuffle=True)
#opt_func = partial(Adam, wd=0.01, eps=1e-3)
save_c = SaveModelCallback(monitor='error_rate')
mixup = MixUp()
#loss_func=LabelSmoothingCrossEntropy()
#loss_func=CrossEntropyLossFlat()

# Test-time pipeline (300px, batch size 16). It lives under its own names:
# rebinding batch_tfms / item_tfms / bs inside the loop made every fold after
# the first train with these settings instead of the ones configured earlier.
tst_batch_tfms = [IntToFloatTensor(), *aug_transforms(size=(300, 300)), Normalize.from_stats(*imagenet_stats)]
tst_item_tfms = [ToTensor(), Resize(300)]
tst_bs = 16

for _, val_idx in kf.split(np.array(train_imgs), train_labels):
  # Rebuild the datasets with this fold's validation indices.
  splits = IndexSplitter(val_idx)
  split = splits(train_imgs)
  split_list = [split[0], split[1]]
  dsrc = Datasets(train_imgs, tfms=[[PILImage.create], [parent_label, Categorize]],
                  splits=split_list)

  dls = dsrc.dataloaders(bs=bs, after_item=item_tfms, after_batch=batch_tfms)

  # cnn_learner builds the network from an architecture function; plain
  # Learner expects an already-built model and has no `cut`/`custom_head`
  # arguments. The custom head is handed over through the cnn config.
  learn = cnn_learner(dls, get_sresnext, cut=-2,
                      config=cnn_config(custom_head=Head(2048, dls.c, 0.0)),
                      metrics=[error_rate, accuracy], cbs=[mixup, save_c, ShowGraphCallback])

  learn.unfreeze()
  # NOTE(review): pct_start equals the learning rate (0.0002) — confirm this is intended.
  learn.fit_one_cycle(10, 0.0002, pct_start=0.0002, div=100)

  # Predict the test images through the 300px pipeline.
  tst_dls = dsrc.dataloaders(bs=tst_bs, after_item=tst_item_tfms, after_batch=tst_batch_tfms)
  test_dls = tst_dls.test_dl(test_imgs)

  preds, _= learn.get_preds(dl=test_dls)
  tst_preds.append(preds)
  print('********************************************************************************************************')

In [None]:
# Average the per-fold test predictions.
# Stacking and taking the mean builds a new tensor, so `tst_preds[0]` is not
# modified in place and re-running this cell gives the same result every time
# (the in-place `+=` / `/=` form accumulated into `tst_preds[0]` itself).
hat2 = torch.stack(list(tst_preds)).mean(dim=0)

In [None]:
# Inspect the fold-averaged predictions of the second model.
hat2

In [None]:
# Fold-averaged predictions of the first model, shown again for comparison.
hat1

In [None]:
# Equal-weight average of the two sets of fold-averaged predictions.
hat = (hat1 + hat2)/2
hat

In [None]:
# Column index of the largest ensembled score in each row.
predicted_class = hat.max(axis=1).indices

# NOTE(review): this hard-coded order is assumed to match the category order
# used during training — confirm against the DataLoaders' vocab.
class_labels = np.array(['cbb', 'cbsd', 'cgm', 'cmd', 'healthy'])
predicted_class_label = class_labels[predicted_class]

# File names of the test images, in the same order as `test_imgs`.
names = np.array([img_file.name for img_file in test_imgs])

In [None]:
# Two-column submission table for the ensemble: predicted label and image file name.
submission = dict(Category=predicted_class_label, Id=names)
submission_df = pd.DataFrame(submission)
submission_df.head()

In [None]:
# Write the ensemble submission without the index column.
# NOTE(review): the file name says "se_resnext50" while the backbone built above is se_resnext101_32x4d.
submission_df.to_csv('eff5-se_resnext50-cv.csv', columns=["Category","Id"], index = False)

In [None]:
# columns = ['healthy_wheat', 'leaf_rest', 'stem_rust']
# df = pd.DataFrame(hat, columns=columns)
# healthy = df.healthy_wheat.values
# leaf = df.leaf_rest.values
# stem = df.stem_rust.values

In [None]:
# submission = pd.DataFrame({'ID': [s.split('.')[0] for s in os.listdir(test_path)  if s.split('.')[1] != 'jfif']})
# submission['leaf_rust'] = leaf
# submission['stem_rust'] = stem
# submission['healthy_wheat'] = healthy
# submission.head()

In [None]:
# submission.to_csv('Cv-mix-.csv', index = False)