In [None]:
!wget https://zindpublic.blob.core.windows.net/private/uploads/competition_datafile/file/285/train.zip?sp=r&sv=2015-04-05&sr=b&st=2020-02-12T04%3A36%3A26Z&se=2020-02-12T04%3A52%3A26Z&sig=VHuLaxLlxkug8ahggJk%2BTBPx8AUljIdeRgxAcIJPwA0%3D
!unzip train.zip

In [12]:
import torch
import sys
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from albumentations.pytorch import ToTensor
import numpy as np
import torch.nn as nn
import time, glob, os, cv2
import random

In [18]:
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90, Resize, CenterCrop,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, RandomBrightnessContrast, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, Flip, OneOf, Compose, Normalize
)

In [40]:
class_to_idx = {'healthy_wheat': 0, 'leaf_rust': 1, 'stem_rust': 2}
random.seed(30)

train_transforms = Compose([
        CLAHE(),
        RandomRotate90(),
        Transpose(),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.50, rotate_limit=45, p=.75),
        Blur(blur_limit=3),
        OpticalDistortion(),
        GridDistortion(),
        HueSaturationValue(),
        Resize(256, 256, always_apply=True),
        CenterCrop(224,224, always_apply=True),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, always_apply=True),
        ToTensor()
    ], p=0.5)


test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class CropDisease(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform
        self.class_to_idx = {'healthy_wheat': 0, 'leaf_rust': 1, 'stem_rust': 2}
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        filename = self.data[idx]
        img = cv2.imread(filename)
        try:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        except:
            pass
        label = filename.split("/")[-2]
        label = torch.tensor(self.class_to_idx[label])
        img = self.transform(image=img)["image"]
        return img, label

root_dir = "/home/yusuf/Documents/cvcd/crop_disease_detection/Data/train"
data = glob.glob(os.path.join(root_dir, "*/*"), recursive=False)
random.shuffle(data)

train_data = data[:800]
test_data = data[800:]

healthy_wheat_count = 0
leaf_rust_count = 0
stem_rust_count = 0

data_classes = [a.split("/")[-2] for a in train_data]
ids = [class_to_idx[i] for i in data_classes]

for i in data_classes:
    if i=="healthy_wheat":
        healthy_wheat_count+=1
    elif i=="leaf_rust":
        leaf_rust_count+=1
    elif i =="stem_rust":
        stem_rust_count+=1
        
class_weights = [len(data_classes)/healthy_wheat_count, len(data_classes)/leaf_rust_count, len(data_classes)/stem_rust_count]
weights = [class_weights[i] for i in ids]

sampler = torch.utils.data.WeightedRandomSampler(weights=weights, num_samples=len(weights))

# test_dir = "/home/yusuf/Documents/cvcd/crop_disease_detection/Data/Test"

train_dataset = CropDisease(train_data, train_transforms)
test_dataset = CropDisease(test_data, train_transforms)

train_loader = DataLoader(train_dataset, batch_size=16, sampler=sampler)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [41]:
sample = next(iter(train_loader))
sample[1]

In [5]:
model = torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x48d_wsl')
#modifying the FC layer of pretrained model
model.fc = nn.Linear(2048, 3) #number of disease types is 3

Using cache found in /home/yusuf/.cache/torch/hub/facebookresearch_WSL-Images_master


In [6]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(1536, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_s

In [7]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.007645)

In [8]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

def train(model, train_loader, optimizer, criterion):
  model.train()
  train_loss = 0
  train_correct = 0
  for data, label in train_loader:
    data = data.to(device)
    label = label.to(device)
    optimizer.zero_grad()
    out = model(data)
    train_correct += (torch.argmax(out, dim=1).eq_(label).sum()).item()
    loss = criterion(out, label)
    train_loss += loss.item()
    loss.backward()
    optimizer.step()
  avg_loss = train_loss/len(train_loader)
  accuracy = train_correct/(len(train_loader.dataset))
  return avg_loss, accuracy

def test(model, test_loader, criterion):
  with torch.no_grad():
    model.eval()
    test_correct = 0
    test_loss = 0
    for data, label in test_loader:
        data = data.to(device)
        label = label.to(device)
        out2 = model(data)
        loss2 = criterion(out2, label)
        test_loss += loss2.item()
        test_correct += (torch.argmax(out2, dim=1).eq_(label).sum()).item()
    avg_loss = test_loss/len(test_loader)
    accuracy = test_correct/len(test_loader.dataset)
  return avg_loss, accuracy


In [9]:
#Training loop

epochs = 300
print_every = 5

train_losses = []
test_losses = []
train_accuracy = []
test_accuracy = []

for i in range(epochs):
  t0 = time.time()
  train_loss, train_acc = train(model, train_loader, optimizer, criterion)
  t1 = time.time()
  print("Train Loss at {}/{} is {} | Train accuracy:{} | Time:{}".format(i+1, epochs, train_loss, train_acc, t1-t0))

  train_losses.append(train_loss)
  train_accuracy.append(train_acc)

#   writer.add_scalar("Fold{}/Loss/Train".format(i), train_loss, i)
#   writer.add_scalar("Fold{}/Accuracy/Train".format(i), train_acc, i)

  if (i+1)%print_every == 0:
      t0 = time.time()
      test_loss, test_acc = test(model, test_loader, criterion)
      t1 = time.time()
      print("Test Loss at {}/{} is {} | Test accuracy:{} | Time:{}".format(i+1, epochs, train_loss, train_acc, t1-t0))

      test_losses.append(test_loss)
      test_accuracy.append(test_acc)

#       writer.add_scalar("Fold{}/Loss/Test".format(i), test_loss, i)
#       writer.add_scalar("Fold{}/Accuracy/Test".format(i), test_acc, i)

  scheduler.step()




error: OpenCV(4.1.1) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [11]:
train_losses

[]

In [1]:
import pandas as pd

In [43]:
submission = pd.read_csv("submission_3.csv")

In [44]:
submission.head()

Unnamed: 0,ID,leaf_rust,stem_rust,healthy_wheat
0,KH8YZM,0.014638,0.002214,0.983148
1,QZL820,0.999979,1e-05,1.1e-05
2,CRAO3W,0.948259,0.001324,0.050418
3,ZOXDKA,0.029133,0.967601,0.003266
4,I83ILL,0.128968,0.867607,0.003425


In [45]:
new_df = submission[["leaf_rust", "stem_rust", "healthy_wheat"]]

In [46]:
import numpy as np
import pandas as pd
d = pd.DataFrame(np.zeros((610, 3)), columns=["leaf_rust", "stem_rust", "healthy_wheat"])
d

Unnamed: 0,leaf_rust,stem_rust,healthy_wheat
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
...,...,...,...
605,0.0,0.0,0.0
606,0.0,0.0,0.0
607,0.0,0.0,0.0
608,0.0,0.0,0.0


In [50]:
for i in range(610):
    z = new_df.iloc[i].idxmax
    new_df.iloc[i][z]=1

In [51]:
new_df["ID"] = submission["ID"]

In [52]:
new_df.to_csv("manipulated_submission3.csv")

In [53]:
10//10

1

In [54]:
10%10

0