In [1]:
 !pip install -q kaggle

In [2]:
!mkdir ~/.kaggle 

In [3]:
! cp kaggle.json ~/.kaggle/

In [4]:
! chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle competitions download -c plant-seedlings-classification

Downloading plant-seedlings-classification.zip to /content
 99% 1.67G/1.69G [00:10<00:00, 239MB/s]
100% 1.69G/1.69G [00:10<00:00, 171MB/s]


In [None]:
! unzip /content/plant-seedlings-classification.zip

In [7]:
from random import sample
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np 
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import os 
import glob
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import shutil
import warnings
import statistics




warnings.filterwarnings('ignore')

use_gpu = torch.cuda.is_available()
np.random.seed(1234)

In [8]:

train_dir='train'
sz = 224
batch_size = 32
os.listdir(train_dir)
!ls
# trn_fnames = glob.glob(f'/*/*.png')
# trn_fnames[:5]

kaggle.json			    sample_data		   test
plant-seedlings-classification.zip  sample_submission.csv  train


In [9]:
if not os.path.exists("valid"):
  valid="valid"
  os.mkdir(valid)
  array=(os.listdir(train_dir))
  for name in array:
    os.makedirs(os.path.join('valid',name))
  valid_dataset_size=.25
for species in os.listdir(train_dir):
  len_species=len(os.listdir(os.path.join(train_dir,species)))
  number_of_valid=round(valid_dataset_size*len_species)
  valid_data_per_species=sample(os.listdir(os.path.join(train_dir,species)), k=number_of_valid)
  for pic in valid_data_per_species:
    src=os.path.join(train_dir,species,pic)
    destination=os.path.join(valid,species)
    shutil.move(src,destination)
else:
  print("validation dataset is already exist!")
# random.choices(list, k=3)


valid_dir='valid'

validation dataset is already exist!


In [10]:
tfms = transforms.Compose([
    transforms.Resize((sz, sz)),  # PIL Image
    transforms.ToTensor(),        # Tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])



In [11]:
train_ds = datasets.ImageFolder(train_dir,tfms)
valid_ds = datasets.ImageFolder(valid_dir,tfms)



In [12]:
train_dl = torch.utils.data.DataLoader(train_ds, 
                                       batch_size=batch_size, 
                                       shuffle=True, 
                                       num_workers=8)
val_dl = torch.utils.data.DataLoader(valid_ds, 
                                       batch_size=batch_size, 
                                       shuffle=True, 
                                       num_workers=8)


In [13]:
class SimpleCNN(nn.Module):
    
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))

    
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc = nn.Linear(56 * 56 * 32,12)
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)            # (bs, C, H,  W)
        out = out.view(out.size(0), -1)  # (bs, C * H * W)
        out = self.fc(out)
        return out

In [14]:
model=SimpleCNN()
Loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)

In [16]:
num_epochs = 10
losses = []
for epoch in range(num_epochs):
    for i, (inputs, targets) in enumerate(train_dl):
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        # forwad pass
        outputs = model(inputs)
        
        # loss
        loss = Loss(outputs, targets)
        losses.append(loss.item())

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        
        # update parameters
        optimizer.step()
        
        # report
        if (i + 1) % 100 == 0:
            print('Epoch [%2d/%2d], Step [%3d/%3d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_ds) // batch_size, loss.item()))

Epoch [ 1/10], Step [100/111], Loss: 1.5570
Epoch [ 2/10], Step [100/111], Loss: 1.1131
Epoch [ 3/10], Step [100/111], Loss: 0.9589
Epoch [ 4/10], Step [100/111], Loss: 0.7067
Epoch [ 5/10], Step [100/111], Loss: 1.1449
Epoch [ 6/10], Step [100/111], Loss: 0.4004
Epoch [ 7/10], Step [100/111], Loss: 0.3231
Epoch [ 8/10], Step [100/111], Loss: 0.0954
Epoch [ 9/10], Step [100/111], Loss: 0.1294
Epoch [10/10], Step [100/111], Loss: 0.0153


In [20]:

def evaluate_model(model, dataloader, device):
    model.eval()  # for batch normalization layers
    corrects = 0
    loss=[]
    
    with torch.no_grad():
        for i,(inputs, targets) in enumerate(dataloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss.append(Loss(outputs, targets).item())

            _, preds = torch.max(outputs, 1)
            corrects += (preds == targets.data).sum()
    acc=100. * corrects / len(dataloader.dataset)
    print("loss is {} , acc is {} ".format(statistics.mean(loss),acc))

In [None]:

evaluate_model(model, val_dl, device)



In [31]:
train_transforms = transforms.Compose([
    transforms.Resize((sz, sz)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.01),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


valid_transforms = transforms.Compose([
    transforms.Resize((sz, sz)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])




In [32]:
train_ds = datasets.ImageFolder(train_dir,train_transforms)
valid_ds = datasets.ImageFolder(valid_dir,valid_transforms)
train_dl = torch.utils.data.DataLoader(train_ds, 
                                       batch_size=batch_size, 
                                       shuffle=True, 
                                       num_workers=8)
val_dl = torch.utils.data.DataLoader(valid_ds, batch_size=batch_size, shuffle=True)

In [33]:
num_epochs = 10
losses = []
for epoch in range(num_epochs):
    for i, (inputs, targets) in enumerate(train_dl):
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        # forwad pass
        outputs = model(inputs)
        
        # loss
        loss = Loss(outputs, targets)
        losses.append(loss.item())

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        
        # update parameters
        optimizer.step()
        
        # report
        if (i + 1) % 100 == 0:
            print('Epoch [%2d/%2d], Step [%3d/%3d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_ds) // batch_size, loss.item()))

Epoch [ 1/10], Step [100/111], Loss: 0.7933
Epoch [ 2/10], Step [100/111], Loss: 0.7770
Epoch [ 3/10], Step [100/111], Loss: 0.6673
Epoch [ 4/10], Step [100/111], Loss: 0.5982
Epoch [ 5/10], Step [100/111], Loss: 0.4300
Epoch [ 6/10], Step [100/111], Loss: 0.8325
Epoch [ 7/10], Step [100/111], Loss: 0.6383
Epoch [ 8/10], Step [100/111], Loss: 0.3123
Epoch [ 9/10], Step [100/111], Loss: 0.6842
Epoch [10/10], Step [100/111], Loss: 0.6201


In [35]:
evaluate_model(model, val_dl, device)

loss is 0.6707617079741076 , acc is 78.04878234863281 


In [36]:
model = models.vgg16(pretrained=True)
model.classifier._modules['6'] = nn.Linear(4096, 12)
Loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)
model = model.to(device)




Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [37]:
train_ds = datasets.ImageFolder(train_dir,tfms)
valid_ds = datasets.ImageFolder(valid_dir,tfms)
train_dl = torch.utils.data.DataLoader(train_ds, 
                                       batch_size=batch_size, 
                                       shuffle=True, 
                                       num_workers=8)
val_dl = torch.utils.data.DataLoader(valid_ds, batch_size=batch_size, shuffle=True)





In [38]:
num_epochs = 3
losses = []
for epoch in range(num_epochs):
    for i, (inputs, targets) in enumerate(train_dl):
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        # forwad pass
        outputs = model(inputs)
        
        # loss
        loss = Loss(outputs, targets)
        losses.append(loss.item())

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        
        # update parameters
        optimizer.step()
        
        # report
        if (i + 1) % 100 == 0:
            print('Epoch [%2d/%2d], Step [%3d/%3d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_ds) // batch_size, loss.item()))

Epoch [ 1/ 3], Step [100/111], Loss: 0.5275
Epoch [ 2/ 3], Step [100/111], Loss: 0.2523
Epoch [ 3/ 3], Step [100/111], Loss: 0.0981


In [39]:
evaluate_model(model, val_dl, device)

loss is 0.24211767686708635 , acc is 91.58956909179688 
