## Informasi 

link Dataset : https://www.kaggle.com/jerzydziewierz/bee-vs-wasp <br>
credit : <br> sebagian besar code diambil dari https://www.kaggle.com/erntkn/pytorch-starter-with-resnet18 , dalam colab ini code diubah dari pytorch ke pytorch lightning dan diberikan tambahan model CNN untuk pembelajaran. 

In [1]:
! pip install pytorch-lightning --quiet

[K     |████████████████████████████████| 552kB 12.2MB/s 
[K     |████████████████████████████████| 829kB 49.6MB/s 
[K     |████████████████████████████████| 92kB 13.1MB/s 
[K     |████████████████████████████████| 276kB 53.5MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone
  Building wheel for PyYAML (setup.py) ... [?25l[?25hdone


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from zipfile import ZipFile

with ZipFile('/content/drive/My Drive/Dataset/bee vs wasp/archive.zip', 'r') as zipObj:
  zipObj.extractall()

In [4]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision import transforms , models
from torch.utils.data import DataLoader , Dataset
import pytorch_lightning as pl
from pytorch_lightning.metrics import Accuracy 
from pytorch_lightning.metrics.functional.classification import to_categorical 
from torch.utils.data import random_split
from sklearn.metrics import classification_report , confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import cv2
from skimage import io, color
from PIL import Image
from sklearn.preprocessing import LabelEncoder

In [5]:
df = pd.read_csv("/content/kaggle_bee_vs_wasp/labels.csv")
df.head()

Unnamed: 0,id,path,is_bee,is_wasp,is_otherinsect,is_other,photo_quality,is_validation,is_final_validation,label
0,1,bee1\10007154554_026417cfd0_n.jpg,1,0,0,0,1,0,0,bee
1,2,bee1\10024864894_6dc54d4b34_n.jpg,1,0,0,0,1,0,1,bee
2,3,bee1\10092043833_7306dfd1f0_n.jpg,1,0,0,0,1,1,0,bee
3,4,bee1\1011948979_fc3637e779_w.jpg,1,0,0,0,1,0,1,bee
4,5,bee1\10128235063_dca17db76c_n.jpg,1,0,0,0,1,0,0,bee


In [6]:
for i in df.index:
    df["path"].iloc[i] = df["path"].iloc[i].replace("\\", "/")
le = LabelEncoder()
le.fit(df["label"])
df["label"] = le.transform(df["label"])
df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0,id,path,is_bee,is_wasp,is_otherinsect,is_other,photo_quality,is_validation,is_final_validation,label
0,1,bee1/10007154554_026417cfd0_n.jpg,1,0,0,0,1,0,0,0
1,2,bee1/10024864894_6dc54d4b34_n.jpg,1,0,0,0,1,0,1,0
2,3,bee1/10092043833_7306dfd1f0_n.jpg,1,0,0,0,1,1,0,0
3,4,bee1/1011948979_fc3637e779_w.jpg,1,0,0,0,1,0,1,0
4,5,bee1/10128235063_dca17db76c_n.jpg,1,0,0,0,1,0,0,0


In [7]:
def split_data(dt):
    idx = list()
    a = pd.DataFrame()
    b = pd.DataFrame()
    for i in df.index:
        if dt["is_validation"].iloc[i] == 1:
            a = a.append(dt.iloc[i])
            idx.append(i)
        if dt["is_final_validation"].iloc[i] == 1:    
            b = b.append(dt.iloc[i])
            idx.append(i)

    dt = dt.drop(dt.index[idx])
    dt = dt.reset_index()
    a = a.reset_index()
    b = b.reset_index()
    return dt, a, b 

train_df, val_df, test_df = split_data(df)
val_df.label = val_df.label.astype(np.int64)
test_df.label = test_df.label.astype(np.int64)
# sanity check
print("Length of train dataset: ", len(train_df))
print("Length of validation dataset: " ,len(val_df))
print("Length of test dataset: ", len(test_df))

Length of train dataset:  7939
Length of validation dataset:  1719
Length of test dataset:  1763


In [8]:
class BeeDataset(Dataset):
    def __init__(self, df:pd.DataFrame, imgdir:str, train:bool,
                 transforms=None):
        self.df = df
        self.imgdir = imgdir
        self.train = train
        self.transforms = transforms
    
    def __getitem__(self, index):
        im_path = os.path.join(self.imgdir, self.df.iloc[index]["path"])
        x = cv2.imread(im_path)
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
        x = cv2.resize(x, (224, 224))

        if self.transforms:
            x = self.transforms(x)
        
        if self.train:
            y = self.df.iloc[index]["label"]
            return x, y
        else:
            return x
    
    def __len__(self):
        return len(self.df)

In [9]:
class BeeDataModule(pl.LightningDataModule):
    def __init__(self , batch_size=64 , train_df=None , val_df=None , test_df=None):
        super().__init__()
        self.batch_size = batch_size
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
    
    def setup(self,stage):
        train_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        self.train_data = BeeDataset(df=self.train_df,
                        imgdir="/content/kaggle_bee_vs_wasp",
                        train=True,
                        transforms=train_transform)

        self.val_data = BeeDataset(df=self.val_df,
                              imgdir="/content/kaggle_bee_vs_wasp",
                              train=True,
                              transforms=test_transform)

        self.test_data = BeeDataset(df=self.test_df,
                              imgdir="/content/kaggle_bee_vs_wasp",
                              train=True,
                              transforms=test_transform)
      
    
    def train_dataloader(self):
        return DataLoader(self.train_data , shuffle=True , batch_size= self.batch_size)
    
    def val_dataloader(self):
         return DataLoader(self.val_data , batch_size= self.batch_size)
    
    def test_dataloader(self):
        return DataLoader(self.test_data , batch_size= self.test_data.__len__())
    

In [12]:
class CNNBeeClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16 , 7)
        self.conv2 = nn.Conv2d(16,32,5)
        self.maxpooling1 = nn.MaxPool2d(5,stride=5)
        self.bacthnorm1 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64 , 5 , padding=1)
        self.conv4 = nn.Conv2d(64,128,3, padding=1)
        self.maxpooling2 = nn.MaxPool2d(3,stride=3)
        self.bacthnorm2 = nn.BatchNorm2d(128)
        self.conv5 = nn.Conv2d(128, 128 , 3 , padding=1)
        self.fc1 = nn.Linear(21632, 128)
        self.fc2 = nn.Linear(128,32)
        self.output = nn.Linear(32,4)
       
        
    def forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpooling1(x)
        x = self.bacthnorm1(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.maxpooling2(x)
        x = self.bacthnorm2(x)
        x = self.conv5(x)
        x = F.relu(x)
        x = x.view(x.size(0),-1)
        # print(x.size())
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        return self.output(x)
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    
    def training_step(self,batch,batch_idx):
        x , y = batch
        logits = self(x)
        loss = F.cross_entropy(logits,y)
        # result = pl.TrainResult(loss)
        accuracy = Accuracy()
        acc = accuracy(torch.tensor(logits).cpu(),torch.tensor(y).cpu())
        pbar = {'training_acc': acc}
        # result = pl.TrainResult(loss)
        return {'loss' : loss , 'progress_bar':pbar}
    
    def validation_step(self , batch , batch_idx):
        result = self.training_step(batch,batch_idx)
        result['progress_bar']['val_acc'] = result['progress_bar']['training_acc']
        return result
    
    def validation_epoch_end(self , val_step_outputs):
        avg_loss = torch.stack([x['loss'] for x in val_step_outputs]).mean()
        avg_acc =  torch.stack([x['progress_bar']['val_acc'] for x in val_step_outputs]).mean()
        tqdm_dict = {'val_loss': avg_loss , 'val_acc': avg_acc}
        return {
                'progress_bar': tqdm_dict,
                'log': {'val_loss': avg_loss , 'val_acc': avg_acc},
                }
    
    def test_step(self , batch , batch_idx):
        x , y = batch
        logits = self(x)
        loss = F.cross_entropy(logits,y)
        logits = torch.tensor(logits)
        accuracy = Accuracy()
        acc = accuracy(logits.cpu(), torch.tensor(y).cpu())
        logits = to_categorical(logits)

        pbar = {'test_acc': acc }
        print(classification_report(y.cpu(),logits.cpu()))
        print(confusion_matrix(y.cpu(),logits.cpu()))
    
        # result = pl.TrainResult(loss)
        return {'loss' : loss , 'progress_bar':pbar}
    

In [13]:
model = CNNBeeClassifier()
x = model.forward(torch.randn(1,3,224,224))

torch.Size([1, 21632])


In [14]:
class ResnetBeeClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, 4)
       
        
    def forward(self,x):
        x = self.model(x)
        return x
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    
    def training_step(self,batch,batch_idx):
        x , y = batch
        logits = self(x)
        loss = F.cross_entropy(logits,y)
        # result = pl.TrainResult(loss)
        accuracy = Accuracy()
        acc = accuracy(torch.tensor(logits).cpu(),torch.tensor(y).cpu())
        pbar = {'training_acc': acc}
        # result = pl.TrainResult(loss)
        return {'loss' : loss , 'progress_bar':pbar}
    
    def validation_step(self , batch , batch_idx):
        result = self.training_step(batch,batch_idx)
        result['progress_bar']['val_acc'] = result['progress_bar']['training_acc']
        return result
    
    def validation_epoch_end(self , val_step_outputs):
        avg_loss = torch.stack([x['loss'] for x in val_step_outputs]).mean()
        avg_acc =  torch.stack([x['progress_bar']['val_acc'] for x in val_step_outputs]).mean()
        tqdm_dict = {'val_loss': avg_loss , 'val_acc': avg_acc}
        return {
                'progress_bar': tqdm_dict,
                'log': {'val_loss': avg_loss , 'val_acc': avg_acc},
                }
    
    def test_step(self , batch , batch_idx):
        x , y = batch
        logits = self(x)
        loss = F.cross_entropy(logits,y)
        logits = torch.tensor(logits)
        accuracy = Accuracy()
        acc = accuracy(logits.cpu(), torch.tensor(y).cpu())
        logits = to_categorical(logits)

        pbar = {'test_acc': acc }
        print(classification_report(y.cpu(),logits.cpu()))
        print(confusion_matrix(y.cpu(),logits.cpu()))
    
        # result = pl.TrainResult(loss)
        return {'loss' : loss , 'progress_bar':pbar}
    

In [11]:
pl.seed_everything(1234)

dm = BeeDataModule(batch_size=16,train_df=train_df,val_df=val_df,test_df=test_df)
model = CNNBeeClassifier()

# training
trainer = pl.Trainer(gpus=1, max_epochs=20 , progress_bar_refresh_rate=50)
trainer.fit(model, dm)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name        | Type        | Params
---------------------------------------------
0  | conv1       | Conv2d      | 2 K   
1  | conv2       | Conv2d      | 12 K  
2  | maxpooling1 | MaxPool2d   | 0     
3  | bacthnorm1  | BatchNorm2d | 64    
4  | conv3       | Conv2d      | 51 K  
5  | conv4       | Conv2d      | 73 K  
6  | maxpooling2 | MaxPool2d   | 0     
7  | bacthnorm2  | BatchNorm2d | 256   
8  | conv5       | Conv2d      | 147 K 
9  | fc1         | Linear      | 2 M   
10 | fc2         | Linear      | 4 K   
11 | output      | Linear      | 132   


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



Please use self.log(...) inside the lightningModule instead.

# log on a step or aggregate epoch metric to the logger and/or progress bar
# (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
Please use self.log(...) inside the lightningModule instead.

# log on a step or aggregate epoch metric to the logger and/or progress bar
# (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




1

In [12]:
pl.seed_everything(1234)

dm = BeeDataModule(batch_size=32,train_df=train_df,val_df=val_df,test_df=test_df)
model = ResnetBeeClassifier()

# training
trainer = pl.Trainer(gpus=1, max_epochs=20 , progress_bar_refresh_rate=50)
trainer.fit(model, dm)
trainer.test(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…





Please use self.log(...) inside the lightningModule instead.

# log on a step or aggregate epoch metric to the logger and/or progress bar
# (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
Please use self.log(...) inside the lightningModule instead.

# log on a step or aggregate epoch metric to the logger and/or progress bar
# (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

RuntimeError: ignored