# 0. Imports

In [4]:
# imports
# imports

import torch
from torch import nn
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
from torchsummary import summary
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from tqdm.auto import tqdm
import time
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x1052e1ea910>

In [3]:
# setup device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)
device

device(type='cuda')

# 1. Dataset and Dataloader

In [46]:
# get the dataset
from torchvision.datasets import ImageFolder

transforms = transforms.Compose(
    [
        transforms.Resize((48, 64)),
        transforms.ToTensor()
    ]
)
train_dataset = ImageFolder("dataset/train", transform=transforms)
val_dataset = ImageFolder("dataset/test", transform=transforms)

In [48]:
train_dataset[0][0].shape

torch.Size([3, 48, 64])

In [49]:
# mapping
train_dataset.class_to_idx

{'Bikes': 0, 'Horses': 1}

In [50]:
# create dataloaders
batch_size = 4
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# 2. Create CNN

In [63]:
class BikeHorseClassifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding="same"),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding="same"),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding="same"),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.block_4 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=128*6*8, out_features=64),
            nn.ReLU(),
        )
        self.last_layer = nn.Linear(in_features=64, out_features=10)
        
    def forward(self, x):
        features = self.block_4(self.block_3(self.block_2(self.block_1(x))))
        activations = self.last_layer(features)
        return (activations, features)

In [64]:
model = BikeHorseClassifier().to(device)
summary(model, (3, 48, 64))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 48, 64]             896
       BatchNorm2d-2           [-1, 32, 48, 64]              64
              ReLU-3           [-1, 32, 48, 64]               0
         MaxPool2d-4           [-1, 32, 24, 32]               0
            Conv2d-5           [-1, 64, 24, 32]          18,496
       BatchNorm2d-6           [-1, 64, 24, 32]             128
              ReLU-7           [-1, 64, 24, 32]               0
         MaxPool2d-8           [-1, 64, 12, 16]               0
            Conv2d-9          [-1, 128, 12, 16]          73,856
      BatchNorm2d-10          [-1, 128, 12, 16]             256
             ReLU-11          [-1, 128, 12, 16]               0
        MaxPool2d-12            [-1, 128, 6, 8]               0
          Flatten-13                 [-1, 6144]               0
           Linear-14                   

# 3. Training Loop

In [65]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device
              ):
    
    start_time = time.time()

    train_loss = 0    
    model.to(device)
    model.train()
    
    for (X, y) in data_loader:
        # send data to GPU
        X, y = X.to(device), y.to(device)
        # X, y = X.to(device), y.type(torch.LongTensor).to(device)
        
        # 1. forward pass
        y_pred, _ = model(X)

        # 2. calculate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss
        
        # 3. optimizer zero grad
        optimizer.zero_grad()
        
        # 4. loss backward
        loss.backward()
        
        # 5. optimizer step
        optimizer.step()
    
    train_loss /= len(data_loader)

    end_time = time.time()

    return {"avg_batch_loss": train_loss, "time": (end_time - start_time)* 10**3}

def valid_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               device: torch.device
              ):
    
    # send the model to device
    model.to(device)

    # send the model in eval mode
    model.eval()

    # for confusion matrix and accuracy
    y_true = torch.Tensor([]).to(device)
    y_pred = torch.Tensor([]).to(device)

    with torch.inference_mode(): 
        for X, y in data_loader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            test_pred, _ = model(X)
            
            y_true = torch.cat((y_true, y), dim=0)
            y_pred = torch.cat((y_pred, test_pred.argmax(axis=1)), dim=0)
        
        # send back to cpu
        y_true = y_true.cpu()
        y_pred = y_pred.cpu()

        return {"accuracy": accuracy_score(y_true, y_pred), "confusion_matrix": confusion_matrix(y_true, y_pred, normalize="true")}


In [66]:
# create loss_fn
loss_fn = CrossEntropyLoss()

In [67]:
# create optimizer

lr = 3.2 * (10**-4)
optimizer = Adam(model.parameters(), lr=lr)

In [68]:
epochs = 32

for epoch in tqdm(range(epochs)):
        tres = train_step(model, train_dataloader, loss_fn, optimizer, device)
        print(f"epoch: {epoch}")
        print(f"avg_batch_loss: {tres['avg_batch_loss']}")
        print(f"time: {tres['time']}")   
        print("")

  0%|          | 0/32 [00:00<?, ?it/s]

epoch: 0
avg_batch_loss: 2.4390485286712646
time: 933.5768222808838

epoch: 1
avg_batch_loss: 2.033043146133423
time: 613.4030818939209

epoch: 2
avg_batch_loss: 0.6366634964942932
time: 646.1994647979736

epoch: 3
avg_batch_loss: 0.4491591453552246
time: 618.9291477203369

epoch: 4
avg_batch_loss: 0.2998581528663635
time: 622.9219436645508

epoch: 5
avg_batch_loss: 0.19932998716831207
time: 636.0793113708496

epoch: 6
avg_batch_loss: 0.14700278639793396
time: 611.4358901977539

epoch: 7
avg_batch_loss: 0.10592490434646606
time: 597.4242687225342

epoch: 8
avg_batch_loss: 0.07511460036039352
time: 625.1246929168701

epoch: 9
avg_batch_loss: 0.05314221978187561
time: 628.8480758666992

epoch: 10
avg_batch_loss: 0.03473154082894325
time: 610.7702255249023

epoch: 11
avg_batch_loss: 0.023491274565458298
time: 611.7730140686035

epoch: 12
avg_batch_loss: 0.016298340633511543
time: 608.2763671875

epoch: 13
avg_batch_loss: 0.011773131787776947
time: 630.089521408081

epoch: 14
avg_batch_los

In [69]:
# validation results
vres = valid_step(model, val_dataloader, device)
print(f"accuracy: {vres['accuracy']}")
print(f"confusion_matrix: \n{vres['confusion_matrix']}")

accuracy: 1.0
confusion_matrix: 
[[1. 0.]
 [0. 1.]]


# 4. Using CNN as Feature Extractor

In [70]:
# get features
def get_features(model, data_loader):
    # send the model to device
    model.to(device)

    # send the model in eval mode
    model.eval()

    # for confusion matrix and accuracy
    all_y = torch.Tensor([]).to(device)
    all_x = torch.Tensor([]).to(device)

    with torch.inference_mode(): 
        for X, y in data_loader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            _, features = model(X)
            
            all_y = torch.cat((all_y, y), dim=0)
            all_x = torch.cat((all_x, features), dim=0)
        
        # send back to cpu
        return (all_x.cpu(), all_y.cpu())   

In [71]:
train_x, train_y = get_features(model, train_dataloader)
train_x.shape, train_y.shape

(torch.Size([167, 64]), torch.Size([167]))

In [72]:
val_x, val_y = get_features(model, val_dataloader)
val_x.shape, val_y.shape

(torch.Size([12, 64]), torch.Size([12]))

In [73]:
# try out sklearn models
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 

fmodel = LogisticRegression(max_iter=256)
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 1.0


In [74]:
# random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score 

fmodel = RandomForestClassifier()
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 1.0


In [75]:
# gaussian NB
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score 

fmodel = GaussianNB()
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 1.0


In [76]:
# multinomial NB
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score 

fmodel = MultinomialNB()
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 1.0


: 