<a href="https://colab.research.google.com/github/Maestro2496/Machine-Learning/blob/main/DeepLearningAssessment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import packages

In [1]:
# Install the Kaggle download helper. `%pip` (rather than `!pip`) installs into
# the environment of the running kernel, so the import below is guaranteed to see it.
%pip install -q opendatasets

In [66]:
# pytorch, os, torchvision, opendatasets
# pandas
import os
import shutil
import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import opendatasets as od
import pandas as pd
import torchvision
from torchvision import datasets
from torchvision.transforms import transforms
from torchvision.datasets import ImageFolder
from tqdm.auto import tqdm
import matplotlib.pyplot as plt


## Loading Dataset

In [3]:
# Fetch the curated colon dataset from Kaggle into a folder named after the
# dataset in the current working directory. Kaggle credentials are requested
# interactively when they are needed.
od.download(
    "https://www.kaggle.com/datasets/francismon/curated-colon-dataset-for-deep-learning"
)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: medetonagossou
Your Kaggle Key: ··········
Downloading curated-colon-dataset-for-deep-learning.zip to ./curated-colon-dataset-for-deep-learning


100%|██████████| 1.41G/1.41G [00:15<00:00, 98.5MB/s]





### Select random images

In [37]:
def select_random_images(source_folder, num_images, destination_folder, seed=None):
    """Copy a random subset of a class-per-folder image dataset to a new root.

    The source is read with ``ImageFolder``. Every class sub-folder of the
    source is re-created under ``destination_folder`` (even when no image of
    that class is picked), and each selected file is copied into the
    sub-folder of its class under its original file name.

    Args:
        source_folder: Root directory of the dataset to sample from.
        num_images: Number of images to copy. When it exceeds the number of
            available images, all of them are copied.
        destination_folder: Root directory receiving the copies; created if
            it does not exist.
        seed: Optional seed for a private random generator, which makes the
            selection reproducible. With ``None`` (the default) the global
            ``random`` state is used.

    Raises:
        ValueError: If ``num_images`` is negative.
    """
    if num_images < 0:
        # A negative count would otherwise be read as "all but the last n".
        raise ValueError(f"num_images must be non-negative, got {num_images}")

    dataset = ImageFolder(root=source_folder)
    class_names = dataset.classes
    candidates = dataset.imgs  # list of (file path, class index) pairs

    rng = random if seed is None else random.Random(seed)
    # sample() draws without replacement and leaves the dataset's own list
    # in its original order.
    selected_images = rng.sample(candidates, min(num_images, len(candidates)))

    for class_name in class_names:
        os.makedirs(os.path.join(destination_folder, class_name), exist_ok=True)

    for image_path, class_index in selected_images:
        destination_path = os.path.join(
            destination_folder,
            class_names[class_index],
            os.path.basename(image_path),
        )
        shutil.copyfile(image_path, destination_path)


In [38]:
# Start from an empty subset folder so images from an earlier sampling run do
# not accumulate. ignore_errors=True keeps the cell quiet on a fresh runtime
# where the folder does not exist yet.
shutil.rmtree("small_data", ignore_errors=True)

In [39]:
# Build the small train/val/test subsets, 20 random images each.
# NOTE(review): the val and test subsets are both drawn independently from the
# source "test" split, so the same image can land in both — confirm whether the
# validation subset should come from a separate split instead.
subset_jobs = [
    ("curated-colon-dataset-for-deep-learning/train", "small_data/train"),
    ("curated-colon-dataset-for-deep-learning/test", "small_data/val"),
    ("curated-colon-dataset-for-deep-learning/test", "small_data/test"),
]
for source_dir, target_dir in subset_jobs:
    select_random_images(source_dir, 20, target_dir)

### Image transformer

In [40]:
# Preprocessing applied to every image: resize to the 224x224 input size the
# model expects, then convert to a tensor.
transformer = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

## Constructing datasets

In [23]:
# Root of the small sampled subset; its train/val/test sub-folders are what the
# datasets below are built from (despite the name, it is not only the test split).
test_folder_path = "small_data"
# Root of the full downloaded dataset. Not referenced by the cells shown in
# this part of the notebook.
main_folder_path = "curated-colon-dataset-for-deep-learning"

In [49]:
# One ImageFolder per split of the small subset, all sharing the same
# preprocessing pipeline.
train_dataset, val_dataset, test_dataset = (
    datasets.ImageFolder(root=split_root, transform=transformer)
    for split_root in (
        f"{test_folder_path}/train",
        f"{test_folder_path}/val",
        f"{test_folder_path}/test",
    )
)

In [50]:
# Only the training data is shuffled; validation and test are read in a fixed
# order so their results are comparable between runs.
# Pinned host memory is only requested when a CUDA device is available, since
# it only speeds up host-to-GPU copies.
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    pin_memory=torch.cuda.is_available(),
    num_workers=0,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    pin_memory=torch.cuda.is_available(),
    num_workers=0,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    pin_memory=torch.cuda.is_available(),
    num_workers=0,
)

In [46]:
def imshow(inp, title=None):
    """Display a channel-first image tensor with matplotlib.

    Args:
        inp: Image tensor laid out as (channels, height, width), for example
            the output of ``torchvision.utils.make_grid``. It may track
            gradients or live on the GPU.
        title: Optional title drawn above the image.
    """
    # detach()/cpu() make the conversion safe for tensors that require grad or
    # sit on a CUDA device; plain CPU tensors are unaffected.
    # matplotlib wants channels last, hence the transpose.
    image = inp.detach().cpu().numpy().transpose((1, 2, 0))
    plt.imshow(image)

    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # short pause to let matplotlib update the figure

In [58]:
# Pull one batch from the training loader and tile its images into one grid.
inputs, classes = next(iter(train_loader))
out = torchvision.utils.make_grid(inputs)

# Show the grid, titled with the class name of each image in the batch
# (`classes` holds the integer label of every image).
imshow(out, title=[train_dataset.classes[i] for i in classes.tolist()])


## Model

In [62]:
class MyCNN(nn.Module):
    """Small convolutional classifier for 3x224x224 images.

    Three conv -> ReLU -> max-pool stages reduce the input to a 256x14x14
    feature map, which is flattened and passed through three fully-connected
    layers producing one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 7.
            NOTE(review): this should equal the number of class folders in
            the training set — confirm 7 is right for the dataset in use.
    """

    def __init__(self, num_classes=7):
        super().__init__()
        # Argument order of the modules used below:
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)

        # input image size: [3, 224, 224]
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),     # -> [64, 224, 224]
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),         # -> [64, 112, 112]

            nn.Conv2d(64, 128, 3, 1, 1),   # -> [128, 112, 112]
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),         # -> [128, 56, 56]

            nn.Conv2d(128, 256, 3, 1, 1),  # -> [256, 56, 56]
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),         # -> [256, 14, 14]
        )

        self.fc_layers = nn.Sequential(
            # The feature map entering the classifier is (256, 14, 14) per image.
            nn.Linear(256 * 14 * 14, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Map a batch of images to class logits.

        Args:
            x: Tensor of shape [batch_size, 3, 224, 224].

        Returns:
            Tensor of shape [batch_size, num_classes] holding raw logits.
        """
        # Extract features with the convolutional layers.
        x = self.cnn_layers(x)

        # Flatten everything except the batch dimension before the
        # fully-connected layers.
        x = x.flatten(1)

        # The fully-connected layers turn the features into the final logits.
        x = self.fc_layers(x)

        return x

In [63]:
# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the network and move its parameters to the selected device.
model = MyCNN().to(device)

## Loss and Optimizer

In [65]:
# Cross-entropy loss, applied to the raw logits produced by the model.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a learning rate of 3e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=3e-4)

## Train and Validation

In [75]:
n_epochs = 1
# Per-epoch history; one value is appended to each list after every epoch.
train_loss_epochs = []
train_acc_epochs = []
val_loss_epochs = []
val_acc_epochs = []


def _run_epoch(loader, training, desc):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        training: When True the model is put in train mode and the optimizer
            is stepped on every batch; when False the model is put in eval
            mode and no gradients are computed.
        desc: Label shown on the progress bar.

    Returns:
        Tuple of per-sample mean loss and fraction of correct predictions.

    Raises:
        RuntimeError: If ``loader`` yields no samples.
    """
    model.train(training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for images, labels in tqdm(loader, desc=desc):
            images = images.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()

            logits = model(images)
            loss = criterion(logits, labels)

            if training:
                loss.backward()
                optimizer.step()

            # The criterion (CrossEntropyLoss with default reduction) returns
            # the batch mean; weight it by the batch size so a smaller final
            # batch does not skew the epoch average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            n_correct += (logits.argmax(dim=-1) == labels).sum().item()
            n_seen += batch_size

    if n_seen == 0:
        raise RuntimeError(f"{desc}: the data loader produced no samples")
    return loss_sum / n_seen, n_correct / n_seen


for epoch in range(n_epochs):
    epoch_label = f"epoch {epoch + 1}/{n_epochs}"

    ## Training
    epoch_train_loss, epoch_train_acc = _run_epoch(train_loader, True, f"{epoch_label} [train]")
    train_loss_epochs.append(epoch_train_loss)
    train_acc_epochs.append(epoch_train_acc)

    ## Validation
    epoch_val_loss, epoch_val_acc = _run_epoch(val_loader, False, f"{epoch_label} [val]")
    val_loss_epochs.append(epoch_val_loss)
    val_acc_epochs.append(epoch_val_acc)

    print(
        f"[{epoch_label}] "
        f"train loss = {epoch_train_loss:.4f}, train acc = {epoch_train_acc:.4f} | "
        f"val loss = {epoch_val_loss:.4f}, val acc = {epoch_val_acc:.4f}"
    )


  0%|          | 0/10 [00:00<?, ?it/s]