<a href="https://colab.research.google.com/github/HurleyJames/GoogleColabExercise/blob/master/CW1_Starter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## COMP5623 Coursework on Image Classification with Convolutional Neural Networks 

Starter code.

In [12]:
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
import torchvision.transforms as transforms
from  torch.utils.data import Dataset

from sklearn.metrics import confusion_matrix
from skimage import io, transform

import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image
import pandas as pd
import numpy as np
import csv
import os
import math
import cv2

NameError: ignored

### Part I

The first part of the assignment is to build a CNN and train it on a subset of the ImageNet dataset. We will first create a dataframe with all the references to the images and their labels.

To download the images into your work environment, clone into a git respository containing the images.

In [0]:
! git clone https://github.com/MohammedAlghamdi/imagenet10.git

Cloning into 'imagenet10'...
remote: Enumerating objects: 10019, done.[K
remote: Total 10019 (delta 0), reused 0 (delta 0), pack-reused 10019[K
Receiving objects: 100% (10019/10019), 966.71 MiB | 52.38 MiB/s, done.
Resolving deltas: 100% (2/2), done.
Checking out files: 100% (10002/10002), done.


Check that the repository is there:

In [0]:
! ls

imagenet10  sample_data


In [0]:
root_dir = "imagenet10/train_set/"
class_names = [
  "baboon",
  "banana",
  "canoe",
  "cat",
  "desk",
  "drill",
  "dumbbell",
  "football",
  "mug",
  "orange",
]

A helper function for reading in images and assigning labels.

In [0]:
def get_meta(root_dir, dirs):
    """ Fetches the meta data for all the images and assigns labels.
    """
    paths, classes = [], []
    for i, dir_ in enumerate(dirs):
        for entry in os.scandir(root_dir + dir_):
            if (entry.is_file()):
                paths.append(entry.path)
                classes.append(i)
                
    return paths, classes

Now we create a dataframe using all the data.

In [0]:
# Benign images we will assign class 0, and malignant as 1
paths, classes = get_meta(root_dir, class_names)

data = {
    'path': paths,
    'class': classes
}

data_df = pd.DataFrame(data, columns=['path', 'class'])
data_df = data_df.sample(frac=1).reset_index(drop=True) # Shuffles the data

View some sample data.

In [0]:
print("Found", len(data_df), "images.")
data_df.head()

Found 9000 images.


Unnamed: 0,path,class
0,imagenet10/train_set/banana/n07753592_3748.JPEG,1
1,imagenet10/train_set/cat/n02123159_2359.JPEG,3
2,imagenet10/train_set/drill/n03239726_19239.JPEG,5
3,imagenet10/train_set/canoe/n02951358_12254.JPEG,2
4,imagenet10/train_set/drill/n03239726_4405.JPEG,5


Now we will create the Dataset class.

In [0]:
class ImageNet10(Dataset):
    """ ImageNet10 dataset. """

    def __init__(self, df, transform=None):
        """
        Args:
            image_dir (string): Directory with all the images
            df (DataFrame object): Dataframe containing the images, paths and classes
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Load image from path and get label
        x = Image.open(self.df['path'][index])
        try:
          x = x.convert('RGB') # To deal with some grayscale images in the data
        except:
          pass
        y = torch.tensor(int(self.df['class'][index]))

        if self.transform:
            x = self.transform(x)

        return x, y

Compute what we should normalise the dataset to.

In [0]:
def compute_img_mean_std(image_paths):
    """
        Author: @xinruizhuang. Computing the mean and std of three channel on the whole dataset,
        first we should normalize the image from 0-255 to 0-1
    """

    img_h, img_w = 224, 224
    imgs = []
    means, stdevs = [], []

    for i in tqdm(range(len(image_paths))):
        img = cv2.imread(image_paths[i])
        img = cv2.resize(img, (img_h, img_w))
        imgs.append(img)

    imgs = np.stack(imgs, axis=3)
    print(imgs.shape)

    imgs = imgs.astype(np.float32) / 255.

    for i in range(3):
        pixels = imgs[:, :, i, :].ravel()  # resize to one row
        means.append(np.mean(pixels))
        stdevs.append(np.std(pixels))

    means.reverse()  # BGR --> RGB
    stdevs.reverse()

    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))
    return means, stdevs


In [0]:
norm_mean, norm_std = compute_img_mean_std(paths)

100%|██████████| 9000/9000 [00:44<00:00, 201.29it/s]


(224, 224, 3, 9000)
normMean = [0.52283585, 0.47988057, 0.40605134]
normStd = [0.29770675, 0.28883967, 0.31178257]


Now let's create the transforms to normalise and turn our data into tensors.

In [0]:
data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

Let's split the data into train and test sets and instantiate our new ISIC_Dataset objects.

In [0]:
train_split = 0.70 # Defines the ratio of train/valid/test data.
valid_split = 0.10
# 测试集
# test_split = 0.20

train_size = int(len(data_df)*train_split)
valid_size = int(len(data_df)*valid_split)
# 测试集大小
# test_size = int(len(data_df)*test_split)

ins_dataset_train = ImageNet10(
    df=data_df[:train_size],
    transform=data_transform,
)

ins_dataset_valid = ImageNet10(
    df=data_df[train_size:(train_size + valid_size)].reset_index(drop=True),
    transform=data_transform,
)

ins_dataset_test = ImageNet10(
    df=data_df[(train_size + valid_size):].reset_index(drop=True),
    transform=data_transform,
)

You will need to create DataLoaders for the datasets.

In [0]:
# 自己写的
train_loader = torch.utils.data.DataLoader(
    ins_dataset_train,
    batch_size=16,
    shuffle=True,
    num_workers=2
)

valid_loader = torch.utils.data.DataLoader(
    ins_dataset_valid,
    batch_size=20,
    shuffle=True,
    num_workers=2
)

test_loader = torch.utils.data.DataLoader(
    ins_dataset_test,
    batch_size=24,
    shuffle=True,
    num_workers=2
)

In [0]:
for i, data in enumerate(test_loader, 0):
  images, labels = data
  print("Batch", i, "size:", len(images))

  break;

Batch 0 size: 24


In [0]:
len(test_loader)

75

In [0]:
# 创建一个 FeatureFlatten层级
class FeatureFlatten(nn.Module):
  def __init__(self):
    super(FeatureFlatten, self).__init()__()
  def forward(self, x):
    x = x.view(-1, self.num_flat_features(x))
    return x
  def num_flat_features(self, x):
    size = x.size()[1:]
    num_features = 1
    for s in size:
      num_features *= s
    return num_features
  def __repr__(self):
    return 'FeatureFlatten()'

SyntaxError: ignored

In [0]:
# 定义 VariableSizeInspector 网络层
class VariableSizeInspector(nn.Module):
  def __init__(self):
    super(VariableSizeInspector, self).__init__()
  def forward(self, x):
    print('after', type(x.creator), x.size())
    return x
  def __repr__(self):
    return 'VariableSizeInspector()'

A framework for the ConvNet model, missing all layers except the final fully-connected layer:

In [0]:
# Convolutional neural network
class ConvNet(nn.Module):
    
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
  
        # Add network layers here
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.3),
            nn.Conv2d(16, 24, 4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout(0.3),
            nn.Conv2d(24, 32, 4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout(0.3),
            VariableSizeInspector(),
            FeatureFlatten(),
            VariableSizeInspector()
        )
        # first layer
        # self.conv1 = nn.Sequential(
        #     nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3),
        #     nn.ReLU(),
        #     nn.MaxPool2d(kernel_size=2),
        #     nn.Dropout(0.3)
        # )
        
        # # second layer
        # self.conv2 = nn.Sequential(
        #     nn.Conv2d(16, 24, 4),
        #     nn.ReLU(),
        #     nn.MaxPool2d(2),
        #     nn.Dropout(0.3)
        # )
        
        # # third layer
        # self.conv3 = nn.Sequential(
        #     nn.Conv2d(24, 32, 4),
        #     nn.ReLU(),
        #     nn.MaxPool2d(2),
        #     nn.Dropout(0.3)
        # )
    
        self.fc = nn.Sequential(
            nn.Linear(32*29*29, 512),
            # nn.Linear(32*32*32, 512),
            # nn.ReLU(),
            # nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )
        
        # self.fc2 = nn.Linear(512, num_classes)
        self.final = nn.Softmax(dim=1)
        
    def forward(self, x):

        # Complete the graph
        # print(x.size())
        # print(self.conv.weight.size())
      
        # x = self.conv1(x)
        # x = self.conv2(x)
        # x = self.conv3(x)
        # x = x.view(x.size(0), -1)

        # out = x.reshape(x.size(0), -1)
        x = self.conv(x)
        x = v.view(-1, self.num_flat_features(x))
        x = self.fc(x)
        return x

        # x = x.view(x.size(0), -1)
        # x = self.encoder(x)
        # x = self.decoder(x)
        # return x


model = ConvNet()
print(model)


ConvNet(
  (fc): Sequential(
    (0): Linear(in_features=512, out_features=10, bias=True)
  )
  (final): Softmax(dim=1)
)


In [0]:
from torch import optim

torch.manual_seed(0)

# # 检查GPU是否可用
# cuda_avail = torch.cuda.is_available()

# # 创建模型、优化器和损失函数
# model = ConvNet(num_classes=10)

# # 若GPU可用，将模型移往GPU
# if cuda_avail:
#   model.cuda()

model.fc

Sequential(
  (0): Linear(in_features=512, out_features=10, bias=True)
)

In [0]:
# 定义优化器和损失函数
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)
loss_fn = nn.CrossEntropyLoss()

In [0]:
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device

In [0]:
# model_gpu = ConvNet().to(device)

In [0]:
import timeit
def train_model_epochs(num_epochs):
  for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):

      images, labels = data
      # images = images.to(device)
      # labels = labels.to(device)

      # optimizer.zero_grad()
      # outputs = model_gpu(images)

      # loss = loss_fn(outputs, labels)
      # loss.backward()

      # optimizer.step()
      # running_loss += loss.item()
      # if i % 1000 == 999:
      #   print('Epoch / Batch [%d / %d] - Loss: %.3f' %
      #         (epoch + 1, i + 1, running_loss / 1000))
      #   running_loss = 0.0

      optimizer.zero_grad()
      outputs = model(images)
      loss = loss_fn(outputs, labels)
      loss.backward()
      optimizer.step()
      running_loss += loss.item()
      if i % 1000 == 999:
        print('Epoch / Batch [%d / %d] - Loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 1000))
        running_loss = 0.0

In [0]:
cpu_train_time = timeit.timeit(
    "train_model_epochs(num_epochs)",
    setup="num_epochs=10",
    number=1,
    globals=globals(),
)

RuntimeError: ignored