In [1]:
# https://blog.paperspace.com/writing-cnns-from-scratch-in-pytorch/
import os
import ast
from PIL import Image
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import torchvision
import torch.nn.functional as F
import torch
import torch.optim as optim
import helper
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data.sampler import SubsetRandomSampler
# import torch.utils.data as data
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchsummary import summary
from imblearn.under_sampling import RandomUnderSampler

# ---- Training hyperparameters -------------------------------------------
num_classes, batch_size = 12, 64
learning_rate = 0.001
num_epochs, patience = 3, 1  # patience: tolerated non-improving epochs before stopping

# ---- Compute device: CUDA when usable, otherwise CPU ----------------------
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# ---- Project layout -------------------------------------------------------
# Everything lives under the /workspace/resnet/ checkout: raw data, interim
# (pre-processed) annotation tables and saved model weights.
ROOT_DIR = "/workspace/resnet/"
MODEL_DIR = os.path.join(ROOT_DIR, "models")
CNN_PATH = os.path.join(MODEL_DIR, "2.1_resnet-balanced")

ROOT_DATA_DIR = "/workspace/resnet/data/raw"
INTERIM_DATA_DIR = "/workspace/resnet/data/interim/"
ARCHIVE_DIR = os.path.join(ROOT_DATA_DIR, "archive.zip")  # path of the zip file itself
DATA_DIR = os.path.join(ROOT_DATA_DIR)  # optional sub-folder: "labelme-12-50k"
CLASSES_TXT_PATH = os.path.join(DATA_DIR, "classes.txt")

# Per-split folders and the annotation file stored inside each of them.
TRAIN_DIR, TEST_DIR = (os.path.join(DATA_DIR, split) for split in ("train", "test"))
TRAIN_ANNOT_PATH = os.path.join(TRAIN_DIR, "annotation.txt")
TEST_ANNOT_PATH = os.path.join(TEST_DIR, "annotation.txt")
# import zipfile
# with zipfile.ZipFile(ARCHIVE_DIR, 'r') as zip_ref:
#     zip_ref.extractall(ROOT_DATA_DIR)

cpu


In [2]:
# Pick the compute device again (CUDA when usable, otherwise CPU) and report it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


In [3]:
# Read the balanced training table and the test table from the interim folder.
train_df_balanced, test_df = (
    pd.read_csv(os.path.join(INTERIM_DATA_DIR, csv_name))
    for csv_name in ("balanced-train-40000.csv", "test-10000.csv")
)

In [4]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [5]:
class MyDataset(Dataset):
    """Image-classification dataset backed by an annotation DataFrame.

    Args:
        annot_df: DataFrame with one row per sample. It must expose an
            ``img_path`` column (path handed to ``Image.open``) and an
            ``int_label`` column (value convertible to ``int``).
        transform: Optional callable applied to the opened image. When it is
            ``None`` the opened image is returned unchanged.
    """

    def __init__(self, annot_df, transform=None):
        self.annotations = annot_df
        self.transform = transform

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at position ``index``.

        ``index`` is positional (0 .. len-1), which is what a ``DataLoader``
        sampler produces, so rows are looked up with ``.iloc``. This keeps the
        dataset working when the DataFrame index is not a plain 0..n-1 range
        (for example after filtering or resampling rows).
        """
        img = Image.open(self.annotations.img_path.iloc[index])
        y_label = torch.tensor(int(self.annotations.int_label.iloc[index]))
        # Use the transform given to *this* dataset, not a module-level global.
        if self.transform is not None:
            image = self.transform(img)
        else:
            image = img
        return (image, y_label)

In [6]:
# Wrap both annotation tables in datasets that share the same preprocessing.
train_data_balanced = MyDataset(train_df_balanced, transform=transform)
test_data = MyDataset(test_df, transform=transform)

# Each loader yields shuffled mini-batches of `batch_size` samples.
train_loader_balanced = DataLoader(train_data_balanced, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

test_loader

<torch.utils.data.dataloader.DataLoader at 0x7f6c8e3e37c0>

In [7]:
# Number of training rows per integer class label (to check the class balance).
train_df_balanced["int_label"].value_counts()

0     3438
9     3422
7     3392
6     3325
11    3317
10    3312
8     3305
1     3303
2     3302
4     3297
5     3295
3     3292
Name: int_label, dtype: int64

In [8]:
# Bottleneck residual block; the ResNet defined in this notebook stacks these.
#
# `identity_downsample` is an optional module applied to the skip connection.
# It is needed whenever the block changes the spatial size (stride != 1) or
# the channel count, so that the skip tensor can be added to the block output.
class block(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels used by the first two convolutions; the block
            outputs ``out_channels * 4`` channels.
        identity_downsample: Optional module applied to the input before it
            is added to the convolutional branch. Required when the input
            shape differs from the branch output shape.
        stride: Stride of the 3x3 convolution (spatial down-sampling factor).
    """

    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super(block, self).__init__()
        self.expansion = 4  # output channels = 4 * out_channels
        self.conv_res1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv_res2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv_res3 = nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(branch(x) + skip(x))``."""
        identity = x

        x = self.relu(self.bn1(self.conv_res1(x)))
        x = self.relu(self.bn2(self.conv_res2(x)))
        # No ReLU after the last batch norm: the activation is applied once,
        # after the residual sum. Applying ReLU here and then adding in place
        # would also overwrite the tensor ReLU saved for its backward pass and
        # make autograd raise during loss.backward().
        x = self.bn3(self.conv_res3(x))

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x = x + identity
        return self.relu(x)
        

In [None]:
class ResNet(nn.Module):
    """ResNet made of a convolutional stem followed by four residual stages.

    Args:
        block: Residual block class. It is called as
            ``block(in_channels, out_channels, identity_downsample, stride)``
            and must output ``out_channels * 4`` channels.
        layers: Sequence of four ints: the number of blocks in each stage,
            e.g. ``[3, 4, 6, 3]`` for the ResNet-50 layout.
        image_channels: Channels of the input images.
        num_classes: Size of the final classification layer.

    Raises:
        ValueError: If ``layers`` does not contain exactly four entries.
    """

    def __init__(self, block, layers, image_channels=3, num_classes=12):
        super(ResNet, self).__init__()
        if len(layers) != 4:
            raise ValueError(
                "layers must list the block count of 4 stages, got {}".format(len(layers))
            )

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.in_channels = 64
        self.conv_l1 = nn.Conv2d(in_channels=image_channels, out_channels=64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages. Every stage after the first halves the spatial
        # size (stride 2) and doubles the width. The fourth stage must be
        # stored as `layer4` (forward() uses it) and not overwrite `layer3`.
        self.layer1 = self._make_layer(block, layers[0], out_channels=64, stride=1)
        self.layer2 = self._make_layer(block, layers[1], out_channels=128, stride=2)
        self.layer3 = self._make_layer(block, layers[2], out_channels=256, stride=2)
        self.layer4 = self._make_layer(block, layers[3], out_channels=512, stride=2)

        # Head: global average pool, then a linear classifier over the
        # 512 * 4 channels produced by the last stage.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``."""
        x = self.conv_l1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)  # flatten (batch, C, 1, 1) -> (batch, C)
        x = self.fc(x)
        return x

    def _make_layer(self, block, num_residual_blocks, out_channels, stride):
        """Build one stage of ``num_residual_blocks`` blocks as ``nn.Sequential``.

        Only the first block of a stage may change the tensor shape, so only
        it receives the stride and, when needed, a 1x1-conv + batch-norm
        projection for the skip connection. ``self.in_channels`` is updated
        to the stage's output width (``out_channels * 4``).
        """
        identity_downsample = None
        layers = []

        # The skip connection needs a projection when the first block changes
        # the spatial size (stride != 1) or the number of channels.
        if stride != 1 or self.in_channels != out_channels * 4:
            identity_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * 4, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels * 4),
            )

        layers.append(block(self.in_channels, out_channels, identity_downsample, stride))
        # Blocks after the first one receive the expanded channel count.
        self.in_channels = out_channels * 4

        for _ in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)
        
        

# Build the network ([3, 4, 6, 3] blocks per stage) and move it to the
# selected device so that its weights live where the input batches are sent.
model = ResNet(block, [3, 4, 6, 3], 3, num_classes).to(device)

print(model)

# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.005, momentum=0.9)

total_step = len(train_loader_balanced)
log_every = 50  # print the running batch loss every `log_every` steps

# Early-stopping state. The notebook has no validation split, so the monitored
# score is the mean *training* loss of each epoch.
best_score = None
counter = 0

# Make sure the checkpoint folder exists before the first torch.save.
os.makedirs(MODEL_DIR, exist_ok=True)

model.train()
for epoch in range(num_epochs):
    running_loss = 0.0

    # Load the data in batches using the train loader.
    for i, (images, labels) in enumerate(train_loader_balanced):
        # Move tensors to the configured device.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() yields a plain float, so no autograd graph is kept alive.
        running_loss += loss.item()
        if (i + 1) % log_every == 0 or (i + 1) == total_step:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                epoch + 1, num_epochs, i + 1, total_step, loss.item()))

    epoch_loss = running_loss / max(total_step, 1)
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss))

    if best_score is None or epoch_loss < best_score:
        # First epoch or an improvement: remember the score and checkpoint
        # the weights, so a checkpoint exists even if training stops early.
        best_score = epoch_loss
        counter = 0
        torch.save(model.state_dict(), CNN_PATH)
    else:
        # No improvement: stop once `patience` such epochs have occurred in a row.
        counter += 1
        if counter >= patience:
            print('Early stopping after epoch {}'.format(epoch + 1))
            break


ResNet(
  (conv_l1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): block(
      (conv_res1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv_res2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv_res3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (identity_downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=

In [None]:
# Switch the model to evaluation mode before measuring accuracy; this changes
# the behaviour of layers such as the BatchNorm2d layers used in the network.
model.eval()

In [None]:
# Accuracy on the balanced training set. Gradients are not needed here, and
# the model is put in evaluation mode so the measurement does not depend on
# whether an earlier cell already did so.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in train_loader_balanced:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the {} train images: {} %'.format(total, 100 * correct / total))




In [None]:
# Accuracy on the test set, computed without tracking gradients.
with torch.no_grad():
    correct, total = 0, 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # torch.max over dim 1 returns (values, indices); the indices are the
        # predicted class ids.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.shape[0]
        correct += int(torch.eq(predicted, labels).sum())

    print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))




In [None]:
# Layer-by-layer summary of the trained network for a 3 x 256 x 256 input.
# `CNN` is not defined anywhere in this notebook; the model built above is
# `model`. The model and the dummy input created by torchsummary must be on
# the same device, so both are pinned to `device`.
summary(model.to(device), (3, 256, 256), device=device.type)

In [None]:
# Pull a single batch to inspect tensor shapes. Only the balanced loader
# exists in this notebook (the plain `train_loader` is commented out).
images, labels = next(iter(train_loader_balanced))
images.shape

In [None]:
# Shape of the first element of the `images` batch.
images[0].shape

In [None]:
# Shape of the `labels` tensor that accompanies `images`.
labels.shape

In [None]:
# Ask torchvision for the image size of `images`.
# NOTE(review): torchvision documents the result as [width, height] — confirm
# for the installed version.
torchvision.transforms.functional.get_image_size(images)

In [None]:
# Show one test sample and one training sample with their labels.
# Only the last expression of a cell is rendered automatically, so the first
# image is shown explicitly with display().
# `train_df` is not defined in this notebook; the training table is
# `train_df_balanced` (assumed to carry the same `str_label` / `img_path`
# columns as `test_df` — confirm against the CSV).
print(test_df.str_label[1500])
display(Image.open(test_df.img_path[1500]))
print(train_df_balanced.str_label[1000], train_df_balanced.img_path[1000])
Image.open(train_df_balanced.img_path[1000])

In [None]:
# Show one training sample: its string label, file path and image.
# `train_df` is not defined in this notebook; the training table is
# `train_df_balanced` (assumed to have a `str_label` column — confirm against
# the CSV).
print(train_df_balanced.str_label[1000], train_df_balanced.img_path[1000])
Image.open(train_df_balanced.img_path[1000])

In [None]:
# transform = transforms.Compose(
#         [transforms.ToTensor(),
#         transforms.Normalize((0.5,0.5,0.5),(0.5, 0.5, 0.5))]
#     )

# test_data = datasets.ImageFolder('/workspace/resnet/data/raw/test/', transform=transform)
# test_data_loader  = data.DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=4) 

# train_data = datasets.ImageFolder('/workspace/resnet/data/raw/train/', transform=transform)
# train_data_loader  = data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4) 

# train_data