In [13]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.onnx as onnx
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from torchvision import transforms
from torchvision import transforms as T
from torchvision import *

%matplotlib inline



In [2]:
# Preprocessing for training images: resize to 80x80, random horizontal flip,
# convert to a tensor and normalise each of the three channels with mean 0.5, std 0.5.
# The pipeline is built from the module alias `T` rather than from the name
# `transforms`, because this cell rebinds `transforms` below; building from
# `transforms` made the cell fail with AttributeError when executed a second time.
train_transforms = T.Compose([
    T.Resize((80, 80)),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize((.5, .5, .5), (.5, .5, .5))
])

# Kept so that cells which pass `transform=transforms` keep working.
transforms = train_transforms

In [3]:
# Root folder of the Intel image-classification dataset. Set the INTEL_IMAGE_DIR
# environment variable to point somewhere else; the default is the original location.
DATA_ROOT = os.environ.get(
    "INTEL_IMAGE_DIR",
    'C:/Users/yyesu/Documents/Deep Learning Project FIles/intel-image-classification',
)

# Evaluation preprocessing: same resize/normalisation as training but WITHOUT the
# random horizontal flip, so test metrics are deterministic and not augmented.
eval_transforms = T.Compose([
    T.Resize((80, 80)),
    T.ToTensor(),
    T.Normalize((.5, .5, .5), (.5, .5, .5))
])

# ImageFolder derives the class labels from the sub-directory names.
train_ds = datasets.ImageFolder(f"{DATA_ROOT}/seg_train/seg_train", transform=transforms)
test_ds = datasets.ImageFolder(f"{DATA_ROOT}/seg_test/seg_test", transform=eval_transforms)

In [4]:
# Batches of 20 images; only the training data is reshuffled every epoch.
trainloader = DataLoader(dataset=train_ds, shuffle=True, batch_size=20)
testloader = DataLoader(dataset=test_ds, shuffle=False, batch_size=20)

In [5]:
#https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf
class ResnetBlock(nn.Module):
    """Basic residual block: two 3x3 convolutions plus a shortcut connection.

    Computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))`` where
    ``shortcut`` is ``downsample`` when one is given and the identity otherwise.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the input so that the shortcut
            has the same shape as the main branch; ``None`` means identity.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Stride is configurable because not every block keeps the spatial size.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # inplace=True overwrites the input tensor instead of allocating a new one.
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Needed when the input and output shapes differ; None otherwise.
        self.downsample = downsample

    # padding=1 on the 3x3 convolutions is what keeps the main branch the same
    # spatial size as the shortcut. Without it every convolution shrinks the
    # feature map and the addition fails with an error such as
    # "The size of tensor a (90) must match the size of tensor b (94)".
    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of branch and shortcut."""
        skip_connection = x  # input to add back at the end of the block

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Compare against None explicitly: module truthiness is not reliable
        # (an nn.Sequential is falsy when it has no children).
        if self.downsample is not None:
            skip_connection = self.downsample(skip_connection)

        out += skip_connection
        return self.relu(out)
            
        
        

In [6]:
class ResNet(nn.Module):
    """ResNet-style image classifier assembled from residual blocks.

    Layout: 7x7 convolution + batch norm stem, four stages of residual blocks
    (64, 128, 256 and 512 channels; stages 2-4 use stride 2 in their first
    block), global average pooling and one linear layer.

    Args:
        block: residual block class, called as
            ``block(in_channels, out_channels, stride, downsample)`` for the
            first block of a stage and ``block(out_channels, out_channels)``
            for the rest. Its output must have ``out_channels`` channels.
        layers: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output layer (defaults to 6).

    ``forward`` returns raw, unnormalised logits of shape
    ``(batch, num_classes)``. ``nn.CrossEntropyLoss`` applies log-softmax
    itself, so feeding it softmax outputs would squash the gradients. Use
    ``F.softmax(logits, dim=1)`` on the result when probabilities are needed.
    """

    def __init__(self, block, layers, num_classes=6):
        super().__init__()

        # Stem over 3-channel input (7x7 kernel, no padding).
        self.conv = nn.Conv2d(3, 64, 7, stride=1)
        self.bn = nn.BatchNorm2d(64)
        # NOTE(review): this activation is registered but forward() does not
        # apply it after the stem (nor a max-pool) - confirm that is intended.
        self.relu = nn.ReLU(inplace=True)

        # Channel count of the tensor entering the next stage; make_layer updates it.
        self.in_channels = 64

        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], 2)
        self.layer3 = self.make_layer(block, 256, layers[2], 2)
        self.layer4 = self.make_layer(block, 512, layers[3], 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(512, num_classes)

    def make_layer(self, block, out_channels, num_of_blocks, stride=1):
        """Build one stage of ``num_of_blocks`` blocks as an ``nn.Sequential``.

        Only the first block may change the resolution or channel count; when
        it does, it receives a 1x1 convolution + batch norm as ``downsample``
        so its shortcut matches the main branch.
        """
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, 1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )

        # The first block is built separately because its input shape may differ.
        stage = [block(self.in_channels, out_channels, stride, downsample)]

        # Later stages start from this stage's output width.
        self.in_channels = out_channels

        stage.extend(block(out_channels, out_channels) for _ in range(1, num_of_blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.bn(self.conv(x))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = torch.flatten(self.avg_pool(x), 1)
        # No softmax here: the loss function expects logits.
        return self.fc1(x)

# Use the first GPU when CUDA is available and fall back to the CPU otherwise,
# so that `device` is always defined (previously a CPU-only machine hit NameError).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# [3,4,6,3] is the number of residual blocks in each of the four stages.
net = ResNet(ResnetBlock, [3, 4, 6, 3]).to(device)



In [7]:
# Separate TensorBoard writers so training and test curves appear as two runs.
tb = SummaryWriter(log_dir="runs/Train")
tb_test = SummaryWriter(log_dir="runs/Test")

In [8]:
# Cross-entropy objective, optimised with Adam at a learning rate of 1e-3.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

In [14]:
PATH="image_scene_classification_3ch_v2.pth"  # checkpoint written during training
RESUME_PATH="image_scene_classification_3ch.pth"  # checkpoint to resume from, if it exists


def _run_epoch(net, loader, training):
    """Make one pass over ``loader``.

    With ``training=True`` the network is put in train mode and the global
    ``optimizer`` is stepped on every batch; otherwise the network is put in
    eval mode and gradients are disabled.

    Returns:
        ``(mean_batch_loss, accuracy_percent, last_batch_loss)`` as floats.
    """
    net.train(training)
    running_loss = 0.0
    last_loss = 0.0
    seen = 0
    correct = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = net(inputs)
            # NOTE: nn.CrossEntropyLoss applies log-softmax internally, so the
            # network is expected to output raw logits.
            loss = loss_function(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            last_loss = loss.item()
            running_loss += last_loss
            seen += labels.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
    mean_loss = running_loss / max(len(loader), 1)
    accuracy = 100 * correct / seen if seen else 0.0
    return mean_loss, accuracy, last_loss


def train(net, epochs=30, resume_path=RESUME_PATH):
    """Train ``net`` for ``epochs`` epochs, evaluating on the test set after each one.

    Uses the notebook-level ``optimizer``, ``loss_function``, ``trainloader``,
    ``testloader``, ``device`` and the TensorBoard writers ``tb`` / ``tb_test``.

    Args:
        net: the model to train (already moved to ``device``).
        epochs: number of epochs to run in this call.
        resume_path: checkpoint to continue from. When it is ``None`` or the
            file does not exist, training starts from scratch.

    Mean loss and accuracy are logged under the tags 'Loss' and 'Accuracy'.
    A checkpoint is written to ``PATH`` on every even epoch and after the last
    epoch; its 'loss' entry is the last training-batch loss as a float.
    """
    start_epoch = 0
    if resume_path is not None and os.path.isfile(resume_path):
        # map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
        checkpoint = torch.load(resume_path, map_location=device)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Continue numbering after the saved epoch so TensorBoard steps and
        # checkpoint metadata do not restart at 0.
        start_epoch = checkpoint['epoch'] + 1
        print(f"Resuming from {resume_path} at epoch {start_epoch}")
    else:
        print("No checkpoint to resume from; training from scratch")

    final_epoch = start_epoch + epochs - 1
    for epoch in range(start_epoch, start_epoch + epochs):
        train_loss, train_acc, last_loss = _run_epoch(net, trainloader, training=True)
        test_loss, test_acc, _ = _run_epoch(net, testloader, training=False)

        tb.add_scalar('Loss', train_loss, epoch)
        tb_test.add_scalar('Loss', test_loss, epoch)

        tb.add_scalar('Accuracy', train_acc, epoch)
        tb_test.add_scalar('Accuracy', test_acc, epoch)

        # Also save after the final epoch, otherwise an odd last epoch is lost.
        if epoch % 2 == 0 or epoch == final_epoch:
            torch.save({
                'epoch': epoch,
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': last_loss
            },
            PATH)

    # Leave the network in train mode, and make sure all events reach disk.
    net.train()
    tb.flush()
    tb_test.flush()

In [None]:
#train(net)

In [11]:
# Restore the trained weights and switch the network to inference mode
# (this has to happen before the model is exported to ONNX).
# A dedicated name is used instead of rebinding PATH, because train() reads the
# global PATH when it saves and would otherwise overwrite this checkpoint.
EXPORT_CHECKPOINT_PATH = "image_scene_classification_3ch.pth"
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
checkpoint = torch.load(EXPORT_CHECKPOINT_PATH, map_location=device)
net.load_state_dict(checkpoint['model_state_dict'])
net.eval()

ResNet(
  (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1))
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): ResnetBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResnetBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

In [21]:
# ONNX export of the model. The call is currently disabled: it is wrapped in a
# triple-quoted string, so nothing below is executed. Remove the quotes to run it.
# As written, the call would export `net` using a random (1, 3, 80, 80) example
# input moved to `device`, write "scene_classification_ch3.onnx" with opset 10,
# and declare dimension 0 of both 'input' and 'output' as a dynamic batch axis.
'''
x=torch.randn(1,3,80,80,requires_grad=True).to(device)
onnx.export(net,
           x,
           "scene_classification_ch3.onnx",
           export_params=True,
           opset_version=10,
           do_constant_folding=True,
           input_names =['input'],
           output_names = ['output'],
           dynamic_axes={'input' : {0: 'batch_size'},
                        'output' : {0: 'batch_size'}})
'''