In [1]:
# Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [2]:
# Checking for device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
# Transforms
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(), #convert 0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1, 1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [4]:
# DataLoader
# Path for the training and tresting directory
train_path = '/home/sadam/Aletheia-AI/Aletheia-AI Developments/Pytorch_Training/intel_image_dataset_training/scene_detection/seg_train/seg_train'
test_path = '/home/sadam/Aletheia-AI/Aletheia-AI Developments/Pytorch_Training/intel_image_dataset_training/scene_detection/seg_test/seg_test'

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=256, shuffle=True
)
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=transformer),
    batch_size=256, shuffle=True
)

In [5]:
# Categories
root = pathlib.Path(train_path)
classes = sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [6]:
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [7]:
# CNN Network

class ConvNet(nn.Module):
    def __init__(self, num_classes=6):
        super(ConvNet, self).__init__()
        # Output size after convolution filter
        # ((w-f+2P)/s)+1
        # Input shape = (256, 3, 150, 150)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        # Shape = (256, 12, 150, 150)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        # Shape = (256, 12, 150, 150)
        
        self.relu1 = nn.ReLU()
        # Shape = (256, 12, 150, 150)
        
        self.pool = nn.MaxPool2d(kernel_size=2)
        # Reduce the image size be factor 2
        # Shape = (256, 12, 75, 75)
        
        
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        # Shape = (256, 20, 75, 75)        
        self.relu2 = nn.ReLU()
        # Shape = (256, 20, 75, 75)
        
        
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Shape = (256, 32, 75, 75)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        # Shape = (256, 32, 75, 75)
        self.relu3 = nn.ReLU()
        # Shape = (256, 32, 75, 75)
        
        self.fc = nn.Linear(in_features=75 * 75 * 32, out_features=num_classes)
        
        
    # Feed Forward function
    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        
        output = self.pool(output)
        
        output = self.conv2(output)
        output = self.relu2(output)
        
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
        
        # Above output will be in mstrix form, with shape (256, 32, 75, 75)
        
        output = output.view(-1, 32 * 75 * 75)
        
        output = self.fc(output)
        return output
        
        

In [8]:
model = ConvNet(num_classes=6).to(device)

In [9]:
# Optimizer and loss function
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

In [10]:
num_epoch = 10

In [11]:
# Calculating the size of training and testing images
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))

In [12]:
print(train_count, test_count)

14034 3000


In [13]:
# Model Training and saving best model

best_accuracy = 0.0

for epoch in range(num_epoch):
    # Evaluation and training on training dataset
    model.train()
    train_accuracy = 0.0
    train_loss=0.0
    
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss += loss.cpu().data*images.size(0)
        _, prediction = torch.max(outputs.data,1)
        
        train_accuracy += int(torch.sum(prediction==labels.data))
        
    train_accuracy = train_accuracy/train_count
    train_loss = train_loss/train_count
    
    
    # Evaluating on testing dataset
    model.eval()
    test_accuracy = 0.0
    for i, (images, labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            
        outputs = model(images)
        _, prediction = torch.max(outputs.data, 1)
        test_accuracy += int(torch.sum(prediction==labels.data))
        
    test_accuracy = test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+'Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    # Save the best model
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy=test_accuracy

  return F.conv2d(input, weight, bias, self.stride,


Epoch: 0Train Loss: tensor(8.0528) Train Accuracy: 0.5413282029357275 Test Accuracy: 0.616
Epoch: 1Train Loss: tensor(1.4635) Train Accuracy: 0.718540686903235 Test Accuracy: 0.7016666666666667
Epoch: 2Train Loss: tensor(1.2137) Train Accuracy: 0.7617927889411429 Test Accuracy: 0.6893333333333334
Epoch: 3Train Loss: tensor(0.8111) Train Accuracy: 0.8163745190252245 Test Accuracy: 0.6606666666666666
Epoch: 4Train Loss: tensor(0.6284) Train Accuracy: 0.8594128544962235 Test Accuracy: 0.43633333333333335
Epoch: 5Train Loss: tensor(0.6207) Train Accuracy: 0.8646145076243409 Test Accuracy: 0.7336666666666667
Epoch: 6Train Loss: tensor(0.3768) Train Accuracy: 0.9062989881715833 Test Accuracy: 0.7156666666666667
Epoch: 7Train Loss: tensor(0.4472) Train Accuracy: 0.8999572466866183 Test Accuracy: 0.7343333333333333
Epoch: 8Train Loss: tensor(0.1968) Train Accuracy: 0.9449907367821007 Test Accuracy: 0.7183333333333334
Epoch: 9Train Loss: tensor(0.2275) Train Accuracy: 0.9363688185834402 Test Ac