In [1]:
# importing libraries
import numpy as np
import pandas as pd
from PIL import Image
import torch
import tensorflow as tf
import torchvision
import torch.nn.functional as F
from torchvision import transforms
from torchvision import datasets, transforms, models
from torch.utils.data import random_split
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
import pickle as pkl
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import sklearn
import seaborn as sns
import cv2
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import metrics
import PIL
from tqdm.notebook import tqdm, trange
import os

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [66]:
RezNetTransform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

In [78]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, images_folder, transform = RezNetTransform):
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        filename = str(self.df.id[index])+'.jpg'
        label = self.df.stable[index]
        image = PIL.Image.open(os.path.join(self.images_folder, filename))
        
        sample = {'image': image, 'stable': label}
        if self.transform:
            sample['image'] = self.transform(sample['image'])
        return sample

In [79]:
dataset = CustomDataset("blocks-labels.csv", './data')

In [80]:
train_size = int(0.8 * len(dataset))
test_size = 51276 - train_size
train, test = torch.utils.data.random_split(dataset, [train_size, test_size])

In [81]:
train_loader = torch.utils.data.DataLoader(train, batch_size=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=4, shuffle=True)

In [82]:
resnet = models.resnet18(pretrained=True)
for param in resnet.parameters():
    param.requires_grad = False
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 2)
resnet = resnet.to(device)

In [83]:
#Custom CNN
class BaseNet(nn.Module):   
    def __init__(self, h, w, outputs):
        super(BaseNet, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = torch.nn.BatchNorm2d(16)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = torch.nn.BatchNorm2d(32)
        self.conv3 = torch.nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = torch.nn.BatchNorm2d(32)

        # Number of Linear input connections depends on output of conv2d layers
        # and therefore the input image size, so compute it.
        def conv2d_size_out(size, kernel_size = 5, stride = 2):
            return (size - (kernel_size - 1) - 1) // stride  + 1
        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = torch.nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = torch.nn.functional.relu(self.bn1(self.conv1(x)))
        x = torch.nn.functional.relu(self.bn2(self.conv2(x)))
        x = torch.nn.functional.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))

In [84]:
baseNet = BaseNet(224, 224, 2).to(device)

In [85]:
input_size = 224*224
num_classes = 2

class LogRegModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)
    
    def forward(self, x):
        x = x.reshape(-1, input_size)
        out = self.linear(x)
        return out

In [86]:
logreg = LogRegModel().to(device)
print(logreg)

LogRegModel(
  (linear): Linear(in_features=50176, out_features=2, bias=True)
)


In [87]:
print(logreg.linear.weight.shape, logreg.linear.bias.shape)
list(logreg.parameters())

torch.Size([2, 50176]) torch.Size([2])


[Parameter containing:
 tensor([[ 0.0026,  0.0019, -0.0010,  ...,  0.0027, -0.0033, -0.0026],
         [-0.0012,  0.0034,  0.0014,  ..., -0.0002, -0.0008, -0.0042]],
        device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([0.0023, 0.0007], device='cuda:0', requires_grad=True)]

In [88]:
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)
# optimizer = optim.SGD(baseNet.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.SGD(logreg.parameters(), lr=0.001, momentum=0.9)
# selected_model = resnet
# selected_model = resnet
selected_model = logreg
print(criterion, optimizer)
selected_model.to(device)
losses = []
for epoch in range(1):  # loop over the dataset
    running_loss = 0.0
    for i, data in enumerate(tqdm(train_loader)):
        if i%5 != 0:
            continue
        # get the inputs; data is a list of [inputs, labels]
        # zero the parameter gradients
        inputs, labels = data['image'].to(device), data['stable'].to(device)
        optimizer.zero_grad()
        # forward + backward + optimize
        output = selected_model(inputs)
#         print(inputs.shape, labels.shape)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 0:   
            print(loss.item())
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)
print('Finished Training of RezNet')

CrossEntropyLoss() SGD (
Parameter Group 0
    dampening: 0
    lr: 0.001
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


  0%|          | 0/10255 [00:00<?, ?it/s]

0.6352148056030273
[1,     1] loss: 0.003
51.04426574707031
[1,   201] loss: 4.511
13.00998592376709
[1,   401] loss: 4.076
3.8872718811035156
[1,   601] loss: 3.487
4.019809722900391
[1,   801] loss: 1.854
34.978641510009766
[1,  1001] loss: 1.495
4.43442440032959
[1,  1201] loss: 1.446
2.095283031463623
[1,  1401] loss: 2.133
9.603708267211914
[1,  1601] loss: 3.512
24.50957679748535
[1,  1801] loss: 1.862
0.0
[1,  2001] loss: 4.715
10.269353866577148
[1,  2201] loss: 5.898
13.207889556884766
[1,  2401] loss: 1.997
21.68103790283203
[1,  2601] loss: 2.466
16.98550796508789
[1,  2801] loss: 3.664
5.63859224319458
[1,  3001] loss: 3.803
5.370378494262695e-05
[1,  3201] loss: 1.620
4.830921649932861
[1,  3401] loss: 2.335
24.77782440185547
[1,  3601] loss: 3.272
12.501142501831055
[1,  3801] loss: 2.609
41.118927001953125
[1,  4001] loss: 1.810
26.28501319885254
[1,  4201] loss: 3.693
28.50503158569336
[1,  4401] loss: 2.173
6.046274662017822
[1,  4601] loss: 1.608
40.69782257080078
[1,

In [89]:
correct = 0
total = 0
i = 0
with torch.no_grad():
    for data in tqdm(test_loader):
        i += 1
        images, labels = data['image'].to(device), data['stable'].to(device)
        outputs = selected_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        if i % 200 == 0:   
            print(total)

print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))

  0%|          | 0/2564 [00:00<?, ?it/s]

800
1600
2400
3200
4000
4800
5600
6400
7200
8000
8800
9600
Accuracy of the network on the test images: 52 %
