In [1]:
import numpy as np
import pandas as pd
import copy
import os
import random
import shutil
import zipfile
from math import atan2, cos, sin, sqrt, pi, log

import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as pathces
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.transforms.functional as F2
from PIL import Image
from numpy import linalg as LA
from torch import optim, nn
from torch.utils.data import DataLoader, random_split,TensorDataset
from sklearn.model_selection import train_test_split
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import albumentations as A
from tqdm import tqdm
import xml.etree.ElementTree as ET
from torchsummary import summary

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [2]:
# Reading/Loading the dataset files
# TODO
train_set = pd.read_csv('train_info.csv')
PATH = 'train_images\\'
finalDimention = 128
transform = transforms.Compose([ transforms.ToTensor(), # normalizes to range [0,1]
                        transforms.Resize([finalDimention,finalDimention]),
                        transforms.Grayscale()
                       ])
images = []
labels = torch.zeros(len(train_set))
for i in range(len(train_set)):
    testIm = cv2.imread(PATH+train_set.file_name[i])
    images.append(transform(testIm))   
    if (train_set.hamiltonian[i]=='yes'):
        labels[i] = 1
    else:
        labels[i] = 0
img = Image.open(PATH+train_set.file_name[i]);


# cnts = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# print(cnts)
# seprate_image(img)



In [3]:
imagesT = torch.stack(images, dim=0)


In [4]:
labels

tensor([1., 0., 0.,  ..., 0., 1., 0.])

In [4]:
train_data,test_data, train_labels,test_labels = train_test_split(imagesT, labels, test_size=0.2,shuffle=True)
train_data = TensorDataset(train_data,train_labels)
test_data  = TensorDataset(test_data,test_labels)

# finally, translate into dataloader objects
batchsize    = 8
train_loader = DataLoader(train_data,batch_size=batchsize,shuffle=True,drop_last=True)
test_loader  = DataLoader(test_data,batch_size=len(test_data))

In [5]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                        nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [6]:
def makeTheNet(block, layers, num_classes = 4,printtoggle=False):

  class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes = 4):
      super(ResNet, self).__init__()

      self.print = printtoggle

      self.inplanes = 64
      self.conv1 = nn.Sequential(
                      nn.Conv2d(1, 64, kernel_size = 7, stride = 2, padding = 3),
                      nn.BatchNorm2d(64),
                      nn.ReLU())
      self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
      self.layer0 = self._make_layer(block, 64, layers[0], stride = 1)
      self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
      self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
      #self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
      self.avgpool = nn.AvgPool2d(3, stride=1)
      self.fc1 = nn.Linear(9216, 256)
      self.fc2 = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
      downsample = None
      if stride != 1 or self.inplanes != planes:
          downsample = nn.Sequential(
              nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
              nn.BatchNorm2d(planes),
          )
      layers = []
      layers.append(block(self.inplanes, planes, stride, downsample))
      self.inplanes = planes
      for i in range(1, blocks):
          layers.append(block(self.inplanes, planes))
      return nn.Sequential(*layers)

    def forward(self, x):
      x = self.conv1(x)
      if self.print: print(f'First CPR block: {list(x.shape)}')
      x = self.maxpool(x)
      x = self.layer0(x)
      if self.print: print(f'2 CPR block: {list(x.shape)}')
      x = self.layer1(x)
      x = F.dropout2d(x,p=.2)
      if self.print: print(f'3 CPR block: {list(x.shape)}')
      x = self.layer2(x)
      if self.print: print(f'4 CPR block: {list(x.shape)}')
      #x = self.layer3(x)
      #if self.print: print(f'5 CPR block: {list(x.shape)}')
      x = self.avgpool(x)
      if self.print: print(f'avg block: {list(x.shape)}')
      nUnits = x.shape.numel()/x.shape[0]
      x = x.view(-1,int(nUnits))
      if self.print: print(f'Vectorized: {list(x.shape)}')
      x = self.fc1(x)
      x = F.dropout(x,.4)
      x = self.fc2(x)
      return x

  # create the model instance
  net = ResNet(block, layers, num_classes)
  lossfun = nn.BCEWithLogitsLoss()
  # optimizer
  optimizer = torch.optim.AdamW(net.parameters(),lr=.001)

  return net,optimizer,lossfun

In [9]:

net,optimizer,lossFun = makeTheNet(ResidualBlock, [3, 4, 6, 3],1,True)

X,y = next(iter(train_loader))
yHat = net(X)

# check size of output
print('\nOutput size:')
print(yHat.shape)
print(y.shape)

# # now let's compute the loss
#y = y[:,None]
loss = lossFun(yHat.squeeze(),y)
print(' ')
print('Loss:')
print(loss)

First CPR block: [8, 64, 64, 64]
2 CPR block: [8, 64, 32, 32]
3 CPR block: [8, 128, 16, 16]
4 CPR block: [8, 256, 8, 8]
avg block: [8, 256, 6, 6]
Vectorized: [8, 9216]

Output size:
torch.Size([8, 1])
torch.Size([8])
 
Loss:
tensor(0.8945, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)


In [10]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [65]:
summary(net.cpu(),(1,128,128),device="cpu")

First CPR block: [2, 64, 64, 64]
2 CPR block: [2, 64, 32, 32]
3 CPR block: [2, 128, 16, 16]
4 CPR block: [2, 256, 8, 8]
avg block: [2, 256, 6, 6]
Vectorized: [2, 9216]
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           3,200
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,928
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,928
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
    ResidualBlock-11           [-1, 64, 32, 32]               0

In [11]:
def function2trainTheModel(trainLoader,testLoader):

  # number of epochs
  numepochs = 10
  
  # create a new model
  net,optimizer,lossFun = makeTheNet(ResidualBlock, [3, 4, 6, 3],1)
  best_valid_Err = 100000
  # send the model to the GPU
  net.to(device)

  # initialize losses
  trainLoss = torch.zeros(numepochs)
  testLoss  = torch.zeros(numepochs)
  trainErr  = torch.zeros(numepochs)
  testErr   = torch.zeros(numepochs)


  # loop over epochs
  for epochi in range(numepochs):

    # loop over training data batches
    net.train()
    batchLoss = []
    batchErr  = []
    
    for step,batch in enumerate(trainLoader):
      # push data to GPU
      if step % 50 == 0 and not step == 0:
        print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(trainLoader)))
      X,y = batch
      X = X.to(device)
      y = y.to(device)

      # forward pass and loss
      yHat = net(X)
      loss = lossFun(yHat.squeeze(),y)

      # backprop
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # loss and error from this batch
      batchLoss.append(loss.item())
      

    # and get average losses and error rates across the batches
    trainLoss[epochi] = np.mean(batchLoss)



    ### test performance
    net.eval()
    batchAcc = []
    print(f"Eval. train loss {trainLoss[epochi]}")
    for X,y in testLoader:
      X = X.to(device)
      y = y.to(device)
      # forward pass and loss
      with torch.no_grad():
        yHat = net(X)


    testErr[epochi]  = lossFun(yHat.squeeze(),y).item()

    if testErr[epochi] < best_valid_Err:
        best_valid_Err = testErr[epochi]
        torch.save(net.state_dict(), 'saved_weights6.pt')
    print(f'Finished epoch {epochi+1}/{numepochs}. Test Loss = {testErr[epochi]:.2f}')
  # end epochs

  # function output
  return trainLoss,testLoss,testErr,net

In [12]:
trainLoss,testLoss,testErr,net = function2trainTheModel(train_loader,test_loader)

  Batch    50  of    442.
  Batch   100  of    442.
  Batch   150  of    442.
  Batch   200  of    442.
  Batch   250  of    442.
  Batch   300  of    442.
  Batch   350  of    442.
  Batch   400  of    442.
Eval. train loss 1.862845778465271
Finished epoch 1/10. Test Loss = 1.23
  Batch    50  of    442.
  Batch   100  of    442.
  Batch   150  of    442.
  Batch   200  of    442.
  Batch   250  of    442.
  Batch   300  of    442.
  Batch   350  of    442.
  Batch   400  of    442.
Eval. train loss 0.433574378490448
Finished epoch 2/10. Test Loss = 0.75
  Batch    50  of    442.
  Batch   100  of    442.
  Batch   150  of    442.
  Batch   200  of    442.
  Batch   250  of    442.
  Batch   300  of    442.
  Batch   350  of    442.
  Batch   400  of    442.
Eval. train loss 0.3309047520160675
Finished epoch 3/10. Test Loss = 2.33
  Batch    50  of    442.
  Batch   100  of    442.
  Batch   150  of    442.
  Batch   200  of    442.
  Batch   250  of    442.
  Batch   300  of    442.


In [13]:
model,_,_ = makeTheNet(ResidualBlock, [3, 4, 6, 3],1)
model.load_state_dict(torch.load('saved_weights6.pt', weights_only=True))

<All keys matched successfully>

In [9]:
model = model.to('cpu')
X,y = next(iter(test_loader))
yHat = model(X)

# check size of output
print('\nOutput size:')
print(yHat)
print(y)




Output size:
tensor([[ 3.0474e+00],
        [ 3.0799e+00],
        [-5.3124e+00],
        [-4.1741e+00],
        [-5.2056e+00],
        [ 2.5299e+00],
        [-3.4927e+00],
        [-1.3757e+00],
        [-6.5969e+00],
        [ 1.1118e+00],
        [ 2.4990e+00],
        [-7.3999e+00],
        [ 2.6800e+00],
        [-5.4704e+00],
        [-5.9672e+00],
        [-1.2815e+00],
        [ 2.4400e+00],
        [-1.5143e+00],
        [-2.0816e+00],
        [-6.0026e+00],
        [-1.9072e+00],
        [-4.3022e+00],
        [ 4.1840e+00],
        [-4.2486e+00],
        [-6.9170e+00],
        [-1.0978e+01],
        [ 5.6878e-01],
        [-5.8929e+00],
        [ 4.6516e+00],
        [ 4.7251e+00],
        [ 1.0084e+00],
        [ 2.4786e+00],
        [-6.1533e-01],
        [-3.1057e+00],
        [-4.2302e+00],
        [-4.9400e+00],
        [ 4.9310e+00],
        [-7.2308e+00],
        [-6.6984e+00],
        [-8.5905e+00],
        [-5.9816e+00],
        [-1.2586e+00],
        [ 4.7628e+00

In [10]:
acc = torch.mean(((yHat.squeeze()>0).float() == y).float())
acc

tensor(0.9402)

In [11]:
# evaluate your model
from sklearn.metrics import f1_score
print(f1_score(y.detach().numpy(),yHat.detach().numpy()>0,average='binary'))
# To-Do

0.9384436701509872


In [12]:
del yHat

In [14]:
# To-Do
# test_set = pd.read_csv('../data/test.csv')
# testT = np.array(test_set,dtype=np.float32)
# testT = torch.tensor(testT).to(device)
# yHatt = torch.argmax(bestModel(testT),axis=1)
# yHatt = yHatt.to('cpu')
# predict = []
# for i in yHatt:
#     predict.append(allAuthor[i])
# submission = pd.DataFrame(predict,columns=['author'])
# submission =
pat = 'test_images\\'
names = []
imagess=[]
for subdir, dirs, files in os.walk(pat):
    for file in files:
        im =cv2.imread(pat+file)
        imagess.append(transform(im))
        names.append(file)

In [15]:
imagesT = torch.stack(imagess, dim=0)

In [16]:
yHat = model(imagesT)

In [17]:
answer = (yHat.squeeze()>0).int()

In [16]:
answer.shape

torch.Size([1572])

In [18]:
answer=answer.detach().numpy()

In [19]:
submission = pd.DataFrame({'file_name': names,'hamiltonian':answer})

In [20]:
submission

Unnamed: 0,file_name,hamiltonian
0,graph1000.png,0
1,graph1001.png,0
2,graph1004.png,0
3,graph1008.png,0
4,graph1009.png,1
...,...,...
1567,graph987.png,0
1568,graph989.png,1
1569,graph991.png,0
1570,graph996.png,0


In [20]:
import zipfile

#if not os.path.exists(os.path.join(os.getcwd(), 'Student_GPA.ipynb')):
 #   %notebook -e hamiltonian_graph.ipynb

def compress(file_names):
    print("File Paths:")
    print(file_names)
    compression = zipfile.ZIP_DEFLATED
    with zipfile.ZipFile("result.zip", mode="w") as zf:
        for file_name in file_names:
            zf.write('./' + file_name, file_name, compress_type=compression)

submission.to_csv('submission.csv', index=False)

file_names = ['hamiltonian_graph.ipynb', 'submission.csv']
compress(file_names)

File Paths:
['hamiltonian_graph.ipynb', 'submission.csv']
