In [1]:
import torchvision.models as models
import os
import json

import matplotlib as plt
import matplotlib.image as mpimg
import numpy as np

from sklearn import preprocessing
import math
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F


In [2]:
# Look up the label record of one query template for a given source image
def find_specific_lookup(data, source_image, query_name):
    """Return the first template record matching a source image and query name.

    ``data`` is scanned in order. For every entry whose ``"search_image"``
    equals ``source_image``, its ``"templates"`` list is scanned for a record
    whose ``"template"`` equals ``query_name``.

    Returns:
        The first matching template record, or ``None`` when nothing matches.
    """
    matches = (
        candidate
        for entry in data
        if entry["search_image"] == source_image
        for candidate in entry["templates"]
        if candidate["template"] == query_name
    )
    # The generator is lazy, so the scan stops at the first hit.
    return next(matches, None)

In [3]:
# Load the labels, the source (map) image and every query (template) image.
# All paths are resolved relative to the parent of the current working directory.
base_path = os.path.dirname(os.getcwd())

# Label file (JSON)
lbl_path = os.path.join(base_path, 'Data/labels/train_template_matching.json')

# Source image file and directory holding the query images
s_img_path = os.path.join(base_path, 'Data/map_train/51.99908_4.373749.png')
q_img_path = os.path.join(base_path, 'Data/train_template_matching')

# For now the source image is the same for every pair
s_img = mpimg.imread(s_img_path)

with open(lbl_path, 'r') as lbl_file:
    label = json.load(lbl_file)

images = []
data = []

# os.listdir() returns entries in arbitrary, platform-dependent order, while the
# normalization cell pairs images[k] with label[0]['templates'][k] by position.
# Sorting makes that pairing reproducible between runs and machines.
# NOTE(review): sorting only fixes the order; it does not prove that the k-th
# file is the k-th labelled template - verify against the label file.
for q_name in sorted(os.listdir(q_img_path)):
    if q_name.endswith((".jpg", ".png", ".jpeg")):
        q_img = mpimg.imread(os.path.join(q_img_path, q_name))
        # Keep only the first three channels of both images
        images.append([q_img[:, :, :3], s_img[:, :, :3]])

In [4]:
# Coordinates to pixels in reference to the source image
def CoordToPixel(q_cntr_lat, q_cntr_lon, s_cntr_lat, s_cntr_lon,
                 w=240, h=240, s_zoom=15):
    """Convert a query centre (lat, lon) to pixel coordinates in the source image.

    The source image centre maps to ``(0.5 * w, 0.5 * h)``. X grows with
    longitude (east), Y grows as latitude decreases (south).

    Args:
        q_cntr_lat, q_cntr_lon: centre of the query image, in degrees.
        s_cntr_lat, s_cntr_lon: centre of the source image, in degrees.
        w: source image width in pixels (default 240).
        h: source image height in pixels (default 240).
        s_zoom: zoom level of the source image (default 15).

    Returns:
        Tuple ``(X, Y)`` of floats, the pixel position of the query centre.
    """
    # Degrees of longitude covered by one pixel at this zoom level.
    # The "+ 8" presumably reflects 256-pixel (2**8) map tiles - TODO confirm.
    degreesPerPixelX = 360 / math.pow(2, s_zoom + 8)

    # Degrees of latitude per pixel: the longitude figure scaled by the cosine
    # of the source latitude.
    parallelMultiplier = math.cos(s_cntr_lat * math.pi / 180)
    degreesPerPixelY = degreesPerPixelX * parallelMultiplier

    Y = (s_cntr_lat - q_cntr_lat) / degreesPerPixelY + 0.5 * h
    X = (q_cntr_lon - s_cntr_lon) / degreesPerPixelX + 0.5 * w
    return X, Y

In [5]:
# Assemble aligned query/source/label arrays, then standardize them
source_entry = label[0]
s_cntr_lat, s_cntr_lon = source_entry['search_image_gps']

q_img_all, s_img_all, lbl_data_norm = [], [], []

# NOTE(review): images are matched to templates purely by position
# (images[k] <-> templates[k]); nothing here checks that the k-th loaded file
# really is the k-th labelled template - confirm the ordering.
for tracker, template in enumerate(source_entry['templates']):
    q_cntr_lat, q_cntr_lon = template['gps_coords']

    # Each entry of `images` is a [query, source] pair
    pair = images[tracker]
    q_img = np.array(pair[0])
    s_img = np.array(pair[1])
    s_img_all.append(s_img)
    q_img_all.append(q_img)

    # Label = pixel position of the query centre inside the source image
    lbl_coord = np.array(CoordToPixel(q_cntr_lat, q_cntr_lon, s_cntr_lat, s_cntr_lon))
    lbl_data_norm.append(lbl_coord)

s_img_all = np.array(s_img_all)
q_img_all = np.array(q_img_all)
lbl_data_norm = np.array(lbl_data_norm)

# Flatten every image to one row so the scaler can treat pixels as features.
# The source images are reshaped with the QUERY dimensions, so this only works
# when both kinds of image have the same number of values.
n_samples, height, width, channels = q_img_all.shape
flat_size = height * width * channels

q_img_reshape = q_img_all.reshape(n_samples, flat_size)
s_img_reshape = s_img_all.reshape(n_samples, flat_size)

# The image scaler is fitted on the query images only and reused for the
# source images.
# NOTE(review): presumably intentional (the source image is constant, so its
# own per-pixel variance would be zero) - confirm.
scaler_q = preprocessing.StandardScaler().fit(q_img_reshape)

q_img_norm = scaler_q.transform(q_img_reshape)
s_img_norm = scaler_q.transform(s_img_reshape)

# Standardize the pixel labels with their own scaler
scaler_lbl = preprocessing.StandardScaler()
lbl_data_norm = scaler_lbl.fit_transform(lbl_data_norm)

In [6]:
# Create the Siamese Neural Network with resnet50 (much too large; kept commented out for reference)
#class SiameseNetwork(nn.Module):
#    def __init__(self):
#        super(SiameseNetwork, self).__init__()
#
#        resnet = models.resnet50(pretrained=True)
#        
#        # Remove the last fully connected layer (classification layer)
#        self.feature_extractor = nn.Sequential(*list(resnet.children())[:-1]) # Remove the final FC layer
#
#        self.fc1 = nn.Sequential(
#            nn.Linear(2048, 1024),  # ResNet50 outputs 2048-d features
#            nn.ReLU(inplace=True),
#            
#            nn.Linear(1024, 240),
#            nn.ReLU(inplace=True),
#            
#            nn.Linear(240, 2)  # Output is a 2D embedding
#        )
#        
#    def forward_once(self, x):
#        # Pass input through the feature extractor (ResNet50)
#        output = self.feature_extractor(x)
#        
#        # Flatten the output tensor
#        output = output.view(output.size(0), -1)
#        
#        # Pass through fully connected layers
#        output = self.fc1(output)
#        return output
#
#    def forward(self, input1, input2):
#        # Pass both inputs through the network
#        output1 = self.forward_once(input1)
#        output2 = self.forward_once(input2)
#
#        return output1, output2

In [7]:
class SiameseNetwork(nn.Module):
    """Two-branch CNN with shared weights that regresses a 2-value output.

    Both inputs go through the same convolutional stack and the same fully
    connected layer (512 features each). The two feature vectors are
    concatenated and mapped to 2 outputs by a final linear layer.

    Args:
        in_channels: number of channels of the input images. Defaults to 1,
            the previously hard-coded value; pass 3 for RGB input.

    Inputs are expected as ``(batch, in_channels, height, width)``. The feature
    map is adaptively pooled to 11x11 before the fully connected layer, so the
    two inputs may have different spatial sizes. For inputs that already yield
    an 11x11 map the pooling is an identity, so results are unchanged.
    """

    def __init__(self, in_channels=1):
        super(SiameseNetwork, self).__init__()

        self.cnn1 = nn.Sequential(
            nn.Conv2d(in_channels, 48, kernel_size=11, stride=1),  # Output channels reduced from 96 to 48
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(3, stride=2),

            nn.Conv2d(48, 128, kernel_size=5, stride=1, padding=2),  # Output channels reduced from 256 to 128
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),

            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),  # Output channels reduced from 384 to 192
            nn.ReLU(inplace=True),

            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),  # Output channels reduced from 256 to 128
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),
        )

        # fc1 needs exactly 128 * 11 * 11 = 15488 features. This parameter-free
        # pooling layer guarantees that for any input size. It is kept outside
        # `cnn1` so the parameter names of the existing layers do not change.
        self.pool = nn.AdaptiveAvgPool2d((11, 11))

        # Fully connected layers for each branch
        self.fc1 = nn.Sequential(
            nn.Linear(15488, 512),  # Input size adjusted for reduced output dimensions
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
        )

        # Output layer combining the features of both branches
        self.fc2 = nn.Sequential(
            nn.Linear(512 * 2, 2)  # Adjusted for reduced fc1 output size
        )

    def forward_once(self, x):
        """Run one image batch through the shared branch; returns ``(batch, 512)``."""
        x = self.cnn1(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc1(x)
        return x

    def forward(self, input1, input2):
        """Return a single ``(batch, 2)`` tensor computed from both inputs."""
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)

        # Concatenate the features from both branches
        combined = torch.cat((output1, output2), dim=1)
        out = self.fc2(combined)  # Final output
        return out

In [8]:
class EuclidianLoss(torch.nn.Module):
    """Per-sample Euclidean (L2) distance between two batches of vectors.

    Args:
        margin: stored on the instance but not used by ``forward``; kept so
            existing constructor calls keep working.
    """

    def __init__(self, margin=1.0):
        super(EuclidianLoss, self).__init__()
        self.margin = margin

    def forward(self, x, y):
        """Return the L2 distance along dim 1 for every sample.

        Args:
            x, y: tensors of matching shape ``(batch, features)``.

        Returns:
            Tensor of shape ``(batch,)``. No reduction is applied; callers that
            need a scalar must reduce it themselves (e.g. ``.mean()``).
        """
        # sqrt(sum(diff**2)) has an infinite derivative at 0, which turns the
        # gradient into NaN as soon as a prediction equals its target.
        # vector_norm gives the same forward value with a zero gradient there.
        return torch.linalg.vector_norm(x - y, ord=2, dim=1)


In [9]:
# Instantiate the network together with its loss and optimizer
model = SiameseNetwork()

criterion = EuclidianLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

In [10]:
# Custom Dataset Class (similar to the tutorial)
class SiameseDataset(Dataset):
    """Index-aligned (query image, source image, label) triples.

    The three containers are stored as given and indexed with the same ``idx``.
    The dataset length is the number of labels.
    """

    def __init__(self, q_imgs, s_imgs, labels):
        self.q_imgs, self.s_imgs, self.labels = q_imgs, s_imgs, labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(q_img, s_img, label)`` for ``idx`` as float32 tensors."""
        fields = (self.q_imgs, self.s_imgs, self.labels)
        return tuple(
            torch.tensor(field[idx], dtype=torch.float32) for field in fields
        )

dataset = SiameseDataset(q_img_norm, s_img_norm, lbl_data_norm)
# num_workers=0 loads batches in the main process. Worker processes have to
# re-import the dataset class, which fails for a class defined in a notebook
# cell when workers are started with "spawn" (the default on Windows and macOS).
# The arrays are already in memory, so extra workers would bring no speed-up.
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0)


In [11]:
import torch

def print_model_size(model):
    """Print the shape and element count of every parameter tensor of ``model``.

    A final line reports the grand total, which is also returned.

    Returns:
        int: total number of elements over all of ``model.parameters()``.
    """
    counts = []
    for tensor in model.parameters():
        counts.append(tensor.numel())  # number of elements in this tensor
        print(f"{tensor.size()} - {counts[-1]} parameters")

    total_params = sum(counts)
    print(f"Total number of parameters: {total_params}")
    return total_params

# Example usage: print every parameter tensor of the instantiated network and
# the total count; the returned total is shown as the cell result
print_model_size(model)

torch.Size([48, 1, 11, 11]) - 5808 parameters
torch.Size([48]) - 48 parameters
torch.Size([128, 48, 5, 5]) - 153600 parameters
torch.Size([128]) - 128 parameters
torch.Size([192, 128, 3, 3]) - 221184 parameters
torch.Size([192]) - 192 parameters
torch.Size([128, 192, 3, 3]) - 221184 parameters
torch.Size([128]) - 128 parameters
torch.Size([512, 15488]) - 7929856 parameters
torch.Size([512]) - 512 parameters
torch.Size([2, 1024]) - 2048 parameters
torch.Size([2]) - 2 parameters
Total number of parameters: 8534690


8534690

In [None]:
# Loss bookkeeping, consumed by the plotting cell
counter = []
loss_history = []
iteration_number = 0
loss_contrastive = 0

NUM_EPOCHS = 20
LOG_EVERY_N_EPOCHS = 2

# Training mode enables the dropout layers of the network
model.train()

# NOTE(review): the network starts with Conv2d layers, which need input shaped
# (batch, channels, height, width), while the normalization cell flattens every
# image to one row. The batches presumably have to be reshaped before they
# reach the model - confirm against the data pipeline.

# Iterate through the epochs
for epoch in range(NUM_EPOCHS):
    epoch_losses = []

    # Iterate over batches. The batch label is called `target` so the global
    # `label` (the parsed JSON labels) is not overwritten.
    for img0, img1, target in dataloader:
        # Zero the gradients
        optimizer.zero_grad()

        # The network merges both branches itself and returns ONE tensor of
        # shape (batch, 2), so there is nothing to unpack.
        output = model(img0, img1)

        # The criterion takes (prediction, target) and returns one distance
        # per sample; average it to get a scalar to backpropagate.
        loss_contrastive = criterion(output, target).mean()

        # Backpropagate and update the weights
        loss_contrastive.backward()
        optimizer.step()

        epoch_losses.append(loss_contrastive.item())

    # Log once per LOG_EVERY_N_EPOCHS epochs (not once per batch), using the
    # mean loss of the epoch so the curve is not dominated by a single sample.
    if epoch % LOG_EVERY_N_EPOCHS == 0:
        mean_loss = sum(epoch_losses) / max(len(epoch_losses), 1)
        print(f"Epoch number {epoch}\n Current loss {mean_loss}\n")
        iteration_number += LOG_EVERY_N_EPOCHS

        counter.append(iteration_number)
        loss_history.append(mean_loss)
            #print(f"Epoch number {epoch}\n Current loss {loss_contrastive.item()}\n")


In [None]:
import matplotlib.pyplot as plt
# Plotting data
def show_plot(iteration, loss):
    """Plot the loss values against their iteration counters and show the figure.

    Args:
        iteration: sequence of x values (iteration counters).
        loss: sequence of loss values, same length as ``iteration``.
    """
    # A dedicated figure/axes pair keeps repeated calls from drawing on
    # whatever figure happens to be current.
    fig, ax = plt.subplots()
    ax.plot(iteration, loss)
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.set_title("Training loss")
    plt.show()

In [None]:
# Plot the recorded loss curve. `counter` and `loss_history` are created by the
# training cell, so that cell must have been run in this kernel first.
show_plot(counter, loss_history)

NameError: name 'counter' is not defined

In [None]:
import datetime
import os

# Time-stamped base name shared by the weights file and its description file
now = datetime.datetime.now()

# NOTE(review): the prefix says "resnet50", but the resnet50 variant is
# commented out and the active model is the custom Siamese CNN - consider
# renaming.
filename = now.strftime("resnet50_%d-%m-%Y_%H-%M")

# Path of the text file describing this run
directory = os.path.join(os.getcwd(), filename + '.txt')

# Path of the saved weights. ".pt" because torch.save does not write HDF5.
model_dir = os.path.join(os.getcwd(), filename + '.pt')

# nn.Module has no .save() method (that is the Keras API). Save the weights
# with torch.save; restore them with
# model.load_state_dict(torch.load(model_dir)).
torch.save(model.state_dict(), model_dir)

comment = """
Siamese CNN trained on templated and source images
Outputs of siamese CNN are fully connected to linear layer
Output in pixels
"""

with open(directory, 'w') as f:
    f.write(comment)