In [1]:
%matplotlib inline

# import the necessary libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utils
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import torchvision.utils
import torchvision
from torch.autograd import Variable
from PIL import Image
import PIL.ImageOps
import os
from tqdm import tqdm_notebook

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# config
training_dir = "/content/drive/My Drive/AM205_FinalProject/"
training_csv = "/content/drive/My Drive/AM205_FinalProject/train/train_data.csv"
testing_csv = "/content/drive/My Drive/AM205_FinalProject/test/test_data.csv"
testing_dir = "/content/drive/My Drive/AM205_FinalProject/"
batch_size = 32
epochs = 3

# utils
def imshow(img, text=None, should_save=False):
    npimg = img.numpy()
    plt.axis("off")
    if text:
        plt.text(
            75,
            8,
            text,
            style="italic",
            fontweight="bold",
            bbox={"facecolor": "white", "alpha": 0.8, "pad": 10},
        )
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def show_plot(iteration, loss):
    plt.plot(iteration, loss)
    plt.show()



In [4]:
# preprocessing and loading the dataset
class SiameseDataset:
    def __init__(self, training_csv=None, training_dir=None, transform=None):
        # used to prepare the labels and images path
        self.train_df = pd.read_csv(training_csv)
        self.train_df.columns = ["image1", "image2", "label"]
        self.train_dir = training_dir
        self.transform = transform

    def __getitem__(self, index):

        # getting the image path
        image1_path = os.path.join(self.train_dir, self.train_df.iat[index, 0])
        image2_path = os.path.join(self.train_dir, self.train_df.iat[index, 1])

        # Loading the image
        img0 = Image.open(image1_path)
        img1 = Image.open(image2_path)
        img0 = img0.convert("L")
        img1 = img1.convert("L")

        # Apply image transformations
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)

        return (
            img0,
            img1,
            torch.from_numpy(
                np.array([int(self.train_df.iat[index, 2])], dtype=np.float32)
            ),
        )

    def __len__(self):
        return len(self.train_df)

In [5]:
# create a siamese network
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()

        # Setting up the Sequential of CNN Layers
        self.cnn1 = nn.Sequential(
            nn.Conv2d(1, 96, kernel_size=11, stride=1),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),
        )

        # Defining the fully connected layers
        self.fc1 = nn.Sequential(
            nn.Linear(30976, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 16),
        )

    def forward_once(self, x):
        # Forward pass
        output = self.cnn1(x)
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output

    def forward(self, input1, input2):
        # forward pass of input 1
        output1 = self.forward_once(input1)
        # forward pass of input 2
        output2 = self.forward_once(input2)
        return output1, output2

In [6]:
# Load the the dataset from raw image folders
siamese_dataset = SiameseDataset(
    training_csv,
    training_dir,
    transform=transforms.Compose(
        [transforms.Resize((105, 105)), transforms.ToTensor()]
    ),
)
train_dataloader = DataLoader(
    siamese_dataset, shuffle=True, num_workers=8, batch_size=1
)

test_dataset = SiameseDataset(
    training_csv=testing_csv,
    training_dir=testing_dir,
    transform=transforms.Compose(
        [transforms.Resize((105, 105)), transforms.ToTensor()]
    ),
)
test_dataloader = DataLoader(test_dataset, num_workers=6, batch_size=1, shuffle=True)


siamese_model = torch.load("/content/drive/My Drive/AM205_FinalProject/model3e")

  cpuset_checked))
  cpuset_checked))


In [7]:
train_data = np.zeros((len(siamese_dataset), 16))
test_data = np.zeros((len(test_dataset), 16))
train_label = np.zeros((len(siamese_dataset), 1))
test_label = np.zeros((len(test_dataset), 1))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

pbar = tqdm_notebook(total=len(siamese_dataset), desc="Create Train Feature: ")
for i, data in enumerate(train_dataloader, 0):
  x0, x1, label = data
  train_data[i, :] = siamese_model.forward_once(x1.to(device)).data.cpu().numpy()
  train_label[i] = label
  pbar.update(1)
pbar.close()

pbar = tqdm_notebook(total=len(test_dataset), desc="Create Test Feature: ")
for i, data in enumerate(test_dataloader, 0):
  x0, x1, label = data
  test_data[i, :] = siamese_model.forward_once(x1.to(device)).data.cpu().numpy()
  test_label[i] = label
  pbar.update(1)
pbar.close()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


Create Train Feature:   0%|          | 0/35751 [00:00<?, ?it/s]

  cpuset_checked))
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


Create Test Feature:   0%|          | 0/9964 [00:00<?, ?it/s]

  cpuset_checked))


In [8]:
# Define Dataset
class FeatureDataset(Dataset):
  def __init__(self, X_train, y_train):
    self.X_train = X_train
    self.y_train = y_train

  def __len__(self):
    return self.X_train.shape[0]

  def __getitem__(self, index):
    return self.X_train[index], self.y_train[index]

In [9]:
batch_size = 32

feature_train_dataset = FeatureDataset(train_data, train_label)
feature_train_loader = DataLoader(feature_train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=2)

feature_test_dataset = FeatureDataset(test_data, test_label)
feature_test_loader = DataLoader(feature_test_dataset, batch_size=batch_size,
                                 shuffle=True, num_workers=2)

In [65]:
# Gating Network to assign probabilities to experts
class FeedForward(nn.Module):

    def __init__(self ):
        super(FeedForward, self).__init__()

        self.fc1 = nn.Linear(16, 4)
        self.fc2 = nn.Linear(4, 1)
        #self.fc3 = nn.Linear(16, 4)
        #self.fc4 = nn.Linear(4, 1)

        #self.bn1 = nn.BatchNorm1d(64)
        #self.bn2 = nn.BatchNorm1d(16)
        #self.bn3 = nn.BatchNorm1d(4)

    def forward(self, x):

        x = F.relu(self.fc1(x))
        #x = self.bn1(x)
        x = F.relu(self.fc2(x))
        #x = self.bn2(x)
        #x = F.relu(self.fc3(x))
        #x = self.bn3(x)
        #x = F.relu(self.fc4(x))
        
        return x

In [66]:
model = FeedForward()
learning_rate = 1e-4
num_epochs = 3

# Define Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Define Loss
criterion = nn.BCEWithLogitsLoss()

In [67]:
for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    pbar = tqdm_notebook(total=len(siamese_dataset), desc="Epoch "+str(epoch))
    for i, data in enumerate(feature_train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs.float())
        loss = criterion(outputs, labels.reshape(-1, 1))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        pbar.update(batch_size)
    print('Epoch %d loss: %.8f' % (epoch, running_loss / len(feature_train_dataset)))
    pbar.close()

print('Finished Training')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


Epoch 0:   0%|          | 0/35751 [00:00<?, ?it/s]

Epoch 0 loss: 0.02284808


Epoch 1:   0%|          | 0/35751 [00:00<?, ?it/s]

Epoch 1 loss: 0.02171025


Epoch 2:   0%|          | 0/35751 [00:00<?, ?it/s]

Epoch 2 loss: 0.02167935
Finished Training


In [68]:
# Check the whole dataset
correct = 0
total = 0
predict_list = []
label_list = []
prob_list = []
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    pbar = tqdm_notebook(total=len(feature_test_dataset), desc="Prediction ")
    for data in feature_test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images.float())
        # the class with the highest energy is what we choose as prediction
        predicted = (outputs.data > 0.6) + 0
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        predict_list.extend(list((outputs.data.numpy().flatten() > 0.5)*1))
        prob_list.extend(list(outputs.data.numpy().flatten()))
        label_list.extend(list(labels.data.numpy().flatten()))
        pbar.update(batch_size)
    pbar.close()

#print('Accuracy of the network on the 10000 test images: %d %%' % (correct / total))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if __name__ == '__main__':


Prediction :   0%|          | 0/9964 [00:00<?, ?it/s]

In [79]:
#from sklearn.metrics import roc_auc_score, plot_confusion_matrix, classification_report
#print(classification_report(label_list, predict_list))