# Computer Vision Project

#### Approach we will take for the Aerial vs Ground Natural Disaster computer vision project

## Import Packages

In [None]:
# Get this from GitHub Repository / Random Class Notebooks / etc.

# At minimum, we need PyTorch

# Run this cell only if working in Colab
# Connects to any needed files from GitHub and Google Drive
import os
import getpass

# Remove Colab default sample_data
!rm -r ./sample_data

# # Clone GitHub files to colab workspace
git_user = "sfhorng" # Enter user or organization name
git_email = "sh390@duke.edu" # Enter your email
repo_name = "AIPI-540-CV-Team-2-Project" # Enter repo name
# # Use the below if repo is private, or is public and you want to push to it
# # Otherwise comment next two lines out
git_token = getpass.getpass("enter git token") # Enter your github token 
git_path = f"https://{git_token}@github.com/{git_user}/{repo_name}.git"
!git clone "{git_path}"

# Install dependencies from requirements.txt file
notebook_dir = 'notebooks'
!pip install -r "{os.path.join(repo_name,'requirements.txt')}"

# Change working directory to location of notebook

path_to_data = os.path.join(repo_name,'data/raw')
%cd "{path_to_data}"
%ls

In [None]:
import os
import urllib.request
import tarfile
import copy
import time
import numpy as np
import pandas as pd
from torchsummary import summary
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pydicom
import cv2
from PIL import Image
import zipfile

import torch
from torchvision import datasets, transforms
import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# "major.minor" part of the installed PyTorch version.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])

# Build tag of the installed PyTorch (the part after "+", e.g. "cu113" or "cpu").
# Some builds carry no "+tag"; splitting on "+" would then report the whole
# version string as the CUDA version, so fall back to torch.version.cuda
# (None on CPU-only builds) in the same "cuXYZ" format.
_, _plus_sign, _build_tag = torch.__version__.partition("+")
if _plus_sign:
    CUDA_VERSION = _build_tag
elif torch.version.cuda:
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
else:
    CUDA_VERSION = "cpu"
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

## Load data to our project

In [None]:
# Reference the following notebooks:


# Image classification (writing neural network from scratch)

from google.colab import drive

# Make Google Drive available under /content/drive/
drive.mount('/content/drive/')

# Location of the zipped, filtered AIDER dataset on Drive (differs per team member).
# Stephanie
# path = "/content/drive/My Drive/AIPI-540-Team-2-CV-Project-Datasets/Filtered/AIDER_filtered.zip"
#Amani
path="/content/drive/My Drive/AIDER_filtered.zip"

# Extract the archive into the current working directory. The context manager
# closes the archive even if extraction raises.
with zipfile.ZipFile(path, 'r') as zip_ref:
    zip_ref.extractall(os.getcwd())

## Data Preparation: 
### Set up class to add labels, manipulate the images to size correctly, data augmentation

In [5]:
# AIDER - use approach based on image classification notebook (writing neural network from scratch)
# MEDIC - need to use DICOM

# labels = Aerial and Ground. Need to standardize "Fire", "Flood", "No Disaster" across both datasets so labels are the same

# Reference code to size images correctly. May not be in correct section, 
# but we might need to compare image sizes across our two data sets and standardize?

# Data augmentation to create new images from AIDER data set to get better class balance between aerial and ground

In [None]:
# Preprocessing pipelines, keyed by split name.
#   'train': random resized crop to 224 x 224, random horizontal flip,
#            conversion to tensor, per-channel normalization.
#   'val':   resize to 256, center crop to 224 x 224, conversion to tensor,
#            per-channel normalization (no random augmentation).

# Per-channel mean / std passed to Normalize in both pipelines
# (these are the commonly used ImageNet channel statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]

_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]

data_transforms = {
    'train': transforms.Compose(_train_steps),
    'val': transforms.Compose(_val_steps),
}

#train_dataset = datasets.ImageFolder(os.path.join(os.getcwd(), 'AIDER_filtered/train'), data_transforms['train'])
#val_dataset = datasets.ImageFolder(os.path.join(os.getcwd(), 'AIDER_filtered/train'), data_transforms['val'])

In [None]:
# Create datasets for the training and validation sets -- AIDER.
# Both are ImageFolder datasets rooted under the current working directory;
# they differ only in the transform pipeline applied to each image.
train_dataset = datasets.ImageFolder(os.path.join(os.getcwd(), 'AIDER_filtered/train'), data_transforms['train'])
# NOTE(review): the validation dataset reads the same 'AIDER_filtered/train'
# directory as the training dataset, so validation metrics are computed on the
# training images. Presumably this should point at a held-out folder -- TODO
# confirm the directory layout of AIDER_filtered and update the path.
val_dataset = datasets.ImageFolder(os.path.join(os.getcwd(), 'AIDER_filtered/train'), data_transforms['val'])

In [None]:
# Build the DataLoaders for the training and validation datasets.
# num_workers is the number of sub-processes used to load data; with 0 the
# loading happens in the main process. 2 is the value suggested for Google Colab.
batch_size = 4

# Training batches are reshuffled every epoch; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Lookup tables keyed by phase name ('train' / 'val')
dataloaders = dict(train=train_loader, val=val_loader)
# Number of samples in the dataset behind each loader
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

# Class names; a label index is a position in this list
class_names = train_dataset.classes

# Use the first GPU when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Sample images: AIDER training loader

In [None]:
# Show one batch of training images with their class names.

# Fetch the first batch with the built-in next(); `iter(loader).next()` is not
# part of the Python 3 iterator protocol and fails on recent PyTorch releases.
images, labels = next(iter(train_loader))
images = images.numpy()

# Per-channel statistics used by the Normalize transform, needed to undo it
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Two-row grid sized from the batch actually returned (rounding the column
# count up so an odd-sized batch still fits)
num_images = len(images)
fig = plt.figure(figsize=(10, 6))
for idx in range(num_images):
    ax = fig.add_subplot(2, (num_images + 1) // 2, idx+1, xticks=[], yticks=[])
    # (C, H, W) -> (H, W, C) as expected by imshow
    image = images[idx].transpose((1, 2, 0))
    # Undo the normalization and clamp to the displayable [0, 1] range
    image = np.clip(std * image + mean, 0, 1)
    ax.imshow(image)
    ax.set_title("{}".format(class_names[labels[idx]]))

### Sample images: AIDER validation loader

In [None]:
# Show one batch of validation images with their class names.

# Fetch the first batch with the built-in next(); `iter(loader).next()` is not
# part of the Python 3 iterator protocol and fails on recent PyTorch releases.
images, labels = next(iter(val_loader))
images = images.numpy()

# Per-channel statistics used by the Normalize transform, needed to undo it
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Two-row grid sized from the batch actually returned (rounding the column
# count up so an odd-sized batch still fits)
num_images = len(images)
fig = plt.figure(figsize=(10, 6))
for idx in range(num_images):
    ax = fig.add_subplot(2, (num_images + 1) // 2, idx+1, xticks=[], yticks=[])
    # (C, H, W) -> (H, W, C) as expected by imshow
    image = images[idx].transpose((1, 2, 0))
    # Undo the normalization and clamp to the displayable [0, 1] range
    image = np.clip(std * image + mean, 0, 1)
    ax.imshow(image)
    ax.set_title("{}".format(class_names[labels[idx]]))

# Amani:
## Set up Model Architecture

**Why ResNet (e.g. ResNet50)?** Deep CNNs have a major disadvantage, the *vanishing gradient problem*: during backpropagation the value of the gradient decreases significantly as it is propagated back through many layers, so hardly any change occurs to the weights of the early layers.

ResNet mitigates this by making use of the *skip connection*: the original input of a convolutional block is added to the output of that block.

In [None]:
class ResNetResize(nn.Module):
    """Small ResNet-style CNN: one stem convolution followed by two residual blocks.

    Each residual block halves the spatial size and doubles the channel count on
    its main path; the skip path uses a strided convolution plus batch norm so
    its output has the same shape and can be added to the main path.

    The shape comments below assume a (3, 224, 224) input image; the final
    linear layer is sized for exactly that case (32 * 8 * 8 features).

    Args:
        num_classes: Number of output logits produced by the final linear
            layer. Defaults to 3, the value that was previously hard-coded.
    """

    def __init__(self, num_classes=3):
        super().__init__()

        ### STEM LAYER ###

        # Conv layer: (3,224,224) -> (8,224,224)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
        # Conv layer output size = (W-F+2P)/S+1 = (224-3+2)/1+1 = 224
        self.bn1 = nn.BatchNorm2d(8)
        
        ### RESNET BLOCK 1 ###

        # Main path
        # Conv layer: (8,224,224) -> (16,112,112)
        self.block1conv1 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1)
        self.block1bn1 = nn.BatchNorm2d(16)
        # Conv layer: (16,112,112) -> (16,112,112)
        self.block1conv2 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.block1bn2 = nn.BatchNorm2d(16)

        # Skip connection: (8,224,224) -> (16,112,112)
        self.block1skipconv = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1)
        self.block1skipbn = nn.BatchNorm2d(16)

        ### RESNET BLOCK 2 ###

        # Main path
        # Conv layer: (16,112,112) -> (32,56,56)
        self.block2conv1 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.block2bn1 = nn.BatchNorm2d(32)
        # Conv layer: (32,56,56) -> (32,56,56)
        self.block2conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.block2bn2 = nn.BatchNorm2d(32)

        # Skip connection: (16,112,112) -> (32,56,56)
        self.block2skipconv = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.block2skipbn = nn.BatchNorm2d(32)

        ### FINAL LAYERS ###

        # Average pooling layer: (32,56,56) -> (32,8,8)
        self.pool2 = nn.AvgPool2d(kernel_size=7, stride=7)
        
        # Input size: 32 * 8 * 8 features from the pooling layer
        # Output size: one logit per class
        self.fc1 = nn.Linear(32*8*8, num_classes)
        
    def forward(self, x):
        """Return the class logits, shape (batch, num_classes), for an image batch `x`."""

        ### STEM LAYER ###
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)

        ### RESNET BLOCK 1 ###
        # Skip path: strided conv + batch norm so the shape matches the main path
        skipconnect = x
        skipconnect = self.block1skipconv(skipconnect)
        skipconnect = self.block1skipbn(skipconnect)

        x_out = self.block1conv1(x) # conv1
        x_out = self.block1bn1(x_out) # batch norm 1
        x_out = F.relu(x_out) # relu

        x_out = self.block1conv2(x_out) # conv2
        x_out = self.block1bn2(x_out) # batch norm 2

        # Add layer and skipconnect, then activation
        x_out += skipconnect
        x_out = F.relu(x_out)

        ### RESNET BLOCK 2 ###
        # Skip path: strided conv + batch norm so the shape matches the main path
        skipconnect = x_out
        skipconnect = self.block2skipconv(skipconnect)
        skipconnect = self.block2skipbn(skipconnect)

        x_out = self.block2conv1(x_out) # conv1
        x_out = self.block2bn1(x_out) # batch norm 1
        x_out = F.relu(x_out) # relu

        x_out = self.block2conv2(x_out) # conv2
        x_out = self.block2bn2(x_out) # batch norm 2

        # Add layer and skipconnect, then activation
        x_out += skipconnect
        x_out = F.relu(x_out)

        ### FINAL LAYERS ###
        x_out = self.pool2(x_out)
        # Flatten into a vector to feed into linear layer
        x_out = x_out.view(x_out.size(0), -1)
        # Linear layer
        x_out = self.fc1(x_out)
        
        return x_out

## Instantiate the model and display a summary (optional)

In [None]:
# Build a fresh, untrained network
net = ResNetResize()

# Print the layers of the model and the output shape after each one.
# The per-sample input shape is taken from the image batch loaded in the
# preview cell above, with the leading batch dimension dropped.
input_shape = images.shape[1:]
summary(net, input_shape, batch_size=batch_size, device="cpu")

## Train model 

In [None]:
def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=60):
    """Train `model` and report loss and accuracy for every epoch.

    Each epoch runs a 'train' phase (weights are updated) followed by a 'val'
    phase (evaluation only). The average loss and the accuracy of each phase
    are printed.

    Args:
        model: The network to train; it is moved to `device`.
        criterion: Loss function called as `criterion(outputs, labels)`.
        optimizer: Optimizer that updates the parameters of `model`.
        dataloaders: Mapping with the keys 'train' and 'val'; each value is an
            iterable of `(inputs, labels)` batches.
        device: Device on which the batches and the model are placed.
        num_epochs: Number of passes over the data. Defaults to 60.

    Returns:
        None. The model is trained in place.
    """

    model = model.to(device) # Send model to GPU if available

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Count the samples actually seen, so the averages do not depend
            # on a global size table defined outside this function
            num_samples = 0

            # Get the input images and labels, and send to GPU if available
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the weight gradients
                optimizer.zero_grad()

                # Forward pass to get outputs and calculate loss
                # Track gradient only for training data
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backpropagation to get the gradients with respect to each weight
                    # Only if in train
                    if phase == 'train':
                        loss.backward()
                        # Update the weights
                        optimizer.step()

                batch_count = inputs.size(0)
                # Convert loss into a scalar and add it to running_loss,
                # weighted by the batch size
                running_loss += loss.item() * batch_count
                # Track number of correct predictions
                running_corrects += torch.sum(preds == labels).item()
                num_samples += batch_count

            # An empty loader has nothing to average (and would divide by zero)
            if num_samples == 0:
                print('{} phase skipped: dataloader produced no batches'.format(phase))
                continue

            # Calculate and display average loss and accuracy for the epoch
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
    
    return

In [None]:
# Hyperparameters for this training run
LEARNING_RATE = 0.001
NUM_EPOCHS = 60

# Train on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from a freshly initialised network
net = ResNetResize()

# CrossEntropyLoss combines softmax and nn.NLLLoss() in a single class,
# so the network outputs raw logits
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all parameters of the network
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# Run the training / validation loop
train_model(net, criterion, optimizer, dataloaders, device, num_epochs=NUM_EPOCHS)



### Visualize results

In [None]:
# Display a batch of predictions

def visualize_results(model,dataloader,device):
    """Plot the first batch of `dataloader` with predicted and true class names.

    Every image is titled "<predicted> (<true>)"; the title is green when the
    prediction is correct and red otherwise. Class names are looked up in the
    notebook-level `class_names` list.

    Args:
        model: Trained network; it is moved to `device` and put in eval mode.
        dataloader: Loader yielding `(images, labels)` batches; only the first
            batch is drawn.
        device: Device on which the forward pass is run.

    Returns:
        None. The figure is drawn as a side effect.
    """
    model = model.to(device) # Send model to GPU if available
    model.eval()
    with torch.no_grad():
        # Take the first batch from the loader that was passed in (not from a
        # notebook global). The built-in next() is used because
        # `iter(loader).next()` is not part of the Python 3 iterator protocol.
        images, labels = next(iter(dataloader))
        images = images.to(device)
        # Get predictions
        _, preds = torch.max(model(images), 1)

    # Bring everything back to the CPU as numpy arrays so predictions and
    # labels are compared like-for-like. preds is already 1-D (one entry per
    # image), so it is not squeezed: squeezing a one-image batch would yield a
    # 0-d array that has no len().
    preds = preds.cpu().numpy()
    labels = labels.cpu().numpy()
    images = images.cpu().numpy()

    # Per-channel statistics used by the Normalize transform, needed to undo it
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Plot the images in the batch, along with predicted and true labels.
    # Two rows; the column count is rounded up so an odd-sized batch still fits.
    num_images = len(preds)
    num_cols = (num_images + 1) // 2
    fig = plt.figure(figsize=(15, 10))
    for idx in range(num_images):
        ax = fig.add_subplot(2, num_cols, idx+1, xticks=[], yticks=[])
        # (C, H, W) -> (H, W, C) as expected by imshow
        image = images[idx].transpose((1, 2, 0))
        # Undo the normalization and clamp to the displayable [0, 1] range
        image = np.clip(std * image + mean, 0, 1)
        ax.imshow(image)
        is_correct = preds[idx] == labels[idx]
        ax.set_title("{} ({})".format(class_names[preds[idx]], class_names[labels[idx]]),
                    color=("green" if is_correct else "red"))
    return

visualize_results(net,val_loader,device)

## Data Manipulation

In [6]:
#MEDIC:
## Choose which pictures to keep from MEDIC (there are a lot more than AIDER)
## Drop pictures we don't want from MEDIC
## Drop Mild Category
## Drop Not Informative label if they exist?
## Only Keep fires, floods, and not disasters

#AIDER:
## Only keep fires, floods and not disasters


## Combine Datasets

In [7]:
# Merge MEDIC and AIDER

## Train_test_split into our training and test set

In [8]:
# Set up training, validation, and test sets

# Amani:
## Set up Model Architecture

In [17]:
# Need to decide on best architecture to use based on our goals. Performance/computational resources tradeoffs

# Are there pre-trained models that are less computationally heavy to start off with? Research this & include in PowerPoint
# Transfer Learning ^

## Train model 

In [15]:
# Reference existing code to train model

## Evaluate Performance

In [16]:
# Validation / Test set

## Report on Statistics

In [11]:
# Performance, recall, F1-score

In [12]:
# Ground vs Aerial analysis