In [3]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
from cnn_finetune import make_model
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import torch.optim as optim
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Sanity check: report whether PyTorch can use a CUDA device in this kernel.
torch.cuda.is_available()

True

In [5]:
def show_image(image):
    """Draw `image` with matplotlib and pause briefly so the plot is refreshed."""
    refresh_delay = 0.01  # seconds handed to plt.pause so the figure gets updated
    plt.imshow(image)
    plt.pause(refresh_delay)

In [54]:
def convert_url_to_local_path(url, root_dir='/home/jovyan/course/ladi/'):
    '''
    Map an image URL to its location in the shared directory so the image
    does not have to be downloaded again.

    Args:
        url (string): Image URL of the form ``scheme://host/path/to/file``.
        root_dir (string): Directory that mirrors the URL's path component.
            Defaults to the shared course directory; a trailing slash is optional.

    Returns:
        string: ``root_dir`` joined with everything that follows the host in ``url``.
    '''
    # 'scheme://host/a/b' splits into ['scheme:', '', 'host', 'a', 'b'];
    # dropping the first three pieces leaves the path below the host.
    relative_path = '/'.join(url.split('/')[3:])
    return root_dir.rstrip('/') + '/' + relative_path

class MLCNN_Dataset(Dataset):
    """Dataset that pairs image metadata with per-image labels.

    The metadata CSV and the label CSV are inner-joined on ``s3_path``; every
    sample (image, label and metadata fields) is read from that merged table so
    the image and its label always come from the same row.

    NOTE(review): in the label table displayed in this notebook the ``label``
    column appears to hold strings such as ``[1 0 0 ...]``; it is returned
    unchanged here, so it still has to be parsed before it can feed a loss.
    """

    # (sample key, column of the merged table) pairs copied into every sample,
    # in the order they appear in the returned dict.
    _SAMPLE_FIELDS = (
        ('uuid', 'uuid'),
        ('timestamp', 'timestamp'),
        ('gps_lat', 'gps_lat'),
        ('gps_lon', 'gps_lon'),
        ('gps_alt', 'gps_alt'),
        ('orig_file_size', 'file_size'),
        ('orig_width', 'width'),
        ('orig_height', 'height'),
    )

    def __init__(self, csv_file, label_csv, transform = None):
        """
        Args:
            csv_file (string): Path to the csv file with metadata. Must provide
                the ``url`` and ``s3_path`` columns.
            label_csv (string): Path to the csv file with labels. Must provide
                the ``s3_path`` and ``label`` columns.
            transform (callable, optional): Optional transform to be applied
                on the image of a sample.
        """
        self.final_metadata = pd.read_csv(csv_file)

        # get the path in the shared directory
        self.final_metadata['local_path'] = self.final_metadata['url'].apply(convert_url_to_local_path)

        self.final_label = pd.read_csv(label_csv)

        # Inner join: only images that have a label survive. The saved index
        # columns of both CSVs collide and get the _x/_y suffixes; drop them if
        # they are present.
        self.final_data = pd.merge(
            self.final_metadata,
            self.final_label,
            on="s3_path",
        ).drop(columns=['Unnamed: 0_x', 'Unnamed: 0_y'], errors='ignore')
        self.transform = transform

    def __len__(self):
        # Samples are served from the merged table, so its length is the
        # dataset size (the raw metadata may contain unlabeled rows).
        return len(self.final_data)

    def _load_image(self, local_path, url):
        """Return ``(PIL image, source)``, preferring the shared directory over the URL."""
        ## Load images from shared directory. There is no need to download images to local machine. ##
        try:
            return Image.fromarray(io.imread(local_path)), local_path
        except Exception:
            # Best-effort fallback: fetch from the URL when the local copy cannot
            # be read. `Exception` keeps KeyboardInterrupt/SystemExit propagating.
            return Image.fromarray(io.imread(url)), url

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a dict of image, label and metadata."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        table = self.final_data
        image, img_name = self._load_image(table['local_path'].iloc[idx],
                                           table['url'].iloc[idx])

        if self.transform:
            image = self.transform(image)

        # Fields are looked up by column name so they cannot drift when the
        # column order of the merged table changes.
        sample = {'image': image, 'image_name': img_name, 'label': table['label'].iloc[idx]}
        for key, column in self._SAMPLE_FIELDS:
            sample[key] = table[column].iloc[idx]

        return sample

# CSV inputs shared by both dataset instances.
METADATA_CSV = 'final_metadata.csv'
LABEL_CSV = 'changedLabels.csv'

# Dataset without any transform: samples carry the image as loaded.
final_dataset = MLCNN_Dataset(csv_file = METADATA_CSV, label_csv = LABEL_CSV)

# Augmentation pipeline: resize, take a random 512 crop, rotate by a random
# angle of up to 25 degrees, flip horizontally half of the time, then convert
# the image to a tensor.
scale = transforms.Resize(768)
crop = transforms.RandomCrop(512)
rotate = transforms.RandomRotation(25)
flip = transforms.RandomHorizontalFlip(p=0.5)
composed = transforms.Compose([scale,
                               crop,
                               rotate,
                               flip,
                               transforms.ToTensor()])

# Reuse the pipeline built above instead of assembling an identical second copy.
Transformed_dataset = MLCNN_Dataset(csv_file = METADATA_CSV, label_csv = LABEL_CSV,
                                    transform = composed)

In [53]:
# Inspect the merged metadata + label table held by the dataset.
Transformed_dataset.final_data

Unnamed: 0,uuid,timestamp,gps_lat,gps_lon,gps_alt,file_size,width,height,s3_path,url,local_path,Unnamed: 0_y,label
0,f3a9a1d1324b747b16d5ddd88b1e582d4430d952,2015-10-08 12:33:11,33.642213,-79.835887,292.0,6388659.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0512_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/1013/...,183,[1 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
1,489efbec368d78a08e08aef15f21409745231328,2015-10-08 12:33:32,33.647137,-79.829637,306.0,6283783.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0516_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/1013/...,184,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
2,38b2cdf28c9f06cf3adac2f1dd0c50332e3a3ab9,2015-10-08 12:33:43,33.650238,-79.834105,301.0,6990298.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0521_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/1013/...,185,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
3,405c090c177e5f22cabc7ff4124dbbdd66bd3413,2015-10-08 12:35:10,33.611918,-79.836295,331.0,6525512.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0529_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/1013/...,186,[1 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
4,d3f72158df1bea3fbe99c1b9c0df2c1bc1ab1a94,2015-10-09 11:01:41,33.476848,-79.556262,388.0,6244449.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20211/DSC_1028_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/1013/...,279,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30096,c9afa098e3e99ed43c24895e0013068777113e35,2019-09-25 13:38:29,18.226017,-66.636667,871.0,16279550.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_3980...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/9168/...,30093,[1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
30097,6771dd58ec291908301fd6be9f0465331978bb03,2019-09-25 13:39:26,18.228258,-66.639778,833.0,18176802.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_3990...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/9168/...,30096,[0 0 1 0 0 0 0 1 0 0 0 0 0 0 0]
30098,b837de253a884ae225db21b354c79d48cc3064bf,2019-09-25 13:40:25,18.225620,-66.638297,888.0,17613483.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_4000...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/9168/...,30098,[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
30099,ace63d6d40b8867f0bf9b0dc2e0e5e27f84070e0,2019-09-25 13:38:41,18.227450,-66.642885,855.0,16897808.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_3982...,https://ladi.s3-us-west-2.amazonaws.com/Images...,/home/jovyan/course/ladi/Images/FEMA_CAP/9168/...,30094,[1 0 0 0 0 0 1 0 0 0 0 0 0 0 0]


In [34]:
# Preview the first rows of the merged metadata + label table.
# NOTE(review): the saved output of this cell (execution count 34) shows per-class
# columns that the current merge does not produce — it looks stale; re-run to refresh.
Transformed_dataset.final_data.head()

Unnamed: 0_level_0,uuid,timestamp,gps_lat,gps_lon,gps_alt,file_size,width,height,s3_path,url,...,infrastructure:pipe,damage:washout,damage:landslide,infrastructure:railway,infrastructure:road,infrastructure:dam-levee,infrastructure:bridge,infrastructure:utility-line,infrastructure:communications-tower,infrastructure:water-tower
url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
https://ladi.s3-us-west-2.amazonaws.com/Images/FEMA_CAP/1013/20195/DSC_0512_b890dd85-ba6d-4886-bf96-8f64e3f3a244.jpg,f3a9a1d1324b747b16d5ddd88b1e582d4430d952,2015-10-08 12:33:11,33.642213,-79.835887,292.0,6388659.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0512_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,...,0,0,0,0,0,0,0,0,0,0
https://ladi.s3-us-west-2.amazonaws.com/Images/FEMA_CAP/1013/20195/DSC_0516_6bbe455e-9f71-473e-97e9-0edb4bd70092.jpg,489efbec368d78a08e08aef15f21409745231328,2015-10-08 12:33:32,33.647137,-79.829637,306.0,6283783.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0516_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,...,0,0,0,0,0,0,0,0,0,0
https://ladi.s3-us-west-2.amazonaws.com/Images/FEMA_CAP/1013/20195/DSC_0521_40dfddab-0b8e-474c-ac13-e91161495d37.jpg,38b2cdf28c9f06cf3adac2f1dd0c50332e3a3ab9,2015-10-08 12:33:43,33.650238,-79.834105,301.0,6990298.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0521_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,...,0,0,0,0,0,0,0,0,0,0
https://ladi.s3-us-west-2.amazonaws.com/Images/FEMA_CAP/1013/20195/DSC_0529_4b2d9291-c0d8-4e26-aeed-5ff03f83bd6a.jpg,405c090c177e5f22cabc7ff4124dbbdd66bd3413,2015-10-08 12:35:10,33.611918,-79.836295,331.0,6525512.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20195/DSC_0529_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,...,0,0,0,0,0,0,0,0,0,0
https://ladi.s3-us-west-2.amazonaws.com/Images/FEMA_CAP/1013/20211/DSC_1028_6ac36fc5-8160-42ca-8f7c-da5a1b2b3288.jpg,d3f72158df1bea3fbe99c1b9c0df2c1bc1ab1a94,2015-10-09 11:01:41,33.476848,-79.556262,388.0,6244449.0,6000.0,4000.0,s3://ladi/Images/FEMA_CAP/1013/20211/DSC_1028_...,https://ladi.s3-us-west-2.amazonaws.com/Images...,...,0,0,0,0,0,0,0,0,0,0


In [45]:
# Load the label table and discard the saved index column.
final_data_dataloader_input = (
    pd.read_csv('changedLabels.csv')
    .drop(columns='Unnamed: 0')
)

In [46]:
# Display the label table (columns: s3_path, label) read from changedLabels.csv.
final_data_dataloader_input

Unnamed: 0,s3_path,label
0,s3://ladi/Images/FEMA_CAP/1013/20145/DSC_0020_...,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
1,s3://ladi/Images/FEMA_CAP/1013/20145/DSC_0028_...,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
2,s3://ladi/Images/FEMA_CAP/1013/20145/DSC_0035_...,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
3,s3://ladi/Images/FEMA_CAP/1013/20145/DSC_0042_...,[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
4,s3://ladi/Images/FEMA_CAP/1013/20147/DSC_1575_...,[1 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
...,...,...
30096,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_3990...,[0 0 1 0 0 0 0 1 0 0 0 0 0 0 0]
30097,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_3997...,[1 0 1 0 0 0 1 0 0 0 0 0 0 0 0]
30098,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_4000...,[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
30099,s3://ladi/Images/FEMA_CAP/9168/616298/DSC_4005...,[0 0 1 0 0 0 0 1 0 0 0 0 0 0 0]


In [None]:
# --- Split / loader settings ---
batch_size = 256
test_split_ratio = .3
shuffle_dataset = True
random_seed= 76

# Work out how many samples go to the test side, then build the index list.
dataset_size = len(Transformed_dataset)
split = int(np.floor(test_split_ratio * dataset_size))
indices = list(range(dataset_size))
if shuffle_dataset:
    # Seed NumPy's global RNG so the shuffled partition is repeatable.
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# The first `split` indices form the test set, the remainder the training set.
test_indices = indices[:split]
train_indices = indices[split:]

# Each sampler draws, in random order, only from its own index subset.
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)

# NOTE(review): both loaders read from Transformed_dataset, so test batches also
# go through the random crop / rotation / flip transforms — confirm this is intended.
train_loader = DataLoader(
    Transformed_dataset,
    batch_size=batch_size,
    sampler=train_sampler,
    num_workers=4,
)
test_loader = DataLoader(
    Transformed_dataset,
    batch_size=64,
    sampler=test_sampler,
    num_workers=0,
)

import shutil
from glob import glob
from tqdm import tqdm

# NOTE(review): this section works on 'img_align_celeba/*.jpg' and on `label_df` /
# `valid_df`, none of which are created anywhere in the visible notebook — it
# looks carried over from different code. Confirm it is still needed; it cannot
# run on a fresh kernel until those two frames are defined.

# Number of shuffled files moved into the validation directory.
N_VALID = 60780

files = glob('img_align_celeba/*.jpg')
shuffle = np.random.permutation(len(files))
for subset_name in ['train', 'valid']:
    # exist_ok keeps the cell re-runnable: a plain mkdir raises once the
    # directories already exist.
    os.makedirs(os.path.join('/tmp/', subset_name), exist_ok=True)

valid_dict = {}
valid_file_names = []
for i in tqdm(shuffle[:N_VALID]):
    file_name = os.path.basename(files[i])
    # Rows of label_df whose index equals this file name, as a numpy array.
    labels = np.array(label_df[label_df.index==file_name])
    valid_dict[file_name] = labels
    valid_file_names.append(file_name)
    # shutil.move also works when /tmp is on a different filesystem than the
    # source directory, where os.rename would fail.
    shutil.move(files[i], os.path.join('/tmp/valid', file_name))
valid_df.index = valid_file_names
valid_df.columns = ['labels']

## uncomment the below given line to check the head of the dataframe
# valid_df.head()

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The optimizer below needs `model`, so it has to be created in this cell.
model = make_model('resnet18', num_classes=2, pretrained=True).to(device)

# NOTE(review): the label column displayed earlier in this notebook holds 15-entry
# multi-hot vectors; num_classes=2 with CrossEntropyLoss presumably does not match
# that target format — confirm the intended task before training.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum = 0.9, nesterov=True)