In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
# Apply the whole-style presets FIRST. Both the 'ggplot' style sheet and
# seaborn's set_style() write into rcParams (including font settings), so any
# explicit override placed before them can be silently replaced.
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid': False})

# Explicit overrides: these now win over the presets above.
plt.rcParams["figure.figsize"] = (6, 6)
plt.rcParams["figure.dpi"] = 200
plt.rcParams["font.size"] = 14
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.rcParams['image.cmap'] = 'viridis'
import warnings
warnings.filterwarnings("ignore")

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image
from skimage.io import imread
from skimage.util import montage
from tqdm import tqdm
tqdm.pandas() # hack progressbars into pandas
def montage_rgb(x, **kwargs):
    """Build a montage from a 4-D image stack, one channel at a time.

    `x` is indexed as x[:, :, :, i]; each slice along the last axis is passed
    to skimage's `montage` (with **kwargs forwarded) and the per-channel
    montages are stacked back together on a new trailing axis.
    """
    per_channel = [montage(x[:, :, :, i], **kwargs) for i in range(x.shape[3])]
    return np.stack(per_channel, -1)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import shutil

In [None]:
!pip install -U -q kaggle
!mkdir -p ~/.kaggle
!echo '{"username":"cassidygasteiger","key":"6c9923a920bf29bf27fe448f3041bf1e"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d cassidygasteiger/new-hurricane-damage-sat-images

Downloading new-hurricane-damage-sat-images.zip to /content
100% 210M/210M [00:10<00:00, 28.4MB/s]
100% 210M/210M [00:10<00:00, 20.7MB/s]


In [None]:
!unzip new-hurricane-damage-sat-images.zip -d /content/new-hurricane-damage-sat-images-unzippedA

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4593.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4594.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4595.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4596.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4597.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4598.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4599.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/46.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/460.jpeg  
  inflating: /content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/imagery/4600.jpeg  
  inflat

In [None]:
# Build a DataFrame with one row per JPEG found (recursively) in the dataset.
# Use the absolute /content location the archive was unzipped to, so the cell
# does not depend on the kernel's current working directory.
satellite_dir = Path('/content/new-hurricane-damage-sat-images-unzippedA/')
image_df = pd.DataFrame({'path': list(satellite_dir.glob('**/*.jp*g'))})
image_df.head(5)

Unnamed: 0,path
0,../content/new-hurricane-damage-sat-images-unz...
1,../content/new-hurricane-damage-sat-images-unz...
2,../content/new-hurricane-damage-sat-images-unz...
3,../content/new-hurricane-damage-sat-images-unz...
4,../content/new-hurricane-damage-sat-images-unz...


In [None]:
# Attach the per-image metadata (label, distance, coordinates, elevation) to
# each image path. The join key is the file stem, compared as a string.
# Absolute path keeps this cell independent of the current working directory.
labels = pd.read_csv('/content/new-hurricane-damage-sat-images-unzippedA/PRJ-2974/distance_elevation.csv')
labels['img_id'] = labels['img_id'].astype(str)
image_df['img_id'] = image_df['path'].apply(lambda x: str(x.stem))

# Inner join: images without a metadata row (and vice versa) are dropped.
# Both helper columns are removed in a single non-mutating step.
merged_df = (
    pd.merge(image_df, labels, on='img_id')
    .drop(columns=['img_id', 'Unnamed: 0'])
)
merged_df.head(5)

Unnamed: 0,path,distance,complete_post_catalog_id,label,coord_x,coord_y,elevation
0,../content/new-hurricane-damage-sat-images-unz...,4857.074375,105001000B95E100,Damaged,-95.651464,29.835397,33.209309
1,../content/new-hurricane-damage-sat-images-unz...,3201.677515,105001000B95E100,Damaged,-95.610534,29.762495,22.919876
2,../content/new-hurricane-damage-sat-images-unz...,174287.6782,105001000B9D7F00,Damaged,-93.805502,30.041813,0.781287
3,../content/new-hurricane-damage-sat-images-unz...,7005.253648,105001000B95E100,Damaged,-95.627662,29.857291,33.102058
4,../content/new-hurricane-damage-sat-images-unz...,4570.787599,105001000B95E100,Damaged,-95.652583,29.832338,32.782433


In [None]:
# Features are the image paths; targets are the label strings.
X, y = merged_df['path'], merged_df['label']

In [None]:
# Two-stage split: hold out 20% as the test set, then take 25% of the remaining
# 80% as validation (i.e. 60/20/20 overall). Both stages use random_state=1.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, random_state=1
)
print(X_train)

19343    ../content/new-hurricane-damage-sat-images-unz...
10352    ../content/new-hurricane-damage-sat-images-unz...
14443    ../content/new-hurricane-damage-sat-images-unz...
15267    ../content/new-hurricane-damage-sat-images-unz...
1593     ../content/new-hurricane-damage-sat-images-unz...
                               ...                        
24197    ../content/new-hurricane-damage-sat-images-unz...
2163     ../content/new-hurricane-damage-sat-images-unz...
22676    ../content/new-hurricane-damage-sat-images-unz...
21069    ../content/new-hurricane-damage-sat-images-unz...
6211     ../content/new-hurricane-damage-sat-images-unz...
Name: path, Length: 14625, dtype: object


In [None]:
# Re-pair each split's paths with its labels in a fresh frame (new 0..n-1 index).
train = pd.DataFrame({'path': list(X_train), 'label': list(y_train)})
test = pd.DataFrame({'path': list(X_test), 'label': list(y_test)})
val = pd.DataFrame({'path': list(X_val), 'label': list(y_val)})

In [None]:
# Partition every split into its 'Damaged' and 'Undamaged' rows.
train_damage = train.loc[train['label'].eq('Damaged')]
train_nondamage = train.loc[train['label'].eq('Undamaged')]

test_damage = test.loc[test['label'].eq('Damaged')]
test_nondamage = test.loc[test['label'].eq('Undamaged')]

val_damage = val.loc[val['label'].eq('Damaged')]
val_nondamage = val.loc[val['label'].eq('Undamaged')]

In [None]:
import os

os.makedirs("/content/train/damage/", exist_ok=True)

# Relocate every damaged training image into its class folder.
for src_path in train_damage['path']:
  shutil.move(src_path, "/content/train/damage/")

In [None]:
os.makedirs("/content/test/damage/", exist_ok=True)

# Move each damaged test image into the folder created above. The destination
# is the same absolute path passed to makedirs, so the move no longer depends
# on the kernel's working directory happening to be /content.
for src_path in test_damage['path']:
  shutil.move(src_path, "/content/test/damage/")

In [None]:
os.makedirs("/content/validation/damage/", exist_ok=True)

# Move each damaged validation image into the folder created above. The
# destination is the same absolute path passed to makedirs, so the move no
# longer depends on the kernel's working directory happening to be /content.
for src_path in val_damage['path']:
  shutil.move(src_path, "/content/validation/damage/")

In [None]:
os.makedirs("/content/train/no_damage/", exist_ok=True)

# Move each undamaged training image into the folder created above. The
# destination is the same absolute path passed to makedirs, so the move no
# longer depends on the kernel's working directory happening to be /content.
for src_path in train_nondamage['path']:
  shutil.move(src_path, "/content/train/no_damage/")

In [None]:
os.makedirs("/content/test/no_damage/", exist_ok=True)

# Move each undamaged test image into the folder created above. The
# destination is the same absolute path passed to makedirs, so the move no
# longer depends on the kernel's working directory happening to be /content.
for src_path in test_nondamage['path']:
  shutil.move(src_path, "/content/test/no_damage/")

In [None]:
os.makedirs("/content/validation/no_damage/", exist_ok=True)

# Move each undamaged validation image into the folder created above. The
# destination is the same absolute path passed to makedirs, so the move no
# longer depends on the kernel's working directory happening to be /content.
for src_path in val_nondamage['path']:
  shutil.move(src_path, "/content/validation/no_damage/")

In [None]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
gpu_ready = torch.cuda.is_available()
print("GPU is available!" if gpu_ready else "GPU is not available. Using CPU.")
device = torch.device("cuda" if gpu_ready else "cpu")

GPU is available!


In [None]:
original_dataset_dir = '/content'

In [None]:
# Class folders for each split, all rooted at original_dataset_dir.
train_damage_dir = f'{original_dataset_dir}/train/damage'
validation_damage_dir = f'{original_dataset_dir}/validation/damage'
test_damage_dir = f'{original_dataset_dir}/test/damage'

train_nodamage_dir = f'{original_dataset_dir}/train/no_damage'
validation_nodamage_dir = f'{original_dataset_dir}/validation/no_damage'
test_nodamage_dir = f'{original_dataset_dir}/test/no_damage'

In [None]:
# Report how many files ended up in every split/class folder.
folder_report = [
    ('total training damage images: ', train_damage_dir),
    ('total validation damage images: ', validation_damage_dir),
    ('total test damage images: ', test_damage_dir),
    ('total training no damage images: ', train_nodamage_dir),
    ('total validation no damage images: ', validation_nodamage_dir),
    ('total test no damage images: ', test_nodamage_dir),
]
for caption, folder in folder_report:
    print(caption, len(os.listdir(folder)))

total training damage images:  8374
total validation damage images:  2831
total test damage images:  2788
total training no damage images:  6251
total validation no damage images:  2045
total test no damage images:  2088


# KMeans for Clustering Unlabelled Images

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.datasets import mnist

from skimage import io
from sklearn.cluster import KMeans
import numpy as np
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Empty accumulators that the following cells fill with image arrays and labels.
# Note: this rebinds y_train / y_test, discarding the label Series that
# train_test_split assigned to those names earlier in the notebook.
x_train, y_train, x_test, y_test = [], [], [], []

In [None]:
# Create the output folders for the grayscale copies (train/test x damage/no_damage).
for split_name in ("train", "test"):
    for class_name in ("damage", "no_damage"):
        os.makedirs(f"/content/compressed_{split_name}/{class_name}", exist_ok=True)

In [None]:
# Paths of the grayscale output folders, rooted at original_dataset_dir.
compressed_train_damage = f'{original_dataset_dir}/compressed_train/damage'
compressed_train_nodamage = f'{original_dataset_dir}/compressed_train/no_damage'
compressed_test_damage = f'{original_dataset_dir}/compressed_test/damage'
compressed_test_nodamage = f'{original_dataset_dir}/compressed_test/no_damage'

In [None]:
# Convert every damaged TRAIN image to grayscale, save the grayscale copy, and
# collect its pixel array with label 1.
# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps the
# sample order reproducible between runs.
for img_file in sorted(os.listdir(train_damage_dir)):
    img_path = os.path.join(train_damage_dir, img_file)

    # The context manager closes the source file deterministically;
    # convert('L') returns an independent single-channel image.
    with Image.open(img_path) as img:
        new_img = img.convert('L')

    new_path = os.path.join(compressed_train_damage, img_file)
    new_img.save(new_path)

    # keep the 2-D pixel array as-is (it is not flattened here)
    x_train.append(np.array(new_img))

    # label: 1 = damage
    y_train.append(1)


In [None]:
# Convert every undamaged TRAIN image to grayscale, save the grayscale copy,
# and collect its pixel array with label 0.
# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps the
# sample order reproducible between runs.
for img_file in sorted(os.listdir(train_nodamage_dir)):
    img_path = os.path.join(train_nodamage_dir, img_file)

    # The context manager closes the source file deterministically;
    # convert('L') returns an independent single-channel image.
    with Image.open(img_path) as img:
        new_img = img.convert('L')

    new_path = os.path.join(compressed_train_nodamage, img_file)
    new_img.save(new_path)

    # keep the 2-D pixel array as-is (it is not flattened here)
    x_train.append(np.array(new_img))

    # label: 0 = no damage
    y_train.append(0)

In [None]:
# Convert every damaged TEST image to grayscale, save the grayscale copy, and
# collect its pixel array with label 1.
# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps the
# sample order reproducible between runs.
for img_file in sorted(os.listdir(test_damage_dir)):
    img_path = os.path.join(test_damage_dir, img_file)

    # The context manager closes the source file deterministically;
    # convert('L') returns an independent single-channel image.
    with Image.open(img_path) as img:
        new_img = img.convert('L')

    new_path = os.path.join(compressed_test_damage, img_file)
    new_img.save(new_path)

    # keep the 2-D pixel array as-is (it is not flattened here)
    x_test.append(np.array(new_img))

    # label: 1 = damage
    y_test.append(1)


In [None]:
# Convert every undamaged TEST image to grayscale, save the grayscale copy,
# and collect its pixel array with label 0.
# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps the
# sample order reproducible between runs.
for img_file in sorted(os.listdir(test_nodamage_dir)):
    img_path = os.path.join(test_nodamage_dir, img_file)

    # The context manager closes the source file deterministically;
    # convert('L') returns an independent single-channel image.
    with Image.open(img_path) as img:
        new_img = img.convert('L')

    new_path = os.path.join(compressed_test_nodamage, img_file)
    new_img.save(new_path)

    # keep the 2-D pixel array as-is (it is not flattened here)
    x_test.append(np.array(new_img))

    # label: 0 = no damage
    y_test.append(0)


In [None]:
# Turn the accumulated Python lists into NumPy arrays.
x_train, y_train, x_test, y_test = map(
    np.array, (x_train, y_train, x_test, y_test)
)

In [None]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(14625, 256, 256) (4876, 256, 256) (14625,) (4876,)


In [None]:
# Cast to float32, divide by 255, and add a trailing channel axis so each
# array has the (N, 256, 256, 1) layout the model's Input layer declares.
x_train = (x_train.astype('float32') / 255).reshape(len(x_train), 256, 256, 1)
x_test = (x_test.astype('float32') / 255).reshape(len(x_test), 256, 256, 1)
x_test.shape

(4876, 256, 256, 1)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
# from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

class ConvAutoEncoder(Model):
  """Small convolutional autoencoder for 256x256 single-channel images.

  The encoder applies two stride-2 convolutions (8 then 4 filters). The
  decoder takes a (64, 64, 4) input, applies two stride-2 transposed
  convolutions (4 then 8 filters) and a final 1-filter sigmoid convolution.
  """

  def __init__(self):
    super().__init__()

    # Encoder: built first so layer/model auto-naming order is unchanged.
    encoder_layers = [
      layers.Input(shape=(256, 256, 1)),
      layers.Conv2D(8, (3,3), activation='relu', padding='same', strides=2),
      layers.Conv2D(4, (3,3), activation='relu', padding='same', strides=2),
    ]
    self.encoder = tf.keras.Sequential(encoder_layers)

    # Decoder: mirrors the encoder and ends in a sigmoid output layer.
    decoder_layers = [
      layers.Input(shape=(64, 64, 4)),
      layers.Conv2DTranspose(4, kernel_size=3, strides=2, activation='relu', padding='same'),
      layers.Conv2DTranspose(8, kernel_size=3, strides=2, activation='relu', padding='same'),
      layers.Conv2D(1, kernel_size=(3,3), activation='sigmoid', padding='same'),
    ]
    self.decoder = tf.keras.Sequential(decoder_layers)

  def call(self, x):
    """Run x through the encoder and then the decoder; return the decoder output."""
    return self.decoder(self.encoder(x))

autoencoder = ConvAutoEncoder()

In [None]:
autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())

In [None]:
autoencoder.encoder.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 128, 8)       80        
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 64, 4)         292       
                                                                 
Total params: 372
Trainable params: 372
Non-trainable params: 0
_________________________________________________________________


In [None]:
autoencoder.decoder.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_transpose (Conv2DTra  (None, 128, 128, 4)      148       
 nspose)                                                         
                                                                 
 conv2d_transpose_1 (Conv2DT  (None, 256, 256, 8)      296       
 ranspose)                                                       
                                                                 
 conv2d_2 (Conv2D)           (None, 256, 256, 1)       73        
                                                                 
Total params: 517
Trainable params: 517
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the autoencoder to reproduce its input (x_train is both input and target)
# for 10 epochs; `history` keeps the per-epoch loss values.
# NOTE(review): validation_data is the test split, so val_loss is computed on
# the same images later used for evaluation — consider a separate validation set.
history = autoencoder.fit(x_train, x_train, epochs=10, validation_data=(x_test, x_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Reconstruct the test images with the full autoencoder (encoder + decoder);
# `pred` holds the decoder output, not the encoder's compressed representation.
# The recorded single-call predict on the whole array ended in a
# ResourceExhaustedError, so feed the model a bounded slice at a time and
# assemble the result with NumPy on the host.
PREDICT_CHUNK = 256  # images per predict() call; lower this if memory is still tight
pred = np.concatenate(
    [
        autoencoder.predict(x_test[start:start + PREDICT_CHUNK], verbose=0)
        for start in range(0, len(x_test), PREDICT_CHUNK)
    ],
    axis=0,
)



ResourceExhaustedError: ignored

In [None]:
history.history

In [None]:
# Per-epoch training and validation loss recorded by fit().
train_loss_values, val_loss_values = (
    history.history[key] for key in ('loss', 'val_loss')
)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Training vs. validation loss per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.plot(range(len(train_loss_values)), train_loss_values, color='green', label='train loss')
ax.plot(range(len(val_loss_values)), val_loss_values, color='red', label='val loss')
ax.legend()
ax.set_title("Train & Val loss plot")
ax.set_xlabel('Number of Epochs')
ax.set_ylabel('Loss')

plt.show()

In [None]:
# randomly select input image (unseeded, so a different image on every run)
index = np.random.randint(len(x_test))
# print its label from y_test
print(y_test[index])
# plot the image, dropping the trailing channel axis for imshow
plt.imshow(x_test[index].reshape(256,256))
# switch the colormap to gray (affects the current image and the pyplot default)
plt.gray()

In [None]:
# Show the autoencoder's output for the same `index` chosen in the previous cell.
plt.imshow(pred[index].reshape(256,256))
plt.gray()

In [None]:
# set the number of clusters
num_clusters = 2

In [None]:
# Flatten every reconstructed image into one feature row for KMeans.
# reshape(len(pred), -1) folds ALL remaining axes (height, width, channel) into
# the row, so it stays correct even if the channel axis is not of size 1.
pred_reshaped = pred.reshape(len(pred), -1)
pred_reshaped.shape

In [None]:
# Sanity check: un-flatten one random row back to 256x256 and display it.
index = np.random.randint(len(x_test))
print(y_test[index])
pred_org = pred_reshaped[index].reshape(256, 256)
plt.imshow(pred_org)
plt.gray()

In [None]:
# # perform k-means clustering and replace pixel values with nearby centroids
kmeans = KMeans(n_clusters=num_clusters)
%time clusters = kmeans.fit_predict(pred_reshaped)

kmeans.cluster_centers_.shape

In [None]:
# One row per test image: its y_test value next to its KMeans cluster id.
cluster_map = pd.DataFrame({
    'data_index': y_test,       # despite the column name, these are the y_test labels
    'cluster': kmeans.labels_,
})

In [None]:
# mapping labels from cluster to original labels
def get_reference_dict(clusters,data_label):
    """Map every cluster id to the most frequent true label among its members.

    Parameters
    ----------
    clusters : array-like of int
        Cluster id assigned to each sample.
    data_label : array-like of non-negative int
        True label of each sample (same length as `clusters`); must be
        non-negative integers because np.bincount is used for counting.

    Returns
    -------
    dict
        {cluster_id: majority_label}; ties resolve to the smallest label.
    """
    clusters = np.asarray(clusters)
    data_label = np.asarray(data_label)
    reference_label = {}
    # Iterate over the ids that actually occur: this works for non-contiguous
    # ids (e.g. {2, 5}) and never calls bincount on an empty selection.
    for cluster_id in np.unique(clusters):
        members = data_label[clusters == cluster_id]
        reference_label[int(cluster_id)] = int(np.bincount(members).argmax())
    return reference_label
    
# Mapping predictions to original labels
def get_labels(clusters,refernce_labels):
    """Translate cluster ids into labels using a cluster -> label mapping.

    Parameters
    ----------
    clusters : iterable of int
        Cluster id of each sample.
    refernce_labels : dict
        Mapping {cluster_id: label}, e.g. the result of get_reference_dict.
        (The parameter name keeps its original spelling so existing callers
        are unaffected.)

    Returns
    -------
    numpy.ndarray of float
        Label for each sample, in the same order as `clusters`.
    """
    # Look the mapping up through the PARAMETER; the previous body read a
    # global named `reference_labels` and silently ignored the argument.
    return np.array([refernce_labels[c] for c in clusters], dtype=float)

In [None]:
# Map each cluster to its majority true label, convert cluster ids to labels,
# and score them against the ground truth.
reference_labels = get_reference_dict(clusters,y_test)
predicted_labels = get_labels(clusters,reference_labels)
# accuracy_score's signature is (y_true, y_pred); pass the ground truth first.
# NOTE: the cluster->label mapping is derived from y_test itself, so this
# accuracy is an optimistic estimate.
print("The accuracy of the model is: ", accuracy_score(y_test, predicted_labels))

In [None]:
cluster_map['predicted_labels'] = predicted_labels

In [None]:
# Getting Damaged Image
cluster_map.head()

In [None]:
np.where(predicted_labels == y_test)

In [None]:
# pred_org = pred_reshaped[0].reshape(256, 256)
# plt.imshow(pred_org.reshape(256,256))
plt.imshow(x_test[1].reshape(256,256))

In [None]:
plt.imshow(x_test[4].reshape(256,256))