In [23]:
import os
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

Preprocessing the dataset into one hot encoded list

In [24]:
rebuild_data = True
if(os.path.exists("Training_set.npy") and os.path.exists("Validation_set.npy")):
    rebuild_data = False

class Preprocess():
    Img_size = 50
    earthquake = "train+val/train/Earthquake"
    flood = "train+val/train/Flood"
    hurricane = "train+val/train/Hurricane"
    landslides = "train+val/train/Landslides"
    wildfire = "train+val/train/Wildfire"

    count_earthquake = 0
    count_flood = 0
    count_hurricane = 0
    count_landslides = 0
    count_wildfire = 0

    earthquake_val = "train+val/validation/Earthquake" 
    flood_val = "train+val/validation/Flood"
    hurricane_val = "train+val/validation/Hurricane"
    landslides_val = "train+val/validation/Landslides"
    wildfire_val = "train+val/validation/Wildfire"

    Train_set = []
    Val_set = []
    labels = {earthquake:0 , flood:1 , hurricane:2 , landslides:3 , wildfire:4}
    labels_val = {earthquake_val:0 , flood_val:1 , hurricane_val:2 , landslides_val:3 , wildfire_val:4}

    def make_train_set(self):
        for label in self.labels:
            for f in tqdm(os.listdir(label)):
                try:
                    path = os.path.join(label , f)
                    img = cv2.imread(path , cv2.IMREAD_GRAYSCALE)
                    img = cv2.resize(img , (self.Img_size , self.Img_size))
                    img_label = np.eye(5)[self.labels[label]]
                    self.Train_set.append([np.array(img) , img_label])
                    if label == self.earthquake:
                        self.count_earthquake+=1
                    elif label == self.flood:
                        self.count_flood += 1
                    elif label == self.hurricane:
                        self.count_hurricane += 1
                    elif label == self.landslides:
                        self.count_landslides += 1
                    elif label == self.wildfire:
                        self.count_wildfire += 1
                except Exception as e:
                    pass

        np.random.shuffle(self.Train_set)
        self.Train_set = np.asarray(self.Train_set , dtype = object)
        np.save("Training_set.npy" , self.Train_set)
        print("earthquake:" , self.count_earthquake)
        print("flood:" , self.count_flood)
        print("hurricane:" , self.count_hurricane)
        print("landslides:" , self.count_landslides)
        print("wildfire:" , self.count_wildfire)

    def make_val_set(self):
        for label in self.labels_val:
            for f in tqdm(os.listdir(label)):
                try:
                    path = os.path.join(label , f)
                    img = cv2.imread(path , cv2.IMREAD_GRAYSCALE)
                    img = cv2.resize(img , (self.Img_size , self.Img_size))
                    img_label = np.eye(5)[self.labels_val[label]]
                    self.Val_set.append([np.array(img) , img_label])
                except Exception as e:
                    pass

        np.random.shuffle(self.Val_set)
        self.Val_set = np.asarray(self.Val_set , dtype = object)
        np.save("Validation_set.npy" , self.Val_set)

if rebuild_data:
    preprocess = Preprocess()
    preprocess.make_train_set()
    preprocess.make_val_set()

In [25]:
training_set = np.load("Training_set.npy" , allow_pickle=True)
validation_set = np.load("Validation_set.npy" , allow_pickle=True)

In [26]:
class Net(nn.Module):
    def __init__(self ):
        super().__init__()
        self.conv1 = nn.Conv2d(1 , 32 , kernel_size=5)
        self.conv2 = nn.Conv2d(32 , 64 , kernel_size=5)
        self.conv3 = nn.Conv2d(64 , 128 , kernel_size=5)

        self.flatten_dim = None
        ##definig a temp x to get the flatten dimensions after a forward pass
        x = torch.randn(50 , 50).view(-1 , 1 , 50 , 50)
        self.convf(x)

        self.fc1 = nn.Linear(self.flatten_dim , 512)
        self.fc2 = nn.Linear(512 , 5)
        

    def convf(self , x ):
        x = F.max_pool2d(F.relu(self.conv1(x)) , (2 , 2))
        x = F.max_pool2d(F.relu(self.conv2(x)) , (2 , 2))
        x = F.max_pool2d(F.relu(self.conv3(x)) , (2 , 2))

        if self.flatten_dim == None:
            self.flatten_dim = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]

        return x

    def forward(self , x):
        x = self.convf(x)
        x = x.view(-1 , self.flatten_dim)
        
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return F.softmax(x ,dim = 1 )

net = Net()

In [27]:
training_set_x = torch.tensor(np.array([i[0] for i in training_set])).view(-1 , 50 , 50)
training_set_x = training_set_x/255.0
training_set_y = torch.tensor(np.array([i[1] for i in training_set]))

validation_set_x = torch.tensor(np.array([i[0] for i in validation_set])).view(-1 , 50 , 50)
validation_set_x = validation_set_x/255.0
validation_set_y = torch.tensor(np.array([i[1] for i in validation_set]))


In [28]:
BATCH_SIZE = 10
EPOCS = 5
LEARNING_RATE = 0.001

optimizer = optim.Adam(net.parameters() , LEARNING_RATE)
loss_function = nn.MSELoss()

def train(net):
    for epoc in range(EPOCS):
        for i in tqdm(range(0 , len(training_set_x) , BATCH_SIZE)):
            x = training_set_x[i:i+BATCH_SIZE].view(-1 , 1 , 50 , 50).float()
            y = training_set_y[i:i+BATCH_SIZE].float()
            net.zero_grad()
            output = net(x)
            loss = loss_function(output, y)
            loss.backward()
            optimizer.step()
        print(f"loss after {epoc+1} iterations: {loss}")

In [29]:
def test(net):
    correct = 0
    total = 0   
    with torch.no_grad():
        for i in tqdm(range(len(validation_set_x))):
            actual_class = torch.argmax(validation_set_y[i])
            predicted_class = torch.argmax(net(validation_set_x[i].view(-1 , 1 , 50 , 50)))
            if predicted_class==actual_class:
                correct +=1
            total+=1
    print(f'Accuracy on test set: {round(correct/total*100 , 3)}')

In [30]:
train(net)

100%|██████████| 549/549 [00:05<00:00, 95.38it/s] 


loss after 1 iterations: 0.227981835603714


100%|██████████| 549/549 [00:05<00:00, 101.61it/s]


loss after 2 iterations: 0.2201642096042633


100%|██████████| 549/549 [00:05<00:00, 101.68it/s]


loss after 3 iterations: 0.21303586661815643


100%|██████████| 549/549 [00:05<00:00, 101.40it/s]


loss after 4 iterations: 0.21627095341682434


100%|██████████| 549/549 [00:05<00:00, 101.80it/s]

loss after 5 iterations: 0.21183332800865173





In [31]:
test(net)

100%|██████████| 1284/1284 [00:00<00:00, 1289.96it/s]

Accuracy on test set: 56.698





In [37]:
def forw_pass(x , y , train = False):
    if train:
        net.zero_grad()
    try:
        output = net(x.view(-1 , 1, 50 , 50))
        matches = [torch.argmax(i)==torch.argmax(j) for i , j in zip(y , output)]
        accuracy = matches.count(True)/len(matches)*100
        if train:
            loss = loss_function(output , y)
            loss.backward()
            optimizer.step()

        print(f"Accuracy: {accuracy}")

    except Exception as e:
        print("Wrong image dimensions!!")


In [38]:
forw_pass(validation_set_x , validation_set_y)

Accuracy: 56.69781931464174


<h2>Test on your own image</h2>

In [39]:
def predictCatOrDog(path):
    img = cv2.imread(path , cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img , (50 , 50))

    img = torch.tensor(img).view(-1 , 1 , 50 , 50)
    img = img/255.0

    prediction = torch.argmax(net(img))
    if prediction==0:
        print("Earthquake")
    elif prediction == 1:
        print("Flood")
    elif prediction == 2:
        print("Hurricane")
    elif prediction == 3:
        print("landslide")
    elif prediction == 4:
        print("wildfire")


Flood
