# Installing Packages

In [None]:
!pip install pyproj
!pip install rasterio
!pip install geopandas
!pip install pandas
!pip install shapely
!pip install os
!pip install fastai
!pip install torchvision
!pip install efficientnet_pytorch
!pip install keras

# Importing Libraries

## For Image PreProcessing

In [None]:
from pyproj import Proj, transform
from rasterio.mask import mask
import geopandas as gpd
import pandas as pd
import rasterio
import shapely
import os

## For PreTrained Models

In [None]:
from fastai.callbacks import SaveModelCallback
from efficientnet_pytorch import EfficientNet
from fastai.vision.learner import model_meta
from fastai.metrics import error_rate
from fastai.vision.models import *
from torchvision.models import *
from fastai.vision import *

# Image Pre-Processing


In [None]:
# Root folder of the raw data (GeoTIFF, GeoJSON and JSON files).
source_path = 'stac'

# Root folder that receives the extracted rooftop images.
dest_path = 'Processed_data'

# Running count of the test images written so far; also used to number them.
testImg_pos = 0

# Countries whose imagery is processed.
countries = ['colombia', 'guatemala', 'st_lucia']

# EPSG code for each country.
epsg = {
    'colombia': '32618',
    'guatemala': '32616',
    'st_lucia': '32620',
}

# Regions processed within each country.
regions = {
    'st_lucia': ['dennery'],
    'colombia': ['borde_rural', 'borde_soacha'],
    'guatemala': ['mixco_1_and_ebenezer', 'mixco_3'],
}

# Dataset splits, listed so they can be looped over.
cases = ['train', 'test']

# Ids of the test rooftops, in the order in which their images were extracted.
ids = []

## Creating Directories to store extracted images

In [None]:
# Roof-material classes. The extraction step writes each training image into the
# sub-folder of train/ named after its roof material.
materials = ["concrete_cement","healthy_metal","incomplete","irregular_metal","other"]
for name in materials:
    # exist_ok=True keeps this cell re-runnable once the folders already exist.
    os.makedirs(os.path.join(dest_path, 'train', name), exist_ok=True)
# All test images share a single folder.
os.makedirs(os.path.join(dest_path, 'test', 'data'), exist_ok=True)

## Extracting rooftops from the GeoTIFF image

In [None]:
# Crop every rooftop listed in the train/test GeoJSON files out of its region's raster and
# save each crop as a separate GeoTIFF:
#   train crops -> <dest_path>/train/<roof_material>/<id>.tif
#   test crops  -> <dest_path>/test/data/<n>.tif, n being the 1-based position of the id in `ids`

# Start from a clean slate so that re-running this cell neither duplicates entries in `ids`
# nor continues the test-image numbering of a previous run.
testImg_pos = 0
ids.clear()

for country in countries:
    # Rooftop bounds are reprojected from EPSG:4326 to the country's EPSG code before cropping.
    # NOTE(review): assumes the raster is stored in that EPSG code -- confirm against src.crs.
    inProj = Proj(init = 'epsg:4326')
    outProj = Proj(init = 'epsg:'+epsg[country])
    for region in regions[country]:
        region_dir = source_path+'/'+country+'/'+region
        # The context manager closes the raster once the region has been processed.
        with rasterio.open(region_dir+'/'+region+'_ortho-cog.tif') as src:
            for case in cases:
                # The GeoJSON files live under source_path (the previous `data_path` was undefined).
                aoi_geojson = gpd.read_file(region_dir+'/'+case+'-'+region+'.geojson')
                for i in range(aoi_geojson.shape[0]):
                    # Bounding box of the rooftop geometry, reprojected corner by corner.
                    x1, y1, x2, y2 = aoi_geojson['geometry'][i].bounds
                    x1_new,y1_new = transform(inProj,outProj,x1,y1)
                    x2_new,y2_new = transform(inProj,outProj,x2,y2)
                    ar_new = shapely.geometry.box(x1_new, y1_new, x2_new, y2_new, ccw=True)

                    # Cut the box out of the raster and describe the crop in the output metadata.
                    crop, cropTransform = mask(src, [ar_new], crop=True)
                    meta = src.meta.copy()
                    meta.update({'transform':cropTransform, 'height':crop.shape[1], 'width':crop.shape[2]})

                    if case == 'train':
                        out_path = dest_path+'/'+case+'/'+aoi_geojson['roof_material'][i]+'/'+aoi_geojson['id'][i]+'.tif'
                        with rasterio.open(out_path, 'w', **meta) as dst:
                            dst.write(crop)
                    if case == 'test':
                        testImg_pos = testImg_pos+1
                        out_path = dest_path+'/'+case+'/data/'+str(testImg_pos)+'.tif'
                        with rasterio.open(out_path, 'w', **meta) as dst:
                            dst.write(crop)
                        # Recorded only after a successful write, so ids[k] always matches (k+1).tif.
                        ids.append(aoi_geojson['id'][i])

# Classification Using PreTrained Models

## Loading the Image Data Bunch

In [None]:
# Fix NumPy's global seed so that the same validation subset is drawn on every run.
np.random.seed(2)

# Build the data bunch from the extracted images: 20% of the data held out for validation,
# the 'test' folder attached as the test set, augmentations applied, image size 256,
# batch size 32, normalised with the ImageNet statistics.
data = ImageDataBunch.from_folder(
    dest_path,
    train='train',
    test='test',
    valid_pct=0.2,
    ds_tfms=get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.),
    size=256,
    bs=32,
).normalize(imagenet_stats)

## Function for finding the optimal learning rate (Future Work)

In [None]:
# Disabled draft of an automatic learning-rate picker. The whole definition sits inside a
# string literal, so this cell defines and executes nothing; the calls to
# find_appropriate_lr further down in the notebook are commented out as well.
# NOTE(review): local_min_lr is only assigned inside the while loop, so it would be unbound
# at `lr_to_use = ...` whenever the loop body never runs -- address before re-enabling.
'''
Giving a much Larger Value than what is being expected.

def find_appropriate_lr(model:Learner, lr_diff:int = 15, loss_threshold:float = .05, adjust_value:float = 1, plot:bool = False) -> float:
    #Run the Learning Rate Finder
    model.lr_find()
    
    #Get loss values and their corresponding gradients, and get lr values
    losses = np.array(model.recorder.losses)
    assert(lr_diff < len(losses))
    loss_grad = np.gradient(losses)
    lrs = model.recorder.lrs
    
    #Search for index in gradients where loss is lowest before the loss spike
    #Initialize right and left idx using the lr_diff as a spacing unit
    #Set the local min lr as -1 to signify if threshold is too low
    r_idx = -1
    l_idx = r_idx - lr_diff
    while (l_idx >= -len(losses)) and (abs(loss_grad[r_idx] - loss_grad[l_idx]) > loss_threshold):
        local_min_lr = lrs[l_idx]
        r_idx -= 1
        l_idx -= 1

    lr_to_use = local_min_lr * adjust_value
    
    if plot:
        # plots the gradients of the losses in respect to the learning rate change
        plt.plot(loss_grad)
        plt.plot(len(losses)+l_idx, loss_grad[l_idx],markersize=10,marker='o',color='red')
        plt.ylabel("Loss")
        plt.xlabel("Index of LRs")
        plt.show()

        plt.plot(np.log10(lrs), losses)
        plt.ylabel("Loss")
        plt.xlabel("Log 10 Transform of Learning Rate")
        loss_coord = np.interp(np.log10(lr_to_use), np.log10(lrs), losses)
        plt.plot(np.log10(lr_to_use), loss_coord, markersize=10,marker='o',color='red')
        plt.show()
        
    return lr_to_use '''

## Loading the PreTrained Model and Training its final layers

In [None]:
# EfficientNet-B7 with pretrained weights; num_classes sizes the classifier head to the
# number of classes in the data bunch.
model = EfficientNet.from_pretrained('efficientnet-b7', num_classes=data.c)

# Attach a fresh linear layer as the classifier head. Its dimensions are read from the
# existing head and from the data bunch instead of being hard-coded (previously 2560 -> 5),
# so they stay consistent with num_classes above and with other EfficientNet variants.
model._fc = nn.Linear(in_features=model._fc.in_features, out_features=data.c, bias=True)

# Learner tracking the error rate, trained with mixup and in half precision.
learn = Learner(data, model, metrics=[error_rate]).mixup().to_fp16()

In [None]:
# Run the learning-rate finder and plot its loss curve with the suggested value marked.
learn.lr_find()
learn.recorder.plot(suggestion=True)

# Learning rate chosen by hand for the first training stage.
lr = 1e-5

In [None]:
#The function needs to be improved before it can be used
#lr = find_appropriate_lr(learn, plot=True)

In [None]:
# First training stage: 20 epochs of one-cycle training. SaveModelCallback stores its
# checkpoint under the name 'bestmodel' (its default, spelled out here).
learn.fit_one_cycle(
    20,
    max_lr=slice(lr),
    callbacks=[SaveModelCallback(learn, name='bestmodel')],
)

# Reload that checkpoint and keep a copy under a stage-specific name.
learn.load('bestmodel')
learn.save('efficient_net_b7_v2_best')

# Training diagnostics: loss curves and the learning-rate schedule.
learn.recorder.plot_losses()
learn.recorder.plot_lr()

## Making all the layers of the model trainable

In [None]:
# Make all layers of the pretrained model trainable before the second training stage.
# NOTE(review): no freeze() call is visible earlier in this notebook, so the layers may
# already have been trainable during the first stage -- confirm.
learn.unfreeze() # Making all the layers within the pretrained model trainable

In [None]:
# Run the learning-rate finder again on the unfrozen model and plot the suggestion.
learn.lr_find()
learn.recorder.plot(suggestion=True)

# Learning rate chosen by hand for the second training stage.
lr = 1e-5

In [None]:
#lr = find_appropriate_lr(learn, plot=True) (Function needs to be refined and optimised)

In [None]:
# Second training stage: 10 epochs of one-cycle training on the unfrozen model.
# SaveModelCallback stores its checkpoint under the name 'bestmodel' (its default,
# spelled out here).
learn.fit_one_cycle(
    10,
    max_lr=slice(lr),
    callbacks=[SaveModelCallback(learn, name='bestmodel')],
)

# Reload that checkpoint and keep a copy under a stage-specific name.
learn.load('bestmodel')
learn.save('efficient_net_b7_v2_unfreeze_best')

# Predicting the results

## For Pretrained Model

In [None]:
# Predict the class probabilities of every extracted test image, in the order of `ids`.
# The extraction step saved the test crops as <dest_path>/test/data/<n>.tif, where n is the
# 1-based position of the rooftop id in `ids` (the files are not named after the id itself),
# so the image belonging to ids[k] is (k+1).tif.
result = []
for img_pos in range(1, len(ids) + 1):
    img = open_image(os.path.join(dest_path, 'test', 'data', str(img_pos)+'.tif'))
    # Element 2 of the prediction is kept: one value per class, used as the CSV columns later.
    result.append(np.array(learn.predict(img)[2]))

# Saving the Results in .csv

In [None]:
# Assemble the submission table: one column per roof material, with the rooftop id
# inserted as the first column, then write it to CSV without the index.
df = pd.DataFrame(
    result,
    columns=['concrete_cement', 'healthy_metal', 'incomplete', 'irregular_metal', 'other'],
)
df.insert(0, 'id', ids)
df.to_csv(dest_path + '/Submission Form.csv', index=False)