# Importing Libraries

In [None]:
from pyproj import Proj, transform
from rasterio.mask import mask
import geopandas as gpd
import pandas as pd
import rasterio
import shapely
import os

In [None]:
from fastai.callbacks import SaveModelCallback
from efficientnet_pytorch import EfficientNet
from fastai.vision.learner import model_meta
from fastai.metrics import error_rate
from fastai.vision.models import *
from torchvision.models import *
from fastai.vision import *
import pretrainedmodels
from utils import *
import sys

# Image Pre-Processing


In [None]:
# Create the output folder layout: one training folder per roof-material class
# and one folder for the test data.
materials = ["concrete_cement","healthy_metal","incomplete","irregular_metal","other"]
for name in materials:
    # exist_ok=True lets this cell be re-run without raising FileExistsError.
    os.makedirs('/home/cupgreek/Documents/Processed_data/train/'+name, exist_ok=True)
os.makedirs('/home/cupgreek/Documents/Processed_data/test/data', exist_ok=True)

# Extract every rooftop from the regional source image and save it as its own
# '.tif' file: train crops go to train/<roof_material>/<id>.tif, test crops are
# numbered sequentially as test/data<num>.tif.
num = 0  # running 1-based counter used to name the test crops
epsg = {'colombia':'32618', 'guatemala':'32616', 'st_lucia':'32620'}
regions = {'st_lucia':['dennery'], 'colombia':['borde_rural','borde_soacha'],
           'guatemala':['mixco_1_and_ebenezer','mixco_3']}
ids = []  # ids of the test rooftops; ids[k] belongs to the file data<k+1>.tif

# The footprint coordinates are treated as EPSG:4326; this source projection is
# the same for every country, so it is built once.
inProj = Proj(init = 'epsg:4326')

for country in ['colombia','guatemala','st_lucia']:
    # Target projection for this country, looked up in `epsg`.
    outProj = Proj(init = 'epsg:'+epsg[country])
    for region in regions[country]:
        region_dir = '/home/cupgreek/Documents/stac/'+country+'/'+region+'/'
        # Open the source raster in a context manager so the dataset handle is
        # closed once all crops of this region have been written.
        with rasterio.open(region_dir+region+'_ortho-cog.tif') as src:
            for case in ['train', 'test']:
                aoi_geojson = gpd.read_file(region_dir+case+'-'+region+'.geojson')
                for i in range(aoi_geojson.shape[0]):
                    # Reproject the two corners of the footprint's bounding box
                    # and rebuild the box in the target projection.
                    (x1,y1,x2,y2) = aoi_geojson['geometry'][i].bounds
                    x1_new,y1_new = transform(inProj,outProj,x1,y1)
                    x2_new,y2_new = transform(inProj,outProj,x2,y2)
                    ar_new = shapely.geometry.box(x1_new, y1_new, x2_new, y2_new, ccw=True)

                    # Cut the box out of the raster; crop is indexed as
                    # (band, row, column) by the meta update below.
                    crop, cropTransform = mask(src, [ar_new], crop=True)
                    meta = src.meta.copy()
                    meta.update({'transform':cropTransform, 'height':crop.shape[1], 'width':crop.shape[2]})

                    if case == 'train':
                        out_path = '/home/cupgreek/Documents/Processed_data/'+case+'/'+aoi_geojson['roof_material'][i]+'/'+aoi_geojson['id'][i]+'.tif'
                    else:
                        num = num+1
                        out_path = '/home/cupgreek/Documents/Processed_data/'+case+'/data'+str(num)+'.tif'

                    with rasterio.open(out_path, 'w', **meta) as dst:
                        dst.write(crop)

                    if case == 'test':
                        # Record the id only after the crop was written, so ids
                        # stays aligned with the numbered files on disk.
                        ids.append(aoi_geojson['id'][i])

# Loading the Image Data Bunch

In [None]:
# Seed NumPy's RNG so that the same validation split is taken on every run.
np.random.seed(2)

# Augmentations applied to the images: vertical flips enabled, small lighting
# and zoom changes, warping disabled.
augmentations = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)

# Build the data bunch from the class folders, holding out 20% for validation,
# then normalize with the ImageNet statistics.
data = ImageDataBunch.from_folder(
    '/content/drive/My Drive/Satellite data/data_processed/train',
    train='',
    test='/content/drive/My Drive/Satellite data/data_processed/test',
    valid_pct=0.2,
    ds_tfms=augmentations,
    size=256,
    bs=32,
)
data = data.normalize(imagenet_stats)

# Classification Using PreTrained Models

In [None]:
# Function for finding the optimum learning rate (for now this is done manually).
# NOTE: the whole definition below is wrapped in a triple-quoted string, so it is
# never executed and find_appropriate_lr is not defined at runtime; the calls to
# it further down are commented out accordingly.
# NOTE(review): if this is ever re-enabled, local_min_lr is only assigned inside
# the while loop, so it would be unbound whenever the loop body never runs.
'''def find_appropriate_lr(model:Learner, lr_diff:int = 15, loss_threshold:float = .05, adjust_value:float = 1, plot:bool = False) -> float:
    #Run the Learning Rate Finder
    model.lr_find()
    
    #Get loss values and their corresponding gradients, and get lr values
    losses = np.array(model.recorder.losses)
    assert(lr_diff < len(losses))
    loss_grad = np.gradient(losses)
    lrs = model.recorder.lrs
    
    #Search for index in gradients where loss is lowest before the loss spike
    #Initialize right and left idx using the lr_diff as a spacing unit
    #Set the local min lr as -1 to signify if threshold is too low
    r_idx = -1
    l_idx = r_idx - lr_diff
    while (l_idx >= -len(losses)) and (abs(loss_grad[r_idx] - loss_grad[l_idx]) > loss_threshold):
        local_min_lr = lrs[l_idx]
        r_idx -= 1
        l_idx -= 1

    lr_to_use = local_min_lr * adjust_value
    
    if plot:
        # plots the gradients of the losses in respect to the learning rate change
        plt.plot(loss_grad)
        plt.plot(len(losses)+l_idx, loss_grad[l_idx],markersize=10,marker='o',color='red')
        plt.ylabel("Loss")
        plt.xlabel("Index of LRs")
        plt.show()

        plt.plot(np.log10(lrs), losses)
        plt.ylabel("Loss")
        plt.xlabel("Log 10 Transform of Learning Rate")
        loss_coord = np.interp(np.log10(lr_to_use), np.log10(lrs), losses)
        plt.plot(np.log10(lr_to_use), loss_coord, markersize=10,marker='o',color='red')
        plt.show()
        
    return lr_to_use '''

In [None]:
# Load a pretrained EfficientNet-B7 with as many output classes as the data has.
model = EfficientNet.from_pretrained('efficientnet-b7', num_classes=data.c)
# Attach a fresh Linear layer at the end. Its input width is read from the
# existing head and its output width from the data, so nothing is hard-coded
# and the cell keeps working if the model variant or the class count changes.
model._fc = nn.Linear(in_features=model._fc.in_features, out_features=data.c, bias=True)

# Wrap the data and model in a Learner tracking the error rate, with mixup and
# fp16 enabled.
learn = Learner(data, model, metrics=[error_rate]).mixup().to_fp16()

In [None]:
# Run the learning-rate finder and plot its recorded curve, asking the plot to
# show a suggested value.
learn.lr_find()
learn.recorder.plot(suggestion=True)

# One more step of manually choosing the learning rate

# Learning rate chosen by hand (the automatic finder call below is disabled).
lr = 1e-05

In [None]:
#lr = find_appropriate_lr(learn, plot=True)

In [None]:
# Train for 20 epochs with the one-cycle policy; SaveModelCallback is attached
# to checkpoint the model during training.
learn.fit_one_cycle(20, max_lr = slice(lr) ,callbacks=[SaveModelCallback(learn)])
# Reload the 'bestmodel' checkpoint (presumably the one written by the callback
# above - confirm against its defaults) and keep a copy under a descriptive name.
learn.load('bestmodel')
learn.save('efficient_net_b7_v2_best')

# Analysis of the training run: loss curves and learning-rate schedule
learn.recorder.plot_losses()
learn.recorder.plot_lr()

In [None]:
# Make all the layers trainable before the second training stage.
learn.unfreeze()

In [None]:
# Training the unfrozen model: re-run the learning-rate finder and plot its
# curve with a suggested value.
learn.lr_find()
learn.recorder.plot(suggestion=True)
# Learning rate again chosen by hand.
lr = 1e-05

In [None]:
#lr = find_appropriate_lr(learn, plot=True)

In [None]:
# Train the unfrozen model for 10 more epochs with the one-cycle policy, with
# SaveModelCallback attached to checkpoint the model during training.
learn.fit_one_cycle(10, max_lr=slice(lr), callbacks=[SaveModelCallback(learn)])

# Reload the 'bestmodel' checkpoint (presumably written by the callback above -
# confirm against its defaults) and keep a copy under a descriptive name.
learn.load('bestmodel')
learn.save('efficient_net_b7_v2_unfreeze_best')

# Predicting the results

In [None]:
# Predict the class probabilities of every test rooftop, in the order of `ids`.
# The pre-processing step does not name the test crops after their id: the crop
# belonging to the k-th entry of `ids` is saved as 'data<k>.tif' (1-based), so
# the file name has to be rebuilt from the position in `ids`.
# NOTE(review): the directory is relative, as before; it only resolves when the
# working directory is the parent of Processed_data - confirm.
result = []
for position, _ in enumerate(ids, start=1):
    img = open_image('Processed_data/test/data'+str(position)+'.tif')
    # Element 2 of the prediction is stored as one row of the submission.
    result.append(np.array(learn.predict(img)[2]))

# Saving the Results in .csv

In [None]:
# Assemble the submission table: one column per roof-material class, with the
# rooftop ids inserted as the first column, and write it out without the index.
class_columns = ['concrete_cement','healthy_metal','incomplete','irregular_metal','other']
df = pd.DataFrame(result, columns=class_columns)
df.insert(loc=0, column='id', value=ids)
df.to_csv('Submission Form.csv', index=False)