In [1]:
!pip install timm
!pip install albumentations -U

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[?25l[K     |▉                               | 10 kB 26.0 MB/s eta 0:00:01[K     |█▊                              | 20 kB 20.8 MB/s eta 0:00:01[K     |██▋                             | 30 kB 11.3 MB/s eta 0:00:01[K     |███▌                            | 40 kB 9.0 MB/s eta 0:00:01[K     |████▍                           | 51 kB 5.1 MB/s eta 0:00:01[K     |█████▏                          | 61 kB 5.4 MB/s eta 0:00:01[K     |██████                          | 71 kB 5.8 MB/s eta 0:00:01[K     |███████                         | 81 kB 6.5 MB/s eta 0:00:01[K     |███████▉                        | 92 kB 6.3 MB/s eta 0:00:01[K     |████████▊                       | 102 kB 5.3 MB/s eta 0:00:01[K     |█████████▋                      | 112 kB 5.3 MB/s eta 0:00:01[K     |██████████▍                     | 122 kB 5.3 MB/s eta 0:00:01[K     |███████████▎                    | 133 kB 5.3 MB/s eta 0:00:01[K     

In [2]:
import os
import gc
import cv2
import sys
import math
import time
import timm
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

from PIL import Image

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score,classification_report
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A 
from albumentations.pytorch.transforms import ToTensorV2

In [3]:
# Load the raw dataset CSV from the mounted Drive folder.
# NOTE(review): the preprocessing cell further down reads this same file again
# before cleaning it, so this load is redundant on a top-to-bottom run.
train_data = pd.read_csv('/content/drive/MyDrive/AcresBurned/FinalDataset.csv')

In [4]:
# Run-wide settings. `seed` and `nfolds` stay available as module-level names
# and also feed the matching `config` entries, so the two cannot drift apart.
seed = 1000
nfolds = 5

config = {'lr':5e-5,
          'wd':1e-2,
          'bs':32,                  # DataLoader batch size
          'img_size':256,           # images are resized to img_size x img_size
          'nfolds':nfolds,          # number of per-fold checkpoints to load
          'epochs':10,
          'num_workers':4,          # DataLoader worker processes
          'seed':seed,
          'model_name':'tf_efficientnet_b0',   # timm architecture name
         }

def seed_everything(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    configures cuDNN for reproducible behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Fixed: the variable is PYTHONHASHSEED (the original wrote 'PYTHONASSEED').
    # Setting it at runtime only affects subprocesses started afterwards; the
    # hash seed of the already-running interpreter cannot be changed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Fixed: benchmark mode lets cuDNN auto-tune and pick different kernels from
    # run to run, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False

seed_everything(seed=config['seed'])

In [5]:
# Load the raw table fresh so this cell gives the same result however many
# times it is re-run.
train_data = pd.read_csv('/content/drive/MyDrive/AcresBurned/FinalDataset.csv')

# Remove the columns that are not used downstream.
train_data = train_data.drop(columns=['precipprob','snow','snowdepth','preciptype','windgust'])

# Fill remaining gaps with the per-column mean. numeric_only=True restricts the
# mean to numeric columns; without it pandas >= 2.0 raises a TypeError on the
# text columns (e.g. Counties, Started) instead of silently skipping them.
train_data = train_data.fillna(train_data.mean(numeric_only=True))

# Keep only rows whose coordinates are valid latitude/longitude values.
train_data = train_data.query(
    "(Latitude <= 90) & (Latitude >= -90) & (Longitude <= 180) & (Longitude >= -180)"
).reset_index(drop=True)

tiff_image_path = '/content/drive/MyDrive/AcresBurned/tiff/'
jpeg_image_path = '/content/drive/MyDrive/AcresBurned/jpeg/'

# Class names; the position in this list is the integer label.
classes = ['AnnualCrop', 'HerbaceousVegetation', 'PermanentCrop',
       'Industrial', 'Pasture', 'Highway', 'Residential', 'River',
       'SeaLake', 'Forest']

num_classes = len(classes)

class_to_label = {name:index for index,name in enumerate(classes)}
label_to_class = {index:name for index,name in enumerate(classes)}

# Same timestamp string with the four-digit year prefix decremented by one.
train_data['previous_year'] = train_data['Started'].apply(lambda x:str(int(x[:4])-1)+x[4:])

# Image names are assigned by row position AFTER filtering, so the filtering
# above must stay unchanged for rows to line up with the files already on Drive.
train_data['gee_images'] = [f"gee_image{x}" for x in range(len(train_data))]
train_data['path'] = jpeg_image_path + train_data['gee_images'] + '.jpeg'

print("Number of unique Counties",train_data.Counties.nunique())
print("Number of rows {} Number of Columns {}".format(*train_data.shape))

Number of unique Counties 59
Number of rows 1443 Number of Columns 16


In [6]:
train_data.head()

Unnamed: 0,AcresBurned,Counties,Latitude,Longitude,Started,temp,feelslike,humidity,dew,precip,windspeed,winddir,pressure,previous_year,gee_images,path
0,300.0,Sutter,39.18832,-121.78003,2016-09-20T09:30:00Z,24.1,24.1,32.5,6.7,0.0,19.8,142.0,1011.8,2015-09-20T09:30:00Z,gee_image0,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...
1,117.0,Calaveras,38.29113,-120.62177,2017-07-17T12:55:00Z,20.1,20.1,42.7,7.0,0.0,2.5,76.0,1008.1,2016-07-17T12:55:00Z,gee_image1,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...
2,320.0,Madera,37.31191,-119.60412,2014-09-14T13:36:00Z,19.1,19.1,54.0,9.6,0.0,7.7,298.0,1007.7,2013-09-14T13:36:00Z,gee_image2,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...
3,104.0,Butte,39.8707,-121.8938,2015-10-02T14:38:00Z,12.2,12.2,91.9,10.9,0.0,1.3,354.0,1017.9,2014-10-02T14:38:00Z,gee_image3,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...
4,1314.0,San Benito,36.40755,-120.99322,2018-06-04T17:01:00Z,19.7,19.7,58.1,11.3,0.0,16.1,316.0,1014.9,2017-06-04T17:01:00Z,gee_image4,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...


## Download Image from google earth engine

Run the commented-out code below only if you want to download the images again.<br/>
All the images are already stored in Google Drive, and re-downloading them
requires access to Google Earth Engine.<br/>
Downloading and storing the images takes a long time.

In [6]:
# import ee
# ee.Authenticate()
# ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code&code_challenge=Q5k8_U4qii1mRcSHDUvHKIY5o1cpF5EeasAqC_fCcy4&code_challenge_method=S256

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: 4/1AX4XfWiWfUG29HdAhv54QkkSEGInUkCZIYPF9iqdDRxZ8a74KSOOKvbKiGw

Successfully saved authorization token.


In [24]:
# latitudes = train_data["Latitude"].tolist()
# longitudes = train_data["Longitude"].tolist()
# start_dates = train_data["Started"].tolist()
# prev_dates = train_data["previous_year"].tolist()

# for i in range(len(train_data)):
#   lat = latitudes[i]
#   lon = longitudes[i]
#   start_date = start_dates[i]
#   prev_date = prev_dates[i]  # NOTE(review): never used below - confirm whether filterDate was meant to start here

#   area_of_interest = ee.Geometry.Point(lon=lon, lat=lat)

#   # Sort by cloud cover so that .first() returns a clear image.
#   # The chain is parenthesised because a comment after a `\` line
#   # continuation is a SyntaxError once this cell is uncommented.
#   tile = (
#       ee.ImageCollection('COPERNICUS/S2_SR')
#       .filterBounds(area_of_interest)
#       .filterDate(start_date,'2021-06-30')
#       .sort('CLOUDY_PIXEL_PERCENTAGE')
#       .first()
#   )

#   rgb_tile = tile.visualize(bands = ['B4', 'B3', 'B2'], max = 4000)

#   # NOTE(review): the box reaches 0.01 degrees below/left of the point but only
#   # 0.001 degrees above/right - confirm that this asymmetry is intended.
#   bounding_box = ee.Geometry.Rectangle([lon-0.01,lat-0.01,lon+0.001,lat+0.001])

#   task_config = {
#       'scale': 5,  
#       'image': rgb_tile,
#       'description': f'gee_image{i}',
#       'folder':'tiff',
#       'region': bounding_box.getInfo()['coordinates'],
#       'crs':'EPSG:4326',
#       'fileFormat':'GEO_TIFF',
#       }

#   task = ee.batch.Export.image.toDrive(**task_config)
#   task.start()

In [7]:
# def convert_tiff_to_jpeg(img_name):
#   img = Image.open(tiff_image_path+img_name+'.tif')
#   img.save(jpeg_image_path+img_name+'.jpeg')

# image_paths = train_data['gee_images'].tolist()
# for img_name in image_paths:
#   convert_tiff_to_jpeg(img_name)

## Predicting the image
Change runtime to GPU before running

In [7]:
class Model(nn.Module):
    """Image classifier: a timm backbone followed by a small classification head.

    The backbone's `classifier` layer is swapped for a 128-unit linear layer;
    its output passes through ReLU and dropout before a final linear layer
    that produces `num_classes` raw scores (no softmax is applied).

    Args:
        model_path: Model name handed to `timm.create_model`. The created
            model must expose a `classifier` attribute with `in_features`.
        pretrained: Forwarded to `timm.create_model`.
    """

    def __init__(self, model_path, pretrained=True):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys),
        # so `backbone` and `layer` must keep these exact names.
        backbone = timm.create_model(model_path, pretrained=pretrained)
        feature_dim = backbone.classifier.in_features
        backbone.classifier = nn.Linear(feature_dim, 128)
        self.backbone = backbone
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()
        self.layer = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of images `x`."""
        embedding = self.backbone(x)
        activated = self.relu(embedding)
        scores = self.layer(self.dropout(activated))
        return scores

In [8]:
def get_test_transforms():
    """Build the inference-time preprocessing pipeline.

    Steps, in order: resize to `config['img_size']` squared, normalise each
    channel with fixed mean/std (the standard ImageNet statistics), convert
    to a tensor with `ToTensorV2`.

    Returns:
        An `A.Compose` pipeline, called as `pipeline(image=...)`.
    """
    side = config['img_size']
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        A.Resize(side, side, always_apply=True),
        A.Normalize(mean=channel_mean, std=channel_std),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps)

In [13]:
class EuroSatDatasetTest(Dataset):
    """Inference-only dataset: yields one image tensor per row of `df` (no labels).

    Args:
        df: DataFrame with a "path" column holding image file paths.
        transforms: Optional callable invoked as `transforms(image=array)` that
            returns a mapping with the processed image under the "image" key.
    """

    def __init__(self,df,transforms=None):
        self.df = df
        self.transforms = transforms

    def __getitem__(self,idx):
        """Load, colour-convert and transform the image at position `idx`.

        Returns:
            A float tensor holding the (optionally transformed) image.

        Raises:
            FileNotFoundError: If the image file does not exist.
            OSError: If the file exists but cannot be decoded as an image.
        """
        # Positional lookup: a DataLoader hands out positions 0..len-1, which
        # only coincide with index labels when the frame has a default index.
        path = self.df["path"].iloc[idx]

        # cv2.imread returns None instead of raising, which would otherwise
        # surface as an opaque cvtColor error; fail early with the path instead.
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image file not found: {path}")
        image = cv2.imread(path)
        if image is None:
            raise OSError(f"Could not decode image file: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms:
            augmented = self.transforms(image=image)
            image = augmented['image']

        # as_tensor avoids the extra copy (and warning) that torch.tensor()
        # incurs when the transform has already produced a tensor.
        image = torch.as_tensor(image,dtype=torch.float)

        return image

    def __len__(self):
        """Number of rows (images) in the underlying DataFrame."""
        return len(self.df)

In [14]:
def get_prediction(df,model_paths,device='cuda'):
    """Predict a class index for every row of `df` with a checkpoint ensemble.

    For each `(path_template, model_name)` pair, `config['nfolds']` checkpoints
    are loaded from `path_template.format(fold)`. The raw model outputs of all
    checkpoints are averaged and the arg-max class is returned per row.

    Args:
        df: DataFrame with a "path" column of image file paths.
        model_paths: Iterable of `(path_template, model_name)` tuples.
        device: Torch device used for the models and the input batches.

    Returns:
        1-D NumPy integer array with one class index per row of `df`.

    Raises:
        ValueError: If `model_paths` yields no checkpoints to evaluate.
    """
    if len(df) == 0:
        # Nothing to predict; avoids building models and reducing empty arrays.
        return np.empty(0, dtype=np.int64)

    # The dataset and loader do not depend on the model or fold, so they are
    # built once rather than once per checkpoint.
    test_ds = EuroSatDatasetTest(df,transforms=get_test_transforms())
    test_dl = DataLoader(test_ds,
                        batch_size = config["bs"],
                        num_workers = config['num_workers'],
                        shuffle=False,   # keep outputs aligned with df row order
                        pin_memory=True,
                        drop_last=False)

    predictions = list()

    for path,model_name in model_paths:
        model = Model(model_name,pretrained=False)
        model.to(device)

        for f in range(config['nfolds']):
            model.load_state_dict(torch.load(path.format(f),map_location=device))
            model.eval()

            fold_outputs = list()
            with torch.no_grad():
                for inputs in test_dl:
                    outputs = model(inputs.to(device))
                    fold_outputs.append(outputs.cpu().numpy())
            predictions.append(np.concatenate(fold_outputs,axis=0))

    torch.cuda.empty_cache()

    if not predictions:
        raise ValueError("model_paths produced no checkpoints to evaluate")

    # Average in float64, matching the precision of the earlier list-based
    # implementation, then pick the highest-scoring class per row.
    return np.mean(predictions,axis=0,dtype=np.float64).argmax(axis=1)

In [15]:
# (checkpoint path template, timm model name) pairs consumed by get_prediction.
# get_prediction fills the `{0}` placeholder with each fold index in
# range(config['nfolds']) via `path.format(f)`, so a single entry stands for
# all fold checkpoints of one model.
model_paths = [
    ('/content/drive/MyDrive/AcresBurned/models/model{0}.bin',config['model_name'])
]

In [16]:
train_data['covertype'] = get_prediction(train_data,model_paths)

In [17]:
# Convert predicted class indices into class names. `replace` (rather than
# `map`) leaves values without a matching key untouched, so re-running this
# cell once the column already holds names is a no-op instead of turning every
# entry into NaN.
train_data['covertype'] = train_data['covertype'].replace(label_to_class)

In [18]:
train_data['covertype'].value_counts()

PermanentCrop           423
HerbaceousVegetation    247
SeaLake                 233
Highway                 171
Forest                  118
Industrial               84
Residential              79
AnnualCrop               78
River                    10
Name: covertype, dtype: int64

In [19]:
train_data.to_csv("/content/drive/MyDrive/AcresBurned/FinalCleanedDataset.csv",index=False)

In [20]:
train_data.head()

Unnamed: 0,AcresBurned,Counties,Latitude,Longitude,Started,temp,feelslike,humidity,dew,precip,windspeed,winddir,pressure,previous_year,gee_images,path,covertype
0,300.0,Sutter,39.18832,-121.78003,2016-09-20T09:30:00Z,24.1,24.1,32.5,6.7,0.0,19.8,142.0,1011.8,2015-09-20T09:30:00Z,gee_image0,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...,PermanentCrop
1,117.0,Calaveras,38.29113,-120.62177,2017-07-17T12:55:00Z,20.1,20.1,42.7,7.0,0.0,2.5,76.0,1008.1,2016-07-17T12:55:00Z,gee_image1,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...,Forest
2,320.0,Madera,37.31191,-119.60412,2014-09-14T13:36:00Z,19.1,19.1,54.0,9.6,0.0,7.7,298.0,1007.7,2013-09-14T13:36:00Z,gee_image2,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...,Forest
3,104.0,Butte,39.8707,-121.8938,2015-10-02T14:38:00Z,12.2,12.2,91.9,10.9,0.0,1.3,354.0,1017.9,2014-10-02T14:38:00Z,gee_image3,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...,PermanentCrop
4,1314.0,San Benito,36.40755,-120.99322,2018-06-04T17:01:00Z,19.7,19.7,58.1,11.3,0.0,16.1,316.0,1014.9,2017-06-04T17:01:00Z,gee_image4,/content/drive/MyDrive/AcresBurned/jpeg/gee_im...,PermanentCrop
