In [1]:
!pip install timm
!pip install albumentations -U
!pip install colorama
!pip install gmaps
!pip install optuna

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[?25l[K     |▉                               | 10 kB 25.4 MB/s eta 0:00:01[K     |█▊                              | 20 kB 30.7 MB/s eta 0:00:01[K     |██▋                             | 30 kB 28.5 MB/s eta 0:00:01[K     |███▌                            | 40 kB 21.0 MB/s eta 0:00:01[K     |████▍                           | 51 kB 14.4 MB/s eta 0:00:01[K     |█████▏                          | 61 kB 11.4 MB/s eta 0:00:01[K     |██████                          | 71 kB 12.3 MB/s eta 0:00:01[K     |███████                         | 81 kB 13.5 MB/s eta 0:00:01[K     |███████▉                        | 92 kB 14.2 MB/s eta 0:00:01[K     |████████▊                       | 102 kB 12.7 MB/s eta 0:00:01[K     |█████████▋                      | 112 kB 12.7 MB/s eta 0:00:01[K     |██████████▍                     | 122 kB 12.7 MB/s eta 0:00:01[K     |███████████▎                    | 133 kB 12.7 MB/s eta 0:00:0

In [2]:
import os
import gc
import cv2
import sys
import time
import timm
import tqdm
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff

from PIL import Image
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

import gmaps
import optuna

from colorama import Fore, Back, Style
y_ = Fore.YELLOW
r_ = Fore.RED
g_ = Fore.GREEN
b_ = Fore.BLUE
m_ = Fore.MAGENTA
c_ = Fore.CYAN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings('ignore')

import lightgbm as lgb

from sklearn.metrics import mean_squared_error
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import (StandardScaler,PowerTransformer,
                                   QuantileTransformer,LabelEncoder, 
                                   OneHotEncoder,RobustScaler)


import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A 
from albumentations.pytorch.transforms import ToTensorV2

In [None]:
# Earth Engine client; used further down to pick Sentinel-2 tiles and export
# them to Google Drive.
import ee
# Authenticate first, then initialise the client, before any other `ee.*` call.
# NOTE(review): Authenticate() presumably needs an interactive sign-in - confirm
# before running this notebook unattended.
ee.Authenticate()
ee.Initialize()

In [37]:
# Load a 10-row sample of the cleaned dataset to run the pipeline on.
# Fix: the sample is now drawn with a fixed `random_state`. The image names
# assigned below (`gee_image{i}`) depend on the row order, so an unseeded
# re-run of this cell would silently pair rows with images that were exported
# for different rows. 1000 matches the seed used in the config cell.
df = (
    pd.read_csv('/content/drive/MyDrive/AcresBurned/FinalDataset.csv')
    .sample(n=10, random_state=1000)
    .reset_index(drop=True)
)

# You may have to run the commented-out cleaning code below for any other
# dataset; it is currently skipped because this notebook is being tested on
# the already-cleaned training data.

# df = pd.read_csv('/content/drive/MyDrive/AcresBurned/FinalDataset.csv')

# df.drop(['precipprob','snow','snowdepth','preciptype','windgust'],axis=1,inplace=True)

# df.fillna(df.mean(),inplace=True)

# df = df.query("(Latitude <= 90) & (Latitude >=-90)").reset_index(drop=True)
# df = df.query("(Longitude <= 180) & (Longitude >=-180)").reset_index(drop=True)


# Directories holding the exported GeoTIFFs and their JPEG conversions
# (currently the same Drive folder).
tiff_image_path = '/content/drive/MyDrive/AcresBurned/test_images/'
jpeg_image_path = '/content/drive/MyDrive/AcresBurned/test_images/'

# Land-cover classes; the position in this list is the integer label.
classes = ['AnnualCrop', 'HerbaceousVegetation', 'PermanentCrop',
       'Industrial', 'Pasture', 'Highway', 'Residential', 'River',
       'SeaLake', 'Forest']

num_classes = len(classes)

# Lookup tables in both directions: class name <-> integer label.
class_to_label = {name: label for label, name in enumerate(classes)}
label_to_class = dict(enumerate(classes))

# Same date string as 'Started' with the leading 4-digit year decremented by one.
df['previous_year'] = df['Started'].apply(lambda x:str(int(x[:4])-1)+x[4:])

# One image per row: gee_image0, gee_image1, ... in row order.
df['gee_images'] = [f"gee_image{x}" for x in range(len(df))]
df['path'] = jpeg_image_path + df['gee_images'] + '.jpeg'

print("Number of unique Counties",df.Counties.nunique())
print("Number of rows {} Number of Columns {}".format(*df.shape))

Number of unique Counties 10
Number of rows 10 Number of Columns 21


In [23]:
# Global seed and number of cross-validation folds (one saved model per fold).
seed = 1000
nfolds = 5

# Hyper-parameters and runtime settings shared by the cells below.
config = dict(
    lr=5e-5,                          # learning rate
    wd=1e-2,                          # weight decay
    bs=32,                            # batch size
    img_size=256,                     # images are resized to img_size x img_size
    nfolds=5,
    epochs=10,
    num_workers=4,                    # DataLoader worker processes
    seed=1000,
    model_name='tf_efficientnet_b0',  # timm architecture name
)

def seed_everything(seed=42):
    """Seed every random number generator used by this notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports the seed as `PYTHONHASHSEED`, and puts cuDNN into deterministic
    mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Fix: the variable is PYTHONHASHSEED; the original wrote 'PYTHONASSEED',
    # which nothing reads. Note it only affects processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Fix: benchmark mode lets cuDNN auto-tune and pick different algorithms
    # from run to run, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False

# Apply the configured seed once, before any randomised work runs.
seed_everything(seed=config['seed'])

## Get The Image

In [27]:
# Start one Earth Engine export task per row of `df`. Each task writes an RGB
# GeoTIFF named `gee_image{i}` into the Drive folder 'test_images'.
latitudes = df["Latitude"].tolist()
longitudes = df["Longitude"].tolist()
start_dates = df["Started"].tolist()
prev_dates = df["previous_year"].tolist()

# Fix: iterate over the rows of `df`. The original looped over
# `range(len(train_data))`, but `train_data` is never defined in this notebook,
# so the cell raised NameError on a fresh kernel.
# `prev_date` is unpacked for parity with the original loop but is not used.
for i, (lat, lon, start_date, prev_date) in enumerate(
    zip(latitudes, longitudes, start_dates, prev_dates)):
  area_of_interest = ee.Geometry.Point(lon=lon, lat=lat)

  # Least-cloudy Sentinel-2 surface-reflectance tile covering the point,
  # taken from the fire start date onwards.
  # NOTE(review): the end date '2021-06-30' is hard-coded - confirm it is
  # still the intended upper bound.
  tile = (
      ee.ImageCollection('COPERNICUS/S2_SR')
      .filterBounds(area_of_interest)
      .filterDate(start_date,'2021-06-30')
      .sort('CLOUDY_PIXEL_PERCENTAGE')
      .first()
  )

  # Red/green/blue bands rendered as a displayable RGB image.
  rgb_tile = tile.visualize(bands = ['B4', 'B3', 'B2'], max = 4000)

  # NOTE(review): the box extends 0.01 deg below/left of the point but only
  # 0.001 deg above/right, so the point is not centred - confirm this is
  # intentional before changing it (the classifier inputs depend on it).
  bounding_box = ee.Geometry.Rectangle([lon-0.01,lat-0.01,lon+0.001,lat+0.001])

  task_config = {
      'scale': 5,  
      'image': rgb_tile,
      'description': f'gee_image{i}',
      'folder':'test_images',
      'region': bounding_box.getInfo()['coordinates'],
      'crs':'EPSG:4326',
      'fileFormat':'GEO_TIFF',
      }

  # Exports run asynchronously; start() only queues the task.
  task = ee.batch.Export.image.toDrive(**task_config)
  task.start()

Exported images take some time to appear in Google Drive.<br/>
Run the cells below only after all of the images have appeared.

In [30]:
def convert_tiff_to_jpeg(img_name):
  """Convert one exported GeoTIFF into a JPEG with the same base name.

  Reads `tiff_image_path + img_name + '.tif'` and writes
  `jpeg_image_path + img_name + '.jpeg'`.

  Args:
    img_name: Base file name without extension (e.g. 'gee_image0').
  """
  # The context manager closes the underlying file handle; the original left
  # every opened TIFF open.
  with Image.open(tiff_image_path+img_name+'.tif') as img:
    # JPEG cannot store alpha/palette modes, so normalise to RGB when needed.
    rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
    rgb_img.save(jpeg_image_path+img_name+'.jpeg')

# Convert every exported image listed in `df`.
# Fix: the original read `train_data['gee_images']`, but `train_data` is never
# defined in this notebook; the image names live in `df`.
image_paths = df['gee_images'].tolist()
for img_name in image_paths:
  convert_tiff_to_jpeg(img_name)

## Predict Cover Type

Mount Google Drive before running the cells below; you may need to adjust the Drive paths used in some parts of the code.

Switch the runtime to GPU before running.

In [31]:
class Model(nn.Module):
    """Image classifier: a timm backbone followed by a small classification head.

    The backbone's own classifier is replaced by a 128-unit linear layer; its
    output goes through ReLU, dropout (p=0.2) and a final linear layer that
    produces `num_classes` logits.
    """

    def __init__(self, model_path, pretrained=True):
        """Build the network.

        Args:
            model_path: timm architecture name passed to `timm.create_model`.
            pretrained: Whether timm should load pretrained backbone weights.
        """
        super().__init__()
        backbone = timm.create_model(model_path, pretrained=pretrained)
        # Swap the stock classifier for a 128-dimensional projection.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, 128)
        self.backbone = backbone
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()
        self.layer = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images `x`."""
        embedding = self.backbone(x)
        activated = self.relu(embedding)
        return self.layer(self.dropout(activated))

In [32]:
def get_test_transforms():
    """Build the inference-time image pipeline.

    Returns:
        An albumentations `Compose` that resizes to
        `config['img_size']` x `config['img_size']`, normalises with the given
        per-channel mean/std, and converts the image to a tensor.
    """
    return A.Compose(
        [
            # `p=1.0` replaces `always_apply=True`, which is deprecated in
            # recent albumentations releases (this notebook installs the
            # latest with -U); the resize is still applied to every image.
            A.Resize(config['img_size'],config['img_size'],p=1.0),
            A.Normalize(
              mean=[0.485, 0.456, 0.406],
              std=[0.229, 0.224, 0.225],
           ),
            ToTensorV2(p=1.0)
        ])

In [33]:
class EuroSatDatasetTest(Dataset):
    """Inference dataset that loads the images listed in a DataFrame.

    Args:
        df: DataFrame with a "path" column holding image file paths.
        transforms: Optional albumentations pipeline, called as
            `transforms(image=...)`; its 'image' entry is used.
    """

    def __init__(self,df,transforms=None):
        self.df = df
        self.transforms = transforms

    def __getitem__(self,idx):
        """Return the image at position `idx` as a float tensor.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Positional lookup: a DataLoader hands out positions 0..len-1, which
        # only coincide with index labels when the frame has a fresh RangeIndex.
        path = self.df["path"].iloc[idx]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {path}")
        # OpenCV loads BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms:
            augmented = self.transforms(image=image)
            image = augmented['image']

        # as_tensor avoids re-copying (and the accompanying warning) when the
        # transform pipeline already returned a tensor.
        image = torch.as_tensor(image,dtype=torch.float)

        return image

    def __len__(self):
        """Number of rows (images) in the DataFrame."""
        return len(self.df)

In [34]:
def get_prediction(df,model_paths,device='cuda'):
    """Predict a class index per row by averaging the outputs of all fold models.

    Args:
        df: DataFrame with a "path" column of image files.
        model_paths: Iterable of `(path_template, model_name)` pairs. The
            template is formatted with the fold index (0..config['nfolds']-1)
            to locate each checkpoint; `model_name` is the timm architecture.
        device: Device the models and batches are moved to.

    Returns:
        1-D array of predicted class indices, one per row of `df`.

    Raises:
        ValueError: If `model_paths` is empty.
    """
    model_paths = list(model_paths)
    if not model_paths:
        raise ValueError("model_paths must contain at least one (path, model_name) pair")

    # The dataset and loader do not depend on the model or fold, so build them
    # once instead of once per checkpoint.
    test_ds = EuroSatDatasetTest(df,transforms=get_test_transforms())
    test_dl = DataLoader(test_ds,
                        batch_size = config["bs"],
                        num_workers = config['num_workers'],
                        shuffle=False,
                        pin_memory=True,
                        drop_last=False)

    predictions = list()

    for path,model_name in model_paths:
        model = Model(model_name,pretrained=False)

        for f in range(config['nfolds']):
            # Reuse the same module and swap in each fold's weights.
            model.load_state_dict(torch.load(path.format(f),map_location=device))
            model.to(device)
            model.eval()

            with torch.no_grad():
                prediction = list()
                for inputs in test_dl:
                    inputs = inputs.to(device)
                    outputs = model(inputs)
                    pred = outputs.detach().cpu().numpy().tolist()
                    prediction.extend(pred)
                predictions.append(prediction)

    torch.cuda.empty_cache()
    # Average the raw model outputs over all checkpoints, then take the
    # highest-scoring class per row.
    predictions = np.mean(predictions,axis=0).argmax(axis=1)
    return predictions

In [35]:
# (checkpoint path template, timm architecture name) pairs consumed by
# get_prediction; `{0}` in the template is replaced by the fold index.
model_paths = [
    ('/content/drive/MyDrive/AcresBurned/models/model{0}.bin',config['model_name'])
]

In [38]:
# Predicted class index per row, averaged over all fold checkpoints ...
df['covertype'] = get_prediction(df,model_paths)
# ... then translated from integer label to class name.
df['covertype'] = df['covertype'].map(label_to_class)

## Predict AcresBurned

In [39]:
# Build the feature matrix for the AcresBurned regressors.
cat_feature = ['Counties','covertype']
features= ['Counties','temp','feelslike','humidity','dew','precip','windspeed','winddir','pressure','covertype']

# Fix: work on an explicit copy. Assigning columns on the bare `df[features]`
# selection is chained assignment on a slice (SettingWithCopyWarning, hidden
# here by the global warnings filter).
X = df[features].copy()

# Integer-encode the categorical columns.
# NOTE(review): the encoders and the scaler below are fitted on this inference
# sample, not restored from training, so codes and scaling may not match what
# the saved LightGBM models were trained on - confirm, and ideally load the
# fitted training-time transformers instead.
for c in cat_feature:
  le = LabelEncoder()
  X[c] = le.fit_transform(X[c])

# Standardise every column; X becomes a NumPy array from here on.
normalize = StandardScaler()
X = normalize.fit_transform(X)

In [40]:
# Load one saved LightGBM booster per fold (lgb_model0.txt, lgb_model1.txt, ...).
lgb_models = []

for i in range(nfolds):
  model = lgb.Booster(
      model_file=f'/content/drive/MyDrive/AcresBurned/models/lgb_model{i}.txt'
  )
  lgb_models.append(model)

In [41]:
def make_prediction(x_test,lgb_models):
  """Average the predictions of several models on the same inputs.

  Args:
    x_test: Feature matrix passed unchanged to each model's `predict`.
    lgb_models: Iterable of objects exposing `predict(x_test)`.

  Returns:
    Element-wise mean of the per-model predictions.
  """
  per_model = []
  for booster in lgb_models:
    per_model.append(booster.predict(x_test))
  return np.mean(per_model,axis=0)

In [43]:
# Mean prediction across all loaded LightGBM fold models, one value per row.
df['AcresBurnedPrediction'] = make_prediction(X,lgb_models)

In [44]:
# Preview the first predictions.
# NOTE(review): the captured output includes a negative value, so raw
# predictions are not bounded below by zero - confirm whether they should be
# clipped before use.
df['AcresBurnedPrediction'].head()

0   -1867.391874
1     241.432010
2    3042.853020
3    4807.369122
4    4720.891876
Name: AcresBurnedPrediction, dtype: float64