# Training (2020–2024 baseline)

In [None]:
!pip install earthengine-api geemap

In [None]:
import ee
import geemap

# Authenticate this session, then initialise the Earth Engine client
# against the Cloud project below.
ee.Authenticate()
ee.Initialize(project='uhi-prediction-487709')  # project ID as shown on GEE Cloud

# Area of interest: the feature(s) whose ADM0_NAME is 'India' in the
# FAO GAUL 2015 level-0 collection.
india = (
    ee.FeatureCollection("FAO/GAUL/2015/level0")
    .filter(ee.Filter.eq('ADM0_NAME', 'India'))
)

## Loading datasets

In [None]:
# Date Range (2020-2024 baseline)
# Earth Engine's filterDate() includes the start date but excludes the end
# date, so the window must end on 2025-01-01 for 2024-12-31 to be included.
start = '2020-01-01'
end   = '2025-01-01'

# MODIS LST

# Daytime land-surface-temperature band from MOD11A2, restricted to the
# study window and to images intersecting India.
modis = (
    ee.ImageCollection('MODIS/061/MOD11A2')
    .filterDate(start, end)
    .filterBounds(india)
    .select('LST_Day_1km')
)

# Temporal mean, then the 0.02 scale factor and a 273.15 offset
# (presumably Kelvin -> Celsius, matching the air-temperature conversion).
lst = (
    modis.mean()
    .multiply(0.02)
    .subtract(273.15)
    .rename('LST_Day_1km')
)

print("LST loaded")

# ERA5 (Temperature + Wind + Precipitation)

# Daily ERA5-Land aggregates for the study window, limited to images
# intersecting India.
era5 = (
    ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
    .filterDate(start, end)
    .filterBounds(india)
)

# Per-band temporal mean over the whole window.
era_mean = era5.mean()

# Convert temperature to Celsius
air_temp = (
    era_mean.select('temperature_2m')
    .subtract(273.15)
    .rename('mean_2m_air_temp')
)


# Wind speed calculation
def _daily_wind_speed(image):
    """Return sqrt(u^2 + v^2) of one image's 10 m wind components as 'wind_speed'."""
    return image.expression(
        'sqrt(u*u + v*v)', {
            'u': image.select('u_component_of_wind_10m'),
            'v': image.select('v_component_of_wind_10m')
        }).rename('wind_speed')


# Speed is computed per image and THEN averaged. Averaging u and v first and
# taking the magnitude afterwards yields the magnitude of the mean wind vector,
# in which opposing wind directions cancel and the speed is understated.
# NOTE(review): any inference pipeline that feeds this model must derive
# 'wind_speed' the same way.
wind_speed = era5.map(_daily_wind_speed).mean().rename('wind_speed')

# Precipitation
precip = (
    era_mean.select('total_precipitation_sum')
    .rename('precipitation')
)

print("ERA5 variables loaded")

# MODIS NDVI

# Temporal mean of the MOD13Q1 NDVI band over the study window, multiplied
# by the 0.0001 scale factor.
ndvi = (
    ee.ImageCollection("MODIS/061/MOD13Q1")
    .filterDate(start, end)
    .filterBounds(india)
    .select('NDVI')
    .mean()
    .multiply(0.0001)
    .rename('NDVI')
)

print("NDVI loaded")

# Urban Mask (MODIS Land Cover)

# First MCD12Q1 image inside the study window (presumably the earliest year;
# confirm collection ordering), reduced to the LC_Type1 band.
landcover = (
    ee.ImageCollection("MODIS/061/MCD12Q1")
    .filterDate(start, end)
    .first()
    .select('LC_Type1')
)

# Binary band: 1 where LC_Type1 equals class 13 (used here as "urban"), else 0.
urban_mask = landcover.eq(13).rename('Urban_Class')

print("Urban mask loaded")

# Elevation

# SRTM 'elevation' band, clipped to the India boundary.
elevation = (
    ee.Image("USGS/SRTMGL1_003")
    .select('elevation')
    .clip(india)
)

print("Elevation loaded")

# Nighttime Lights

# Temporal mean of the VIIRS monthly 'avg_rad' band over the study window,
# exposed under the feature name 'night_lights'.
viirs = (
    ee.ImageCollection("NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG")
    .filterDate(start, end)
    .filterBounds(india)
    .select('avg_rad')
    .mean()
    .rename('night_lights')
)

print("Night lights loaded")


# Final Combination of features

# Start from the LST band and append every other feature band in a fixed
# order, so the stacked image carries one band per model input plus LST.
final_image = lst
for _band in (air_temp, wind_speed, precip, ndvi, urban_mask, elevation, viirs):
    final_image = final_image.addBands(_band)

print("Final Bands:", final_image.bandNames().getInfo())

In [None]:
# Sample data

# Draw a seeded random sample of pixels from the stacked image inside the
# India geometry. geometries=False requests that point geometries are not
# attached to the sampled features.
samples = final_image.sample(
    region=india.geometry(),
    numPixels=2500,
    scale=5000,
    geometries=False,
    seed=42,
)

In [None]:
# exporting data to drive
import time

# Batch export of the sampled table to Google Drive as a CSV.
task = ee.batch.Export.table.toDrive(
    collection=samples,
    description='UHI_India_Updated',
    folder='GEE_Exports',
    fileNamePrefix='UHI_India_Updated',
    fileFormat='CSV'
)

task.start()

print("Export started. Check GEE Tasks panel.")

# The export runs asynchronously on Earth Engine's servers. Block here until it
# finishes so the following cell, which reads the CSV from Drive, does not run
# before the file exists (e.g. under "Run All").
while task.active():
    time.sleep(30)

export_status = task.status()
if export_status.get('state') != 'COMPLETED':
    raise RuntimeError(
        "Earth Engine export did not complete: "
        f"state={export_status.get('state')}, "
        f"error={export_status.get('error_message')}"
    )

print("Export completed.")

In [None]:
import pandas as pd
from google.colab import drive

# Mount Google Drive into the Colab filesystem so the exported CSV is readable.
drive.mount('/content/drive')

# loading csv from drive
file_path = "/content/drive/MyDrive/GEE_Exports/UHI_India_Updated.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
print("Columns in dataset:", df.columns)

# Drop the Earth Engine export metadata columns. errors='ignore' keeps this
# cell re-runnable: `df` is overwritten here, so on a second run the columns
# are already gone and a strict drop would raise KeyError.
df = df.drop(columns=['system:index', '.geo'], errors='ignore')

# creating the target variable (UHI):
# land surface temperature minus 2 m air temperature.
df['UHI'] = df['LST_Day_1km'] - df['mean_2m_air_temp']

# defining features (X) -
# not including LST_Day_1km (used to create UHI) and UHI (target)

features = [
    'mean_2m_air_temp',
    'wind_speed',
    'precipitation',
    'NDVI',
    'Urban_Class',
    'elevation',
    'night_lights'
]

# Model inputs and regression target.
X = df[features]
y = df['UHI']

## Model training

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Hold out 20% of the samples for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Random forest with 400 trees, using all available cores; fit() returns the
# fitted estimator, so construction and training are chained.
model = RandomForestRegressor(
    n_jobs=-1,
    random_state=42,
    n_estimators=400,
).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)

print("R2 Score:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

In [None]:
# saving model
import os

import joblib

# Local copy in the current working directory.
joblib.dump(model, "uhi_model_final.pkl")

# saving to google drive
# joblib.dump does not create missing parent folders, so make sure the target
# Drive folder exists before writing into it.
drive_model_path = "/content/drive/MyDrive/UHI Model Phases/uhi_model_final.pkl"
os.makedirs(os.path.dirname(drive_model_path), exist_ok=True)
joblib.dump(model, drive_model_path)