# UHI + Additional Dataset
This notebook builds a Random Forest model to predict the Urban Heat Island (UHI) Index from Landsat land surface temperature (LST) and Sentinel-2 derived features.

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import rasterio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import xarray as xr
from sklearn.preprocessing import StandardScaler

ModuleNotFoundError: No module named 'rasterio._base'

In [11]:
# Load the satellite data
def load_geotiff(file_path, band=1):
    """Read one band of a GeoTIFF and return it with the dataset's transform.

    Parameters
    ----------
    file_path : str or path-like
        Location of the raster file; passed straight to ``rasterio.open``.
    band : int, optional
        1-based index of the band to read. Defaults to 1, which is the band
        this function read before the parameter existed, so existing
        single-argument calls behave exactly as before.

    Returns
    -------
    tuple
        ``(array, transform)``: the result of ``src.read(band)`` and the
        dataset's ``transform`` attribute.
    """
    # The context manager closes the dataset even if the read fails.
    with rasterio.open(file_path) as src:
        return src.read(band), src.transform

# Load Landsat LST data: band 1 of the raster plus the dataset's transform,
# which later cells use to map longitude/latitude to pixel row/column
lst_data, lst_transform = load_geotiff('Landsat_LST.tiff')

# Load Sentinel-2 data. The file is described as containing NDVI, NDWI, EVI,
# but load_geotiff reads only band 1, so s2_data holds a single band.
# NOTE(review): confirm which index band 1 of S2_sample.tiff actually is.
s2_data, s2_transform = load_geotiff('S2_sample.tiff')

# Load training data; later cells read its 'Latitude', 'Longitude' and
# 'UHI Index' columns
train_df = pd.read_csv('Training_data_uhi_index_UHI2025-v2.csv')

NameError: name 'rasterio' is not defined

In [None]:
# Function to get pixel values at given coordinates
def get_pixel_values(lat, lon, data, transform):
    """Return the raster value at a coordinate, or NaN if it lies outside.

    Parameters
    ----------
    lat, lon : float
        Point coordinates. ``lon`` is passed as x and ``lat`` as y to
        ``rasterio.transform.rowcol``, so they must be expressed in the same
        coordinate system as ``transform``.
    data : array indexed as ``data[row, col]``
        Raster band; must expose ``.shape`` (e.g. a NumPy array).
    transform : transform object
        The transform belonging to ``data``, as returned by ``load_geotiff``.

    Returns
    -------
    scalar
        ``data[row, col]`` for the pixel containing the point, or ``np.nan``
        when the computed row/column falls outside the array.
    """
    row, col = rasterio.transform.rowcol(transform, lon, lat)
    n_rows, n_cols = data.shape[0], data.shape[1]
    # Explicit bounds check instead of catching IndexError: a negative
    # row/col (point north or west of the raster) does not raise in NumPy,
    # it wraps around and would silently return a pixel from the far edge.
    if 0 <= row < n_rows and 0 <= col < n_cols:
        return data[row, col]
    return np.nan

# Extract features for each training point.
# apply(axis=1) calls the lambda once per row; each call samples the raster at
# that row's (Latitude, Longitude) through get_pixel_values, which may return
# np.nan for a point it cannot index in the raster.
train_df['lst'] = train_df.apply(lambda x: get_pixel_values(x['Latitude'], x['Longitude'], lst_data, lst_transform), axis=1)
# 's2_features' is the value sampled from s2_data at the same point
train_df['s2_features'] = train_df.apply(lambda x: get_pixel_values(x['Latitude'], x['Longitude'], s2_data, s2_transform), axis=1)

# Drop rows with missing values.
# dropna() is called without `subset`, so a NaN in any column (not only the
# two feature columns added above) removes the row. train_df is rebound here.
train_df = train_df.dropna()

In [None]:
# Prepare features and target: the two sampled raster columns are the
# predictors and 'UHI Index' is the label
X = train_df[['lst', 's2_features']]
y = train_df['UHI Index']

# Split the data: 20% is held out for validation; random_state pins the
# shuffle so the split is reproducible
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training split only and then
# applied to the validation split, so validation statistics do not influence it.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [None]:
# Train Random Forest model (100 trees; random_state pinned for reproducible fits)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Make predictions on validation set
val_predictions = rf_model.predict(X_val_scaled)

# Evaluate the model on the held-out validation split
mse = mean_squared_error(y_val, val_predictions)
r2 = r2_score(y_val, val_predictions)

# Report both metrics with six decimal places
print(f'Mean Squared Error: {mse:.6f}')
print(f'R² Score: {r2:.6f}')

In [None]:
# Load and prepare submission template
submission_df = pd.read_csv('Submission_template_UHI2025-v2.csv')

# Extract features for submission points, using the same rasters and the same
# per-row lookup as for the training points
submission_df['lst'] = submission_df.apply(lambda x: get_pixel_values(x['Latitude'], x['Longitude'], lst_data, lst_transform), axis=1)
submission_df['s2_features'] = submission_df.apply(lambda x: get_pixel_values(x['Latitude'], x['Longitude'], s2_data, s2_transform), axis=1)

# Scale features with the scaler fitted on the training split (transform only, no refit).
# NOTE(review): unlike the training table, rows with NaN features are not
# dropped here; confirm every submission point lies inside both rasters.
X_submission = submission_df[['lst', 's2_features']]
X_submission_scaled = scaler.transform(X_submission)

# Make predictions; this assigns the 'UHI Index' column of submission_df
submission_df['UHI Index'] = rf_model.predict(X_submission_scaled)

# Save predictions: only the 'ID' and 'UHI Index' columns, without the row index
submission_df[['ID', 'UHI Index']].to_csv('UHI_predictions.csv', index=False)

In [None]:
# Show the submission frame, including the sampled features and the predicted
# 'UHI Index', as this cell's output
submission_df