# UHI Predictions Generator

This notebook loads precomputed feature tables from pickle files, trains a neural-network regressor on them, and generates UHI Index predictions for the submission file.

In [4]:
# Import required libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

## 1. Load and Prepare Data

In [11]:
# Load the precomputed feature tables from pickle files.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# open pickle files that come from a trusted source.
with open('train_features (1).pkl', 'rb') as f:
    train_features = pickle.load(f)

with open('submit_features.pkl', 'rb') as f:
    test_df = pickle.load(f)

# Load the labelled training table *before* reporting its shape; previously it
# was read after the print below, which fails with NameError on a fresh kernel.
train_df = pd.read_csv('Training_data_uhi_index_UHI2025-v2.csv')

print("Training data shape:", train_df.shape)
print("Test data shape:", test_df.shape)

# Display first few rows of the training feature table
train_features.head()

Training data shape: (11229, 4)
Test data shape: (1040, 14)


Unnamed: 0,lst,s2_features,location,temp_mean,temp_max,temp_min,temp_std,humidity_mean,humidity_max,humidity_min,wind_speed_mean,solar_flux_mean,solar_flux_max,wind_direction_mean,building_density,avg_building_height,building_coverage
0,38.393941,846.0,Bronx,24.789941,28.4,19.3,2.590496,54.445562,88.2,39.6,2.348521,387.869822,960,128.47929,170.614099,10.0,0.296461
1,38.393941,846.0,Bronx,24.789941,28.4,19.3,2.590496,54.445562,88.2,39.6,2.348521,387.869822,960,128.47929,169.340859,10.0,0.318623
2,37.785534,846.0,Bronx,24.789941,28.4,19.3,2.590496,54.445562,88.2,39.6,2.348521,387.869822,960,128.47929,170.614099,10.0,0.331206
3,37.785534,846.0,Bronx,24.789941,28.4,19.3,2.590496,54.445562,88.2,39.6,2.348521,387.869822,960,128.47929,169.340859,10.0,0.32767
4,37.358281,846.0,Bronx,24.789941,28.4,19.3,2.590496,54.445562,88.2,39.6,2.348521,387.869822,960,128.47929,165.521141,10.0,0.324722


In [12]:
# Define features
# Columns fed to the model. Further candidates that are currently left out:
#   'temp_std', 'humidity_mean', 'humidity_max', 'humidity_min'
# NOTE(review): the outputs recorded in this notebook show 9 feature columns,
# so they appear to predate this 5-column selection - re-run to refresh them.
features = ['lst', 's2_features', 'temp_mean', 'temp_max', 'temp_min']

# Prepare training features and target
X = train_features[features]
y = train_df['UHI Index']

# X and y come from two different tables and are paired purely by row
# position, so refuse to continue if the row counts disagree.
if len(X) != len(y):
    raise ValueError(
        f"Feature/target row mismatch: {len(X)} feature rows vs {len(y)} targets"
    )

# Prepare test features
X_test = test_df[features]

print("\nFeature shapes:")
print("X shape:", X.shape)
print("y shape:", y.shape)
print("X_test shape:", X_test.shape)


Feature shapes:
X shape: (11229, 9)
y shape: (11229,)
X_test shape: (1040, 9)


## 2. Data Preprocessing

In [13]:
# Standardise the features: the scaler is fitted on the training matrix only
# and the same fitted transform is then applied to both matrices.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

# Report the per-column count of missing values in each feature table
print("\nMissing values in training data:", X.isna().sum(), sep="\n")
print("\nMissing values in test data:", X_test.isna().sum(), sep="\n")


Missing values in training data:
lst              0
s2_features      0
temp_mean        0
temp_max         0
temp_min         0
temp_std         0
humidity_mean    0
humidity_max     0
humidity_min     0
dtype: int64

Missing values in test data:
lst              0
s2_features      0
temp_mean        0
temp_max         0
temp_min         0
temp_std         0
humidity_mean    0
humidity_max     0
humidity_min     0
dtype: int64


## 3. Create and Train Model

In [14]:
def create_model(input_dim):
    """Build and compile the feed-forward regression network.

    The network stacks three Dense(relu) -> BatchNormalization -> Dropout
    stages (64, 128 and 64 units with dropout rates 0.2, 0.3 and 0.2) and ends
    in a single linear output unit.

    Args:
        input_dim: Number of input features; passed to the first Dense layer.

    Returns:
        A Sequential model compiled with the 'adam' optimizer, 'mse' loss and
        'mae' as an additional metric.
    """
    # (units, dropout rate) for each Dense -> BatchNormalization -> Dropout stage
    stages = ((64, 0.2), (128, 0.3), (64, 0.2))

    model = Sequential()
    for position, (units, rate) in enumerate(stages):
        # Only the very first layer declares the input width
        first_layer_kwargs = {'input_dim': input_dim} if position == 0 else {}
        model.add(Dense(units, activation='relu', **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(Dropout(rate))

    # Output layer: one unbounded value per sample
    model.add(Dense(1, activation='linear'))

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Instantiate the network with one input per selected feature column,
# then print its layer-by-layer summary
model = create_model(input_dim=len(features))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                640       
                                                                 
 batch_normalization (BatchN  (None, 64)               256       
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 128)               8320      
                                                                 
 batch_normalization_1 (Batc  (None, 128)              512       
 hNormalization)                                                 
                                                                 
 dropout_1 (Dropout)         (None, 128)               0

In [15]:
# Early stopping: watch the validation loss, give up after 10 epochs without
# improvement, and roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

# Fit for at most 100 epochs in mini-batches of 32, holding out 20% of the
# rows for validation.
# NOTE(review): per the Keras docs, validation_split takes the *last* fraction
# of the rows before shuffling - confirm the row order is not grouped
# (e.g. by location), otherwise the hold-out is not representative.
history = model.fit(
    x=X_scaled,
    y=y,
    batch_size=32,
    epochs=100,
    verbose=1,
    callbacks=[early_stopping],
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


## 5. Generate Predictions

In [20]:
# Generate predictions
predictions = model.predict(X_test_scaled)

# The network has a single output unit, so predict() yields one column per
# row; flatten it to a 1-D vector before storing it as a DataFrame column.
predicted_uhi = predictions.ravel()

# Add predictions to the test dataframe
test_df['UHI Index'] = predicted_uhi

submit_df = pd.read_csv('Submission_template_UHI2025-v2.csv')

# The template and the test features are paired by row position; stop early
# if they do not describe the same number of locations.
if len(submit_df) != len(predicted_uhi):
    raise ValueError(
        f"Submission template has {len(submit_df)} rows but "
        f"{len(predicted_uhi)} predictions were generated"
    )

# Assign the raw array (positional) rather than a Series: a Series would be
# aligned on index labels and could silently leave NaN or misplaced values if
# the test table's index differs from the template's.
submit_df['UHI Index'] = predicted_uhi

# Display prediction statistics
print("\nPrediction Statistics:")
print(test_df['UHI Index'].describe())


Prediction Statistics:
count    1040.000000
mean        0.997127
std         0.006224
min         0.984431
25%         0.992249
50%         0.995675
75%         1.001542
max         1.023232
Name: UHI Index, dtype: float64


## 6. Save Predictions

In [21]:
# Save predictions to CSV
output_file = 'UHI_prediction_v2.csv'
submit_df.to_csv(output_file, index=False)
print(f"\nPredictions saved to {output_file}")

# Display first few rows of the predictions
submit_df.head()


Predictions saved to UHI_prediction_v2.csv


Unnamed: 0,Longitude,Latitude,UHI Index
0,-73.971665,40.788763,0.993192
1,-73.971928,40.788875,0.994713
2,-73.96708,40.78908,0.992929
3,-73.97255,40.789082,0.99658
4,-73.969697,40.787953,0.992864
