In [1]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


2025-03-07 11:09:34.711418: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741363774.732478 3420465 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741363774.739072 3420465 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-07 11:09:34.761383: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
print("Loading training data...")

#load training dataset
training_data = pd.read_csv("Training_data_uhi_index_2025-02-18.csv")

#convert datetime column to usable format
training_data["datetime"] = pd.to_datetime(training_data["datetime"], format="%d-%m-%Y %H:%M")

training_data["hour"] = training_data["datetime"].dt.hour
training_data["month"] = training_data["datetime"].dt.month
training_data["day_of_week"] = training_data["datetime"].dt.dayofweek

training_data.drop(columns=["datetime", "Longitude", "Latitude"], inplace=True)

features = ["hour", "month", "day_of_week"]
X_train = training_data[features].values
y_train = training_data["UHI Index"].values 

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

y_scaler = MinMaxScaler()
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).flatten()

print("Training data processed successfully.")

print("Loading validation data...")

validation_data = pd.read_csv("Submission_template.csv")

validation_data["hour"] = np.linspace(0, 23, len(validation_data), dtype=int) % 24
validation_data["month"] = np.linspace(1, 12, len(validation_data), dtype=int) % 12 + 1
validation_data["day_of_week"] = np.linspace(0, 6, len(validation_data), dtype=int) % 7

validation_data.drop(columns=["Longitude", "Latitude"], inplace=True)

X_val = scaler.transform(validation_data[features])


model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.1), 
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),
    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.05),
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

history = model.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=32, verbose=1)

print("Model training complete.")


y_train_pred = model.predict(X_train)

mae = mean_absolute_error(y_train, y_train_pred)
mse = mean_squared_error(y_train, y_train_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_train, y_train_pred)

#evaluation results
print("### Model Evaluation on Training Data ###")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R² Score: {r2:.4f}")

# ========== STEP 5: MAKE PREDICTIONS ON VALIDATION DATA ==========
print("Making predictions on validation dataset...")

#predict UHI Index on validation set
uhi_predictions = model.predict(X_val)

#transform predictions back to original UHI Index scale
uhi_predictions = y_scaler.inverse_transform(uhi_predictions.reshape(-1, 1)).flatten()

validation_data["UHI Index"] = uhi_predictions

validation_data = validation_data.sort_values(by=["month", "day_of_week", "hour"])

validation_data.to_csv("UHI_Predictions_Sorted.csv", index=False)
print("Predictions saved to 'UHI_Predictions_Sorted.csv'.")


Loading training data...
Training data processed successfully.
Loading validation data...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-03-07 11:11:26.067998: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0330 - mae: 0.1456 - val_loss: 0.0341 - val_mae: 0.1542
Epoch 2/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0331 - mae: 0.1456 - val_loss: 0.0342 - val_mae: 0.1546
Epoch 3/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0326 - mae: 0.1453 - val_loss: 0.0334 - val_mae: 0.1525
Epoch 4/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0326 - mae: 0.1450 - val_loss: 0.0334 - val_mae: 0.1525
Epoch 5/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0326 - mae: 0.1454 - val_loss: 0.0334 - val_mae: 0.1525
Epoch 6/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0329 - mae: 0.1455 - val_loss: 0.0333 - val_mae: 0.1522
Epoch 7/100
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/