In [8]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler

## Train and Export Model

In [12]:
file_path = '../Clean Data/Meteorological Station Data Clean.csv'
output_path = "./Exports/Climate Model/knn_model.joblib"

# Load data: one row per station, with columns "1".."12" holding the monthly values
station_data = pd.read_csv(file_path)
month_columns = [str(m) for m in range(1, 13)]
temperature_data = station_data[month_columns].astype(float).values

# Normalize to [-1, 1].
# NOTE: MinMaxScaler rescales every column on its own, so each month is
# normalized against the other stations' values for that same month.
scaler = MinMaxScaler(feature_range=(-1, 1))
normalized_scores = scaler.fit_transform(temperature_data)

# Build (lat, lon, month) → score dataset.
# Rows are laid out station-major / month-minor: each station's coordinates are
# repeated 12 times and paired with months 1..12. Everything is positional, so
# the pairing with `normalized_scores` does not depend on the DataFrame index
# labels being 0..n-1.
n_stations = len(station_data)
n_months = len(month_columns)
X = np.column_stack((
    np.repeat(station_data["Latitude"].to_numpy(dtype=float), n_months),
    np.repeat(station_data["Longitude"].to_numpy(dtype=float), n_months),
    np.tile(np.arange(1, n_months + 1, dtype=float), n_stations),
))
# Row-major flattening of the (stations, 12) matrix matches the order of X above.
y = normalized_scores.reshape(-1)

# Train model
model = KNeighborsRegressor(n_neighbors=5, weights='distance')
model.fit(X, y)

# Save model (create the export directory first so a fresh checkout does not
# fail with FileNotFoundError)
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, output_path)
print(f"Model saved to: {output_path}")


Model saved to: ./Exports/Climate Model/knn_model.joblib


## Example Testing

In [13]:
import joblib
import numpy as np

# Same location the training cell writes the fitted KNN regressor to.
model_path = "./Exports/Climate Model/knn_model.joblib"

# Load model
# joblib files are pickle-based: only load artifacts produced by a trusted
# source (here, the training cell of this notebook).
model = joblib.load(model_path)

def predict_temperature_score(lat, lon, month):
    """Return the loaded model's prediction for a single (lat, lon, month) query.

    The three arguments are packed, in that order, into a one-row float array
    and handed to the module-level ``model``; the first element of the
    prediction is returned as a plain Python ``float``.
    """
    features = np.array([lat, lon, month], dtype=float)[np.newaxis, :]
    predictions = model.predict(features)
    return float(predictions[0])

# Spot-check the loaded model on one hand-picked coordinate and month.
lat = 25.0
lon = 121.5
month = 12
score = predict_temperature_score(lat, lon, month)
print(f"Predicted normalized temperature score for ({lat}, {lon}) in month {month}: {score:.3f}")

Predicted normalized temperature score for (25.0, 121.5) in month 12: -0.543


## Compute the climate scores for the training dataset

In [15]:
import joblib
import numpy as np

# Same location the training cell writes the fitted KNN regressor to.
model_path = "./Exports/Climate Model/knn_model.joblib"

# Load model
# joblib files are pickle-based: only load artifacts produced by a trusted
# source (here, the training cell of this notebook).
model = joblib.load(model_path)

def predict_temperature_score(lat, lon, month):
    """Return the loaded model's prediction for a single (lat, lon, month) query.

    The three arguments are packed, in that order, into a one-row float array
    and handed to the module-level ``model``; the first element of the
    prediction is returned as a plain Python ``float``.
    """
    features = np.array([lat, lon, month], dtype=float)[np.newaxis, :]
    predictions = model.predict(features)
    return float(predictions[0])

consumption_original = pd.read_csv("../Clean Data/monthly consumption per station.csv")

# Work on a copy so the frame as loaded stays available for inspection.
df_score = consumption_original.copy()

# One (lat, lon, month) query row per consumption record, in the same feature
# order the model was trained with. Month is cast to int first, matching the
# per-row int(...) conversion this cell previously applied.
query_features = np.column_stack((
    df_score["Latitude"].to_numpy(dtype=float),
    df_score["Longitude"].to_numpy(dtype=float),
    df_score["Month"].astype(int).to_numpy(dtype=float),
))

# Score every record with a single batched predict call instead of one model
# call per row. predict is skipped for an empty frame because scikit-learn
# rejects zero-sample input, whereas an empty score list is a valid result.
if len(query_features):
    climate_types_score = model.predict(query_features).tolist()
else:
    climate_types_score = []

# NOTE: despite its name, "ClimateType" holds the numeric score predicted by
# the model, not a categorical label.
df_score["ClimateType"] = climate_types_score

# Export Results
df_score.to_csv("../Clean Data/consumption_with_climate_score.csv", index=False)