### Climate Model (Input: Coordinates and Time / Output: Climate Type)

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
import numpy as np

# --------------------------------------------------------
# 1) Read the cleaned meteorological station table from disk
#    (the path is relative to the current working directory)
# --------------------------------------------------------
file_path = '../Clean Data/Meteorological Station Data Clean.csv'
df = pd.read_csv(file_path)

# --------------------------------------------------------
# 2) Build the clustering matrix from the columns named "1" .. "12"
# --------------------------------------------------------
monthly_cols = list(map(str, range(1, 13)))
monthly_frame = df.loc[:, monthly_cols].astype(float)
X = monthly_frame.to_numpy()  # shape: (n_rows_in_csv, 12)

# --------------------------------------------------------
# 3) Run KMeans (e.g., k = 3 clusters)
#    n_init is set explicitly because its default differs between
#    scikit-learn releases; pinning it keeps the clustering reproducible
#    and avoids the FutureWarning emitted by versions in the transition.
# --------------------------------------------------------
k = 3
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
labels = kmeans.fit_predict(X)
df['Cluster'] = labels

# --------------------------------------------------------
# 4) Map KMeans clusters to ordered target labels (for k = 3: -1, 0, 1)
#    KMeans cluster ids are arbitrary, so a fixed {0: -1, 1: 0, 2: 1}
#    mapping gives the labels no consistent meaning. Instead, rank the
#    clusters by the mean of their centroid over the 12 monthly values:
#    the cluster with the lowest mean gets the lowest label and the one
#    with the highest mean gets the highest label.
# --------------------------------------------------------
centroid_means = kmeans.cluster_centers_.mean(axis=1)
clusters_by_mean = np.argsort(centroid_means)  # cluster ids, lowest mean first
label_offset = (k - 1) // 2  # centers the labels around 0 (1 when k = 3)
cluster_to_label = {
    int(cluster_id): rank - label_offset
    for rank, cluster_id in enumerate(clusters_by_mean)
}
df['Label'] = df['Cluster'].map(cluster_to_label)
y = df['Label'].values  # target array, one label per row of df

# --------------------------------------------------------
# 5) Train a (multinomial) Logistic Regression on the 12-month vectors
#    This is the classifier over the three classes -1, 0, 1.
#    The `multi_class` argument is intentionally not passed: it is
#    deprecated in recent scikit-learn releases, and with the 'lbfgs'
#    solver and more than two classes the default is already the
#    multinomial (softmax) formulation, so the fitted model is unchanged.
# --------------------------------------------------------
clf = LogisticRegression(solver='lbfgs', random_state=42, max_iter=1000)
clf.fit(X, y)

# --------------------------------------------------------
# 6) Define Haversine distance for nearest-station lookup
# --------------------------------------------------------
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    All four arguments are latitudes/longitudes in decimal degrees. They may
    be scalars or NumPy-compatible arrays; arrays are combined element-wise
    following NumPy broadcasting rules.

    Uses the haversine formula with a spherical Earth of radius 6371 km.
    """
    R = 6371  # Earth radius in kilometers
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = (
        np.sin(delta_phi / 2) ** 2
        + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# --------------------------------------------------------
# 7) Modified predict_climate function using the Logistic Regression model
# --------------------------------------------------------
def predict_climate_sigmoid(lat, lon, start_month, end_month):
    """Classify the climate at a location using its nearest station.

    Relies on the module-level ``df`` (station table), ``monthly_cols``
    (the twelve monthly column names), ``clf`` (fitted classifier) and
    ``haversine``.

    Args:
        lat: Latitude of the query point in decimal degrees.
        lon: Longitude of the query point in decimal degrees.
        start_month: First month of the averaging window, 1-12.
        end_month: Last month of the averaging window, 1-12 (inclusive).
            If it is smaller than ``start_month`` the window wraps across
            the year end, e.g. ``(11, 2)`` covers months 11, 12, 1, 2.

    Returns:
        A dict with:
          - 'Station': the 'Station' value of the nearest row in ``df``
          - 'PredictedLabel': the class predicted by ``clf`` as an int
          - 'AverageTemp': mean of the station's monthly values over the
            requested window, as a float
          - 'Probabilities': dict mapping each class in ``clf.classes_``
            to its predicted probability

    Raises:
        ValueError: If ``start_month`` or ``end_month`` is outside 1-12.
    """
    for arg_name, month in (('start_month', start_month), ('end_month', end_month)):
        if not 1 <= month <= 12:
            raise ValueError(f"{arg_name} must be between 1 and 12, got {month!r}")

    # 1) Find nearest station by distance. haversine is built from NumPy
    #    ufuncs, so it handles the whole coordinate columns in one call
    #    instead of a Python-level call per row.
    distances = haversine(
        lat,
        lon,
        df['Latitude'].astype(float),
        df['Longitude'].astype(float),
    )
    nearest_idx = distances.idxmin()
    station_name = df.loc[nearest_idx, 'Station']

    # 2) Extract the 12-month vector of that station, shaped (1, 12) as the
    #    classifier expects a 2-D sample matrix.
    x_station = df.loc[nearest_idx, monthly_cols].astype(float).values.reshape(1, -1)

    # 3) Predict the class label using logistic regression
    predicted_label = int(clf.predict(x_station)[0])

    # 4) Get prediction probabilities, keyed by class; converted to plain
    #    Python numbers so the result prints and serializes cleanly.
    prob_array = clf.predict_proba(x_station)[0]
    class_probabilities = {
        int(cls): float(prob) for cls, prob in zip(clf.classes_, prob_array)
    }

    # 5) Compute the average over the requested month window, wrapping
    #    around the year end when start_month > end_month.
    if start_month <= end_month:
        months = list(range(start_month, end_month + 1))
    else:
        months = [*range(start_month, 13), *range(1, end_month + 1)]
    month_keys = [str(m) for m in months]
    temps = df.loc[nearest_idx, month_keys].astype(float).values
    average_temp = float(np.mean(temps))

    return {
        'Station': station_name,
        'PredictedLabel': predicted_label,
        'AverageTemp': average_temp,
        'Probabilities': class_probabilities,
    }

# --------------------------------------------------------
# 8) Usage demo:
#    Query two coordinate pairs for months 6 through 8 and print each
#    result dict, preceded by a blank line.
# --------------------------------------------------------
demo_points = (
    (24.0, 121.0),
    (23.405163, 120.222536),
)
for demo_lat, demo_lon in demo_points:
    example_output = predict_climate_sigmoid(demo_lat, demo_lon, 6, 8)
    print('\n', example_output)



FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Tino/Documents/PowerComsuption_MachineLearning/PowerConsumtionPrediction/Clean Data/Meteorological Station Data Clean.csv'