### Climate Model

In [9]:
file_path = 'C:/Users/Tino/Documents/PowerComsuption_MachineLearning/PowerConsumtionPrediction/Clean Data/Meteorological Station Data Clean.csv'

##  Climate Model Approach 1 (Probability of each Cluster)

In [10]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# --------------------------------------------------------
# 1) Load the dataset
# --------------------------------------------------------
df = pd.read_csv(file_path)

# --------------------------------------------------------
# 2) Choose a target month (1 through 12) for classification
#    For example, to classify July, set target_month = 7.
#    The code below automatically computes thresholds (33rd & 66th percentiles)
#    on that month's station values to assign labels {-1, 0, 1}.
# --------------------------------------------------------
target_month = 12  # <-- change this to any month from 1..12

# --------------------------------------------------------
# 3) Extract the temperature values for the chosen month
# --------------------------------------------------------
# Convert to float in case the CSV columns are strings
monthly_vals = df[str(target_month)].astype(float).values  # shape: (n_stations,)

# --------------------------------------------------------
# 4) Compute 33rd & 66th percentiles to define three classes:
#      label = -1 if temp < p33
#              0  if p33 ≤ temp < p66
#              1  if temp ≥ p66
# --------------------------------------------------------
p33 = np.percentile(monthly_vals, 33)
p66 = np.percentile(monthly_vals, 66)

def assign_label(temp):
    if temp < p33:
        return -1
    elif temp < p66:
        return 0
    else:
        return 1

labels = np.array([assign_label(t) for t in monthly_vals])  # shape: (n_stations,)

# --------------------------------------------------------
# 5) Prepare features X = (Latitude, Longitude) and target y = labels
# --------------------------------------------------------
X = df[['Latitude', 'Longitude']].values.astype(float)  # shape: (n_stations, 2)
y = labels                                           # shape: (n_stations,)

# --------------------------------------------------------
# 6) Fit a K-Nearest Neighbors Classifier
#    - n_neighbors = 3 (use 3 nearest stations; adjust as needed)
#    - weights = 'distance' so closer stations count more in voting
# --------------------------------------------------------
knn_clf = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn_clf.fit(X, y)

# --------------------------------------------------------
# 7) Helper function: Predict class (-1, 0, or 1) for a given (lat, lon)
#    and return probabilities for each class.
# --------------------------------------------------------
def predict_month_class(lat, lon):
    """
    Input:
      lat, lon → floats: a point in Taiwan (decimal degrees)
    Returns:
      {
        'PredictedLabel': one of -1, 0, 1,
        'Probabilities': { -1: p_neg1, 0: p_zero, 1: p_pos1 }
      }
    """
    # 1) Build the query point
    query_pt = np.array([[lat, lon]], dtype=float)  # shape: (1,2)

    # 2) Predict class
    pred_label = int(knn_clf.predict(query_pt)[0])

    # 3) Predict probabilities for each class
    prob_array = knn_clf.predict_proba(query_pt)[0]  # shape: (n_classes,)
    # Map the classifier's classes_ array to probabilities
    probs = {int(cls): float(prob) for cls, prob in zip(knn_clf.classes_, prob_array)}

    return {
        'PredictedLabel': pred_label,
        'Probabilities': probs
    }

# --------------------------------------------------------
# 8) Example usage
# --------------------------------------------------------
if __name__ == "__main__":
    # Example: classify July (target_month=7) at (24.0°N, 121.0°E)
    lat_example, lon_example = 23.3751037, 120.1370086
    result = predict_month_class(lat_example, lon_example)
    print(f"Target month: {target_month}")
    print(f"Predicted class at ({lat_example}, {lon_example}): {result['PredictedLabel']}")
    print("Class probabilities:")
    for cls in sorted(result['Probabilities'].keys()):
        print(f"  Class {cls}: {result['Probabilities'][cls]:.3f}")


Target month: 12
Predicted class at (23.3751037, 120.1370086): 0
Class probabilities:
  Class -1: 0.000
  Class 0: 0.675
  Class 1: 0.325


##  Climate Model Approach 2 (Multiply each Probability cluster and final output a sum of them)

In [11]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# --------------------------------------------------------
# 1) Load the dataset
# --------------------------------------------------------
df = pd.read_csv(file_path)

# --------------------------------------------------------
# 2) Choose a target month (1 through 12) for classification
#    For example, to classify December, set target_month = 12.
#    The code below computes 33rd & 66th percentiles on that month's
#    station values to assign labels {-1, 0, 1}.
# --------------------------------------------------------
target_month = 12  # <-- change to any month from 1..12

# --------------------------------------------------------
# 3) Extract the temperature values for the chosen month
# --------------------------------------------------------
monthly_vals = df[str(target_month)].astype(float).values  # shape: (n_stations,)

# --------------------------------------------------------
# 4) Compute 33rd & 66th percentiles to define three classes:
#      label = -1 if temp < p33
#              0  if p33 ≤ temp < p66
#              1  if temp ≥ p66
# --------------------------------------------------------
p33 = np.percentile(monthly_vals, 33)
p66 = np.percentile(monthly_vals, 66)

def assign_label(temp):
    if temp < p33:
        return -1
    elif temp < p66:
        return 0
    else:
        return 1

labels = np.array([assign_label(t) for t in monthly_vals])  # shape: (n_stations,)

# --------------------------------------------------------
# 5) Prepare features X = (Latitude, Longitude) and target y = labels
# --------------------------------------------------------
X = df[['Latitude', 'Longitude']].values.astype(float)  # shape: (n_stations, 2)
y = labels                                            # shape: (n_stations,)

# --------------------------------------------------------
# 6) Fit a K-Nearest Neighbors Classifier
#    - n_neighbors = 3 (use 3 nearest stations; adjust as needed)
#    - weights = 'distance' so closer stations count more in voting
# --------------------------------------------------------
knn_clf = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn_clf.fit(X, y)

# --------------------------------------------------------
# 7) Helper function: Predict "score" in [-1, 1] by multiplying
#    each class label by its probability and summing:
#      score = (-1)*P(-1) + 0*P(0) + (+1)*P(1) = P(1) - P(-1)
# --------------------------------------------------------
def predict_month_score(lat, lon):
    """
    Input:
      lat, lon → floats: a point in Taiwan (decimal degrees)
    Returns:
      {
        'Score': float between -1 and +1,
        'Probabilities': { -1: p_neg1, 0: p_zero, 1: p_pos1 }
      }
      where Score = sum(label * probability).
    """
    # 1) Build the query point
    query_pt = np.array([[lat, lon]], dtype=float)  # shape: (1,2)

    # 2) Obtain probabilities for each class
    prob_array = knn_clf.predict_proba(query_pt)[0]  # shape: (n_classes,)
    class_probs = {int(cls): float(prob) for cls, prob in zip(knn_clf.classes_, prob_array)}

    # 3) Compute the weighted sum: sum(label * prob)
    score = sum(label * class_probs[label] for label in class_probs)

    return {
        'Score': score,
        'Probabilities': class_probs
    }

# --------------------------------------------------------
# 8) Example usage
# --------------------------------------------------------
if __name__ == "__main__":
    # Example: compute score for target_month at (23.3751037°N, 120.1370086°E)
    lat_example, lon_example = 23.3751037, 120.1370086
    result = predict_month_score(lat_example, lon_example)

    print(f"Target month: {target_month}")
    print(f"Predicted score at ({lat_example}, {lon_example}): {result['Score']:.3f}")
    print("Class probabilities:")
    for cls in sorted(result['Probabilities'].keys()):
        print(f"  Class {cls}: {result['Probabilities'][cls]:.3f}")


Target month: 12
Predicted score at (23.3751037, 120.1370086): 0.325
Class probabilities:
  Class -1: 0.000
  Class 0: 0.675
  Class 1: 0.325


# Climate Model, But with temp climate prediction 

In [12]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

# --------------------------------------------------------
# 1) Load the dataset
# --------------------------------------------------------
# Make sure this path matches where your CSV is stored
df = pd.read_csv(file_path)

# --------------------------------------------------------
# 2) Prepare X (latitude, longitude) and Y (12-month targets)
# --------------------------------------------------------
# X: geographic coordinates of each station
X = df[['Latitude', 'Longitude']].values.astype(float)   # shape: (n_stations, 2)

# Y: 12-month temperature vector for each station
monthly_cols = [str(m) for m in range(1, 13)]
Y = df[monthly_cols].astype(float).values                # shape: (n_stations, 12)

# --------------------------------------------------------
# 3) Instantiate and fit a K-Nearest Neighbors Regressor
#    - n_neighbors=3 (you can change this)
#    - weights='distance' for inverse-distance weighting
# --------------------------------------------------------
knn = KNeighborsRegressor(n_neighbors=3, weights='distance')
knn.fit(X, Y)

# --------------------------------------------------------
# 4) Helper function: predict the full 12-month temperature profile
#    at any (lat, lon) using the fitted KNN regressor.
# --------------------------------------------------------
def predict_monthly_profile(lat, lon):
    """
    Input:
      lat, lon → floats: a point in Taiwan (decimal degrees)
    Returns:
      A NumPy array of length 12: [PredTemp_Jan, PredTemp_Feb, ..., PredTemp_Dec]
    """
    # 1) Build the query point array of shape (1,2)
    query_point = np.array([[lat, lon]], dtype=float)
    
    # 2) Use the KNN model to predict (1,12), then flatten to (12,)
    profile = knn.predict(query_point).ravel()
    return profile

# --------------------------------------------------------
# 5) Helper function: predict the average temperature over a month range
#    (start_month to end_month, inclusive).
# --------------------------------------------------------
def predict_range_average(lat, lon, start_month, end_month):
    """
    Input:
      lat, lon       → floats: a point in Taiwan
      start_month    → int in [1..12]
      end_month      → int in [start_month..12]
    Returns:
      A single float = average of predicted temperatures from start_month to end_month.
    """
    # 1) Get the full 12-month profile
    full_profile = predict_monthly_profile(lat, lon)   # array length 12
    
    # 2) Slice the desired months. Python indices: month m corresponds to index (m-1).
    selected_slice = full_profile[start_month - 1 : end_month]
    return float(np.mean(selected_slice))

# --------------------------------------------------------
# 6) Example usage
# --------------------------------------------------------
if __name__ == "__main__":
    # Example: predict the 12-month temperatures at (24.0°N, 121.0°E)
    lat_example, lon_example = 23.405163, 120.222536
    predicted_profile = predict_monthly_profile(lat_example, lon_example)
    print("Predicted 12-month profile at (24.0, 121.0):")
    for month_idx, temp in enumerate(predicted_profile, start=1):
        print(f"  Month {month_idx:2d}: {temp:.2f} °C")
    
    # Example: predict the average temperature from June (6) to August (8)
    avg_JJA = predict_range_average(lat_example, lon_example, 6, 7)
    print(f"\nPredicted June–August average at (23.405163, 120.222536): {avg_JJA:.2f} °C")

    


Predicted 12-month profile at (24.0, 121.0):
  Month  1: 17.55 °C
  Month  2: 19.52 °C
  Month  3: 21.37 °C
  Month  4: 26.27 °C
  Month  5: 26.45 °C
  Month  6: 29.14 °C
  Month  7: 29.33 °C
  Month  8: 29.12 °C
  Month  9: 28.56 °C
  Month 10: 26.50 °C
  Month 11: 22.95 °C
  Month 12: 18.13 °C

Predicted June–August average at (23.405163, 120.222536): 29.24 °C


## Classification Model

In [13]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
import numpy as np

# --------------------------------------------------------
# 1) Load the dataset
# --------------------------------------------------------
file_path = 'C:/Users/Tino/Documents/PowerComsuption_MachineLearning/PowerConsumtionPrediction/Clean Data/Meteorological Station Data Clean.csv'
df = pd.read_csv(file_path)

# --------------------------------------------------------
# 2) Prepare data for clustering: columns "1" through "12"
# --------------------------------------------------------
monthly_cols = [str(i) for i in range(1, 13)]
X = df[monthly_cols].astype(float).values  # shape: (n_stations, 12)

# --------------------------------------------------------
# 3) Run KMeans (e.g., k = 3 clusters)
# --------------------------------------------------------
k = 3
kmeans = KMeans(n_clusters=k, random_state=42)
labels = kmeans.fit_predict(X)
df['Cluster'] = labels

# --------------------------------------------------------
# 4) Map KMeans clusters {0,1,2} to target labels {-1, 0, 1}
#    (you can adjust this mapping if desired)
# --------------------------------------------------------
cluster_to_label = {0: -1, 1: 0, 2: 1}
df['Label'] = df['Cluster'].map(cluster_to_label)
y = df['Label'].values  # target array of shape (n_stations,)

# --------------------------------------------------------
# 5) Train a (multinomial) Logistic Regression on the 12-month vectors
#    This will be our "sigmoid-based" classifier with three classes: -1, 0, 1
# --------------------------------------------------------
clf = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=42, max_iter=1000)
clf.fit(X, y)

# --------------------------------------------------------
# 6) Define Haversine distance for nearest-station lookup
# --------------------------------------------------------
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in kilometers
    φ1, φ2 = np.radians(lat1), np.radians(lat2)
    Δφ = np.radians(lat2 - lat1)
    Δλ = np.radians(lon2 - lon1)
    a = np.sin(Δφ / 2)**2 + np.cos(φ1) * np.cos(φ2) * np.sin(Δλ / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# --------------------------------------------------------
# 7) Modified predict_climate function using the Logistic Regression model
# --------------------------------------------------------
def predict_climate_sigmoid(lat, lon, start_month, end_month):
    """
    Given lat/lon in Taiwan and a month range, returns:
      - 'Station': nearest station name/ID
      - 'PredictedLabel': one of -1, 0, or 1 (sigmoid-based classification)
      - 'AverageTemp': average temperature over [start_month, end_month]
      - 'Probabilities': probability for each class [-1, 0, 1]
    """
    # 1) Find nearest station by distance
    distances = df.apply(
        lambda row: haversine(lat, lon, row['Latitude'], row['Longitude']),
        axis=1
    )
    nearest_idx = distances.idxmin()
    station_name = df.loc[nearest_idx, 'Station']

    # 2) Extract the 12-month vector of that station
    x_station = df.loc[nearest_idx, monthly_cols].astype(float).values.reshape(1, -1)  # shape (1,12)

    # 3) Predict label (-1, 0, or 1) using logistic regression
    predicted_label = int(clf.predict(x_station)[0])

    # 4) Get prediction probabilities
    prob_array = clf.predict_proba(x_station)[0]
    # Map classes to their probabilities
    class_probabilities = dict(zip(clf.classes_, prob_array))

    # 5) Compute average temperature over the requested month range
    month_keys = [str(m) for m in range(start_month, end_month + 1)]
    temps = df.loc[nearest_idx, month_keys].astype(float).values
    average_temp = float(np.mean(temps))

    return {
        'Station': station_name,
        'PredictedLabel': predicted_label,
        'AverageTemp': average_temp,
        'Probabilities': class_probabilities
    }

# --------------------------------------------------------
# 8) Demonstrate usage:
#    Example: predict climate at (24.0, 121.0) for June (6) through August (8)
# --------------------------------------------------------
example_output = predict_climate_sigmoid(24.0, 121.0, 6, 8)
print('\n',example_output)

example_output = predict_climate_sigmoid(23.405163, 120.222536, 6, 8) 
print('\n',example_output)




 {'Station': '南投  Nantou C0I460', 'PredictedLabel': 1, 'AverageTemp': 29.0, 'Probabilities': {-1: 0.007032281008512765, 0: 0.026477296206278075, 1: 0.9664904227852092}}

 {'Station': '嘉義  CHIAYI 467480', 'PredictedLabel': 1, 'AverageTemp': 29.100000000000005, 'Probabilities': {-1: 0.0029996198601617103, 0: 0.08226002913470926, 1: 0.914740351005129}}


