In [1]:
# Importing required libraries 
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Global seaborn styling applied to the figures created below.
# NOTE(review): three style calls run back to back; the last one ('white')
# presumably decides the final axes style -- confirm whether the first two
# calls are still needed.
sns.set_theme(style="darkgrid")
sns.set(style='ticks')
sns.set_style('white')
%matplotlib inline

from sklearn.model_selection import train_test_split

from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score, RandomizedSearchCV, GridSearchCV

import warnings
warnings.filterwarnings("ignore")

# Load resampled_data from the pickle file
def load_from_pickle(filename):
    """Read the pickle file at ``filename`` and return the object stored in it.

    The file is opened in binary mode and closed again as soon as the
    object has been loaded.
    """
    with open(filename, mode='rb') as pickle_file:
        return pickle.load(pickle_file)

def _six_hour_numeric_means(frame):
    """Return the 6-hour means of the numeric (and boolean) columns of ``frame``."""
    # Non-numeric columns such as the "Region" label are dropped first:
    # averaging them raises a TypeError on pandas >= 2.0, where the
    # aggregation no longer discards such columns silently.
    # NOTE(review): newer pandas releases prefer the lowercase 'h' alias over
    # 'H' -- confirm against the pandas version this notebook is pinned to.
    return frame.select_dtypes(include=["number", "bool"]).resample('6H').mean()


def _plot_actual_predicted_scatter(test_data, test_predictions, output_columns, x_column):
    """Draw one scatter panel per region with actual (red) and predicted (blue) points.

    Shared implementation of the three ``*_actual_predicted_scatter`` entry
    points, which differ only in the column plotted on the x axis.

    Args:
        test_data: DataFrame holding a "Region" column plus ``x_column`` and
            ``output_columns``. It is resampled, so its index is assumed to
            be datetime-like.
        test_predictions: Iterable of DataFrames with the same columns. The
            k-th item is drawn on the k-th panel, so it is assumed to follow
            the iteration order of ``test_data.groupby("Region")``.
        output_columns: Column(s) passed as ``y`` to the scatter plot.
        x_column: Name of the column plotted on the x axis.
    """
    test_predictions = list(test_predictions)
    region_groups = list(test_data.groupby("Region"))

    # Keep the original 2 x 3 grid and only add rows when there are more
    # than six panels to draw (the fixed grid used to raise IndexError).
    n_cols = 3
    n_panels = max(len(region_groups), len(test_predictions))
    n_rows = max(2, -(-n_panels // n_cols))
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(20, 6 * n_rows), constrained_layout=True)
    # Row-major flattening visits the panels left to right, top to bottom.
    panels = axes.flatten()

    # Styling parameters
    marker_size = 50
    marker_color = 'blue'
    marker_alpha = 0.5

    # Actual values: one titled panel per region.
    for ax, (region, region_data) in zip(panels, region_groups):
        _six_hour_numeric_means(region_data).plot(ax=ax, x=x_column, y=output_columns, kind="scatter", color="red", title=region, s=marker_size, alpha=marker_alpha, label="Actual Data")

    # Predicted values, overlaid on the same panels in the same order.
    for ax, region_predictions in zip(panels, test_predictions):
        _six_hour_numeric_means(region_predictions).plot(ax=ax, x=x_column, y=output_columns, kind="scatter", color=marker_color, s=marker_size, alpha=marker_alpha, label="Predicted Data")

    # Remove unused subplots
    for unused_ax in panels[len(test_data["Region"].unique()):]:
        fig.delaxes(unused_ax)

    plt.show()


def wetbulb_temperature_actual_predicted_scatter(test_data, test_predictions, output_columns):
    """Scatter actual and predicted ``output_columns`` against "WetBulbTemperature".

    One panel is drawn per region of ``test_data``; both data sets are
    averaged over 6-hour bins before plotting. Shows the figure and returns
    nothing.
    """
    _plot_actual_predicted_scatter(test_data, test_predictions, output_columns, "WetBulbTemperature")


def air_temperature_actual_predicted_scatter(test_data, test_predictions, output_columns):
    """Scatter actual and predicted ``output_columns`` against "AirTemperature".

    One panel is drawn per region of ``test_data``; both data sets are
    averaged over 6-hour bins before plotting. Shows the figure and returns
    nothing.
    """
    _plot_actual_predicted_scatter(test_data, test_predictions, output_columns, "AirTemperature")


def dewpoint_temperature_actual_predicted_scatter(test_data, test_predictions, output_columns):
    """Scatter actual and predicted ``output_columns`` against "DewTemperature".

    One panel is drawn per region of ``test_data``; both data sets are
    averaged over 6-hour bins before plotting. Shows the figure and returns
    nothing.
    """
    _plot_actual_predicted_scatter(test_data, test_predictions, output_columns, "DewTemperature")

def _daily_numeric_means(frame):
    """Return the daily means of the numeric (and boolean) columns of ``frame``."""
    # Non-numeric columns such as the "Region" label are dropped first:
    # averaging them raises a TypeError on pandas >= 2.0.
    return frame.select_dtypes(include=["number", "bool"]).resample('D').mean()


def actual_predicted_demand_line_plot(test_data, test_predictions, output_columns):
    """Plot daily-mean actual and predicted demand, one stacked panel per region.

    Args:
        test_data: DataFrame holding a "Region" column and ``output_columns``.
            It is resampled, so its index is assumed to be datetime-like.
        test_predictions: Indexable collection of DataFrames; item ``i`` is
            drawn on the panel of the ``i``-th group yielded by
            ``test_data.groupby("Region")``.
        output_columns: Column(s) selected from the daily means and plotted.

    Shows the figure and returns nothing.
    """
    # Define Tableau Palette colors
    tableau_blue = (31/255, 119/255, 180/255)
    tableau_orange = (255/255, 127/255, 14/255)

    region_groups = list(test_data.groupby("Region"))

    # One row per region instead of a fixed five; 3 inches per row keeps the
    # original 20 x 15 figure for five regions. squeeze=False keeps `axes`
    # two-dimensional even when there is a single row.
    n_rows = max(1, len(region_groups))
    _fig, axes = plt.subplots(nrows=n_rows, figsize=(20, 3 * n_rows), squeeze=False, constrained_layout=True)
    panels = axes[:, 0]

    for i, (region, region_data) in enumerate(region_groups):
        ax = panels[i]

        # Actual demand in Tableau Blue
        _daily_numeric_means(region_data)[output_columns].plot(ax=ax, title=region, color=tableau_blue)

        # Predicted demand in Tableau Orange, drawn on the same panel
        _daily_numeric_means(test_predictions[i])[output_columns].plot(ax=ax, color=tableau_orange)

        ax.set_ylabel("Adjusted Demand")
        ax.legend(["Demand", "Demand Prediction"])

    plt.show()

def actual_predicted_2019_demand_line_plot(test_data, test_predictions, output_columns):
    """Plot actual and predicted demand per region, restricted to the year 2019.

    Rows whose index year is 2019 are kept from ``test_data`` and from every
    frame in ``test_predictions``; the drawing itself is delegated to
    ``actual_predicted_demand_line_plot`` so both plots share one
    implementation.

    Args:
        test_data: DataFrame with a datetime-like index (``.index.year`` is
            read) and the columns expected by
            ``actual_predicted_demand_line_plot``.
        test_predictions: Iterable of DataFrames with a datetime-like index.
        output_columns: Column(s) to plot, passed through unchanged.

    Shows the figure and returns nothing.
    """
    # Filter data for the year 2019
    test_data_2019 = test_data[test_data.index.year == 2019]
    test_predictions_2019 = [pred[pred.index.year == 2019] for pred in test_predictions]

    actual_predicted_demand_line_plot(test_data_2019, test_predictions_2019, output_columns)

def feature_importance_regional_bar_plot(regressors, train_data, input_columns):
    """Show one horizontal bar chart of normalized feature importances per regressor.

    Args:
        regressors: Iterable of fitted search objects; ``best_estimator_``
            of each must expose ``feature_importances_``.
        train_data: DataFrame with a "Region" column. The ``idx``-th unique
            region value is used as the title of the ``idx``-th regressor,
            so the regressors are assumed to follow that order.
        input_columns: Feature names, indexable by feature position.

    Importances are rescaled to sum to 100, sorted in descending order, and
    features with zero importance are left out. Each figure is shown
    immediately; nothing is returned.
    """
    # Define a custom color palette with 10 distinct colors from the 'tab10' palette
    custom_palette = sns.color_palette("tab10")

    # The region labels do not change between iterations, so look them up once.
    region_names = train_data["Region"].unique()

    for idx, grid_search in enumerate(regressors):
        region = region_names[idx]
        best_model = grid_search.best_estimator_

        feature_importance = best_model.feature_importances_
        feature_importance_normalized = np.divide(feature_importance, np.sum(feature_importance)) * 100  # Normalize to sum up to 100
        sorted_indices = np.argsort(feature_importance)[::-1]

        # Filter features with importance greater than zero
        non_zero_indices = sorted_indices[feature_importance_normalized[sorted_indices] > 0]
        non_zero_importance = feature_importance_normalized[non_zero_indices]
        bar_positions = range(len(non_zero_indices))

        # One color per region; wrap around so more regressors than palette
        # entries reuse colors instead of raising IndexError.
        bar_color = custom_palette[idx % len(custom_palette)]

        plt.figure(figsize=(17, 8))
        bars = plt.barh(bar_positions, non_zero_importance, align='center', color=bar_color, edgecolor='black')
        plt.yticks(bar_positions, [input_columns[i] for i in non_zero_indices], fontsize=12)
        plt.xticks(fontsize=12)
        plt.xlabel('Normalized Importance', fontsize=14)
        plt.title(f'Feature Importance for {region} model', fontsize=16, fontweight="bold")
        # Largest importance at the top of the chart.
        plt.gca().invert_yaxis()
        plt.grid(axis='x', linestyle='--', alpha=0.7)

        # Add feature importance values in front of the bars
        for bar, importance in zip(bars, non_zero_importance):
            plt.text(bar.get_width(), bar.get_y() + bar.get_height() / 2, f'{importance:.2f}%', va='center', ha='left', fontsize=12, color='black')

        plt.tight_layout()
        plt.show()

# Features to select: the column names whose importances the weather
# feature-importance plot should display -- presumably passed as its
# `selected_features` argument (the call site is not shown here; confirm).
selected_features = ['Precipitation', 'RelativeHumidity%', 'AirTemperature', 'WetBulbTemperature', 'DewTemperature',
                     'SeaPressure', 'StationPressure']
        
def weather_feature_importance_regional_bar_plot(regressors, train_data, input_columns, selected_features):
    """Show a grid of bar charts with the importances of selected features per regressor.

    Args:
        regressors: Iterable of fitted search objects; ``best_estimator_``
            of each must expose ``feature_importances_``.
        train_data: DataFrame with a "Region" column. The ``idx``-th unique
            region value is used as the title of the ``idx``-th regressor,
            so the regressors are assumed to follow that order.
        input_columns: Feature names in the order of the importance array.
            Any sequence is accepted; it is converted to a list internally.
        selected_features: Names of the features to display. Names that are
            not in ``input_columns`` are ignored.

    Importances are rescaled to sum to 100 over all features before the
    selection is made, so the displayed bars need not add up to 100. The
    figure is shown; nothing is returned.
    """
    regressors = list(regressors)
    # list.index() is needed below; a pandas Index or tuple does not offer it.
    column_names = list(input_columns)
    region_names = train_data["Region"].unique()

    # Five colors as before; more only if there are more than five regressors,
    # so a sixth panel of the 3 x 2 grid no longer overflows the palette.
    custom_palette = sns.color_palette("husl", max(5, len(regressors)))

    # tight_layout() at the end decides the final layout. constrained_layout
    # is not requested here because the two layout mechanisms are mutually
    # exclusive and tight_layout() would override it anyway.
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(20, 12))
    # Row-major order: panel idx sits at row idx // 2, column idx % 2.
    panels = axes.flatten()

    # The selection only depends on the column names, so resolve it once.
    selected_indices = [column_names.index(feature) for feature in selected_features if feature in column_names]
    selected_columns = [column_names[i] for i in selected_indices]

    for idx, grid_search in enumerate(regressors):
        region = region_names[idx]
        best_model = grid_search.best_estimator_

        # Assuming best_model is a LightGBM model
        feature_importance = best_model.feature_importances_
        feature_importance_normalized = np.divide(feature_importance, np.sum(feature_importance)) * 100  # Normalize to sum up to 100

        # Keep only the selected features and order them by descending importance
        selected_importance = feature_importance_normalized[selected_indices]
        sorted_indices = np.argsort(selected_importance)[::-1]
        sorted_importance = selected_importance[sorted_indices]
        sorted_columns = [selected_columns[i] for i in sorted_indices]

        ax = panels[idx]
        bar_positions = range(len(sorted_columns))

        # Plotting with a unique color for each region
        bars = ax.barh(bar_positions, sorted_importance, align='center', color=custom_palette[idx], edgecolor='black')
        ax.set_yticks(bar_positions)
        ax.set_yticklabels(sorted_columns, fontsize=12)
        # The x ticks are hidden; the values are printed next to the bars instead.
        ax.set_xticks([])
        ax.set_xlabel('Normalized Importance', fontsize=14)
        ax.set_title(f'Feature Importance for {region} model', fontsize=16, fontweight="bold")
        # Largest importance at the top of the panel.
        ax.invert_yaxis()
        # NOTE(review): with the x ticks removed above this grid call
        # presumably draws nothing -- confirm whether it can be dropped.
        ax.grid(axis='x', linestyle='--', alpha=0.7)

        # Add feature importance values in front of the bars
        for bar, importance in zip(bars, sorted_importance):
            ax.text(bar.get_width(), bar.get_y() + bar.get_height() / 2, f'{importance:.2f}%', va='center', ha='left', fontsize=12, color='black')

    # Remove unused subplots
    for unused_ax in panels[len(region_names):]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()
