In [1]:
import pickle
import numpy as np
import boto3
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import joypy
from matplotlib import cm
from scipy.stats import gaussian_kde
import calendar

sns.set_theme(style="darkgrid")
sns.set(style='ticks')
sns.set_style('white')
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_regression

import warnings
warnings.filterwarnings("ignore")

def load_data_from_s3(bucket_name, master_table_key, resampled_data_key):
    """Fetch the two pickled tables from S3 and return them as DataFrames.

    Both objects are downloaded into the current working directory as
    ``master_table.pkl`` and ``resampled_data.pkl`` (overwriting any file of
    the same name) and then read back with ``pandas.read_pickle``.

    Parameters
    ----------
    bucket_name : str
        Name of the S3 bucket holding both objects.
    master_table_key : str
        Object key of the pickled master table.
    resampled_data_key : str
        Object key of the pickled resampled table.

    Returns
    -------
    tuple
        ``(master_table_df, resampled_data_df)`` in that order.
    """
    local_master_path = 'master_table.pkl'
    local_resampled_path = 'resampled_data.pkl'

    client = boto3.client('s3')

    # Download everything first, then load, so a failed download is reported
    # before any unpickling is attempted.
    downloads = (
        (master_table_key, local_master_path),
        (resampled_data_key, local_resampled_path),
    )
    for object_key, local_path in downloads:
        client.download_file(bucket_name, object_key, local_path)

    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # buckets whose contents are trusted.
    return pd.read_pickle(local_master_path), pd.read_pickle(local_resampled_path)

# Load both tables from the S3 bucket (this runs at import time and needs
# AWS credentials that boto3 can discover).
bucket_name = 'cldatabucket'
master_table_key = 'master_table.pkl'
resampled_data_key = 'resampled_data.pkl'

master_table, resampled_data = load_data_from_s3(bucket_name, master_table_key, resampled_data_key)

# Restrict the master table to the labels 2015-01-01 .. 2020-01-01.
# NOTE(review): label slicing like this presumably relies on a sorted
# DatetimeIndex - confirm against how the pickles were built.
filtered_data1 = master_table.loc['2015-01-01':'2020-01-01']

# Same date window applied to the resampled table.
filtered_data = resampled_data.loc['2015-01-01':'2020-01-01']

# Feature columns fed to the mutual-information scoring; 'Region' is
# deliberately not part of this list.
input_columns_without_region = ['Month', 'Day', 'Hour','Precipitation', 'AirTemperature', 'WetBulbTemperature','DewTemperature','RelativeHumidity%',
                                'SeaPressure', 'StationPressure','WeekOfMonth', 'Quarter', 'DayOfYear', 'TemperatureDifference','PressureDifference',
                                'DayNightTempDifference','DayOfWeek_Friday', 'DayOfWeek_Monday', 'DayOfWeek_Saturday','DayOfWeek_Sunday', 'DayOfWeek_Thursday', 
                                'DayOfWeek_Tuesday','DayOfWeek_Wednesday', 'Season_Autumn', 'Season_Spring','Season_Summer', 'Season_Winter',
                                'TimeOfDay_Afternoon','TimeOfDay_Evening', 'TimeOfDay_Morning', 'TimeOfDay_Night', 'IsWeekend_False', 'IsWeekend_True', 
                                'TemperatureLevel_Cold', 'TemperatureLevel_Hot','TemperatureLevel_Mild', 'TemperatureLevel_Very Cold','TemperatureLevel_Warm', 
                                'PrecipitationLevel_Heavy Precipitation','PrecipitationLevel_Light Precipitation','PrecipitationLevel_Moderate Precipitation',
                                'PrecipitationLevel_No Precipitation', 'PressureLevel_High Pressure','PressureLevel_Low Pressure','PressureLevel_Normal Pressure',
                                'HumidityLevel_High Moist', 'HumidityLevel_Low Moist','HumidityLevel_Moderate Moist'
                               ]
# Target column for the mutual-information scoring.
output_columns = ["TotalDemand"]

def mi_score_plot(resampled_data, input_columns_without_region, output_columns):
    """Plot mutual-information scores of the input features against the target.

    The leading 80% of rows (positional split, no shuffling) are scored with
    ``mutual_info_regression``.  Features whose score is zero are dropped and
    the remainder is drawn as a horizontal bar chart, highest score on top,
    with the rounded score printed at the end of each bar.

    Parameters
    ----------
    resampled_data : pandas.DataFrame
        Table containing every column named in the other two arguments.
    input_columns_without_region : list of str
        Feature columns to score.
    output_columns : list of str
        Target column selection; its values are flattened to 1-D before
        scoring, so it is expected to name a single column.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Seeded generator so the mutual-information estimate is repeatable.
    random_state = np.random.RandomState(seed=42)

    # With shuffle=False the split is purely positional, so no random state is
    # involved; only the training part is needed here.
    train_data, _ = train_test_split(resampled_data, shuffle=False, test_size=0.2)

    train_input_data = train_data[input_columns_without_region]
    train_target = train_data[output_columns].values.ravel()

    mi_scores = mutual_info_regression(train_input_data,
                                       train_target,
                                       random_state=random_state)

    # Pair every score with its feature name.
    mi_scores_df = pd.DataFrame({"Feature": input_columns_without_region,
                                 "Mutual_Information_Score": mi_scores})

    # Features carrying no information about the target are not plotted.
    mi_scores_df = mi_scores_df[mi_scores_df['Mutual_Information_Score'] > 0]

    # Sort descending, then reverse: barh draws the first row at the bottom,
    # so the largest score ends up at the top of the chart.
    mi_scores_df = mi_scores_df.sort_values(by='Mutual_Information_Score', ascending=False)[::-1]

    plt.figure(figsize=(14, 10))
    bars = plt.barh(mi_scores_df['Feature'], mi_scores_df['Mutual_Information_Score'], color='skyblue', edgecolor='black')

    # Print each score just past the end of its bar.
    for bar, score in zip(bars, mi_scores_df['Mutual_Information_Score']):
        plt.text(bar.get_width(), bar.get_y() + bar.get_height()/2, round(score, 5),
                 va='center', ha='left', fontsize=10, color='black')

    plt.xlabel('Mutual Information Score', fontsize=12)
    plt.ylabel('Feature', fontsize=12)
    plt.title('Mutual Information Score for Features', fontsize=16, fontweight="bold")
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.tight_layout()
    plt.show()
    
def regional_demand_line_plot(filtered_data1):
    """Draw one monthly-mean ``TotalDemand`` line chart per region on a 2x3 grid.

    ``filtered_data1`` needs a ``Region`` column, a ``TotalDemand`` column and
    an index that ``resample`` accepts.  At most five regions are drawn because
    the colour list has five entries; grid cells beyond the number of distinct
    regions are removed.  Switches the global seaborn style to "whitegrid".
    """
    palette = ["red", "orange", "green", "purple", "blue"]
    region_names = filtered_data1["Region"].unique()

    sns.set(style="whitegrid")

    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(20, 12), constrained_layout=True)
    panels = axes.flatten()

    # zip stops at the shortest input, so panel k belongs to region k.
    for panel, region, line_color in zip(panels, region_names, palette):
        subset = filtered_data1[filtered_data1["Region"] == region]

        # Only float64/int64 columns are averaged; 'M' gives monthly means.
        numeric_cols = subset.select_dtypes(include=['float64', 'int64']).columns
        monthly_means = subset[numeric_cols].resample('M').mean()

        sns.lineplot(data=monthly_means, x=monthly_means.index, y="TotalDemand", ax=panel, color=line_color)

        panel.set_title(f"{region} Energy Demand", fontweight="bold", fontsize=16, color='black')
        panel.set_xlabel("Year", color='black')
        panel.set_ylabel("Total Demand (MW)", color='black')

    # Drop the grid cells that have no region assigned.
    for spare_panel in panels[len(region_names):]:
        fig.delaxes(spare_panel)

    plt.show()

def regional_demand_pie_chart(filtered_data1):
    """Show a pie chart of the mean ``TotalDemand`` per ``Region``.

    Each wedge is labelled with the region name and its percentage share of
    the summed regional means.
    """
    # One row per region: columns 'Region' and mean 'TotalDemand'.
    region_means = filtered_data1.groupby('Region')['TotalDemand'].mean().reset_index()

    wedge_colors = ['#FFC107', '#FF5722', '#FF9800', '#F44336', '#FFEB3B']

    plt.figure(figsize=(10, 8))
    plt.title("Average Energy Demand Distribution by Region (2015-2020)", fontweight="bold", fontsize=14, color='black')

    pie_artists = plt.pie(
        region_means['TotalDemand'],
        labels=region_means['Region'],
        colors=wedge_colors,
        autopct='%1.1f%%',
        startangle=90,
        wedgeprops={'linewidth': 3.0, 'edgecolor': 'white'},
        textprops={'size': 'large', 'fontweight': 'bold'},
        pctdistance=0.7,
        shadow=True,
    )
    # With autopct set, the third element holds the percentage text objects.
    percent_labels = pie_artists[2]

    plt.legend(title='Region', loc='best', bbox_to_anchor=(1, 0.5))
    plt.ylabel('')

    for percent_label in percent_labels:
        percent_label.set_fontweight('bold')
        percent_label.set_fontsize('large')

    plt.tight_layout()
    plt.show()

def monthly_demand_region_line_plot(filtered_data):
    """Plot the mean ``TotalDemand`` per ``Month`` as one line per region.

    Only the regions NSW, SA, VIC, QLD and TAS are considered; each gets its
    own colour and marker.  The x axis is labelled Jan..Dec at positions 1..12.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    line_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
    point_markers = ['o', 's', 'D', '^', 'v']

    # Rows: Month, columns: Region, values: mean TotalDemand.
    in_scope = filtered_data[filtered_data['Region'].isin(regions)]
    monthly_means = in_scope.groupby(['Month', 'Region'])['TotalDemand'].mean().unstack()

    plt.figure(figsize=(10, 6))

    for region, line_color, point_marker in zip(regions, line_colors, point_markers):
        plt.plot(
            monthly_means.index,
            monthly_means[region],
            label=region,
            color=line_color,
            linestyle='-',
            marker=point_marker,
            markersize=8,
        )

    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    plt.title('Average Monthly Energy Demand by Region', fontsize=14, fontweight='bold', color='black')
    plt.xlabel('Month', fontsize=11, color='black')
    plt.ylabel('Energy Demand (MW)', fontsize=12, color='black')
    plt.xticks(range(1, 13), month_labels, fontsize=11, color='black')
    plt.yticks(fontsize=11, color='black')
    plt.legend(title='Regions', fontsize=11, loc='upper left', bbox_to_anchor=(1, 1))
    plt.grid(True, linestyle='-', alpha=0.7)
    plt.tight_layout()
    plt.show()

def seasonal_demand_region_bar_plot(filtered_data1):
    """Show grouped bars of mean ``TotalDemand`` per region, one bar per season."""
    # Wide table (Season x Region) of means, flipped so regions are rows and
    # then melted into long form: one row per (Region, Season) pair.
    season_region_means = filtered_data1.groupby(['Season', 'Region'])['TotalDemand'].mean().unstack()
    long_means = (
        season_region_means
        .transpose()
        .reset_index()
        .melt(id_vars='Region', var_name='Season', value_name='Average Demand')
    )

    plt.figure(figsize=(10, 6))
    sns.barplot(data=long_means, x='Region', y='Average Demand', hue='Season', palette='rocket')

    plt.title('Average Energy Demand by Region and Season', fontweight="bold", fontsize=14, color='black')
    plt.xlabel('Region', fontsize=12, color='black')
    plt.ylabel('Energy Demand (MW)', fontsize=12, color='black')
    plt.xticks(fontsize=11, color='black')
    plt.yticks(fontsize=11, color='black')
    plt.legend(title='Season', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def season_demand_percentage_stacked_plot(filtered_data1):
    """Show, per region, how mean demand splits between Summer and Winter.

    For each region the Summer and Winter means of ``TotalDemand`` are scaled
    so the two add up to 100, and drawn as a stacked vertical bar with the
    percentage written in the middle of each segment.
    """
    # Column per season, row per region, holding mean TotalDemand.
    season_means = pd.DataFrame({
        season: filtered_data1[filtered_data1['Season'] == season].groupby('Region')['TotalDemand'].mean()
        for season in ('Summer', 'Winter')
    })

    # Normalise each region's row so its two seasons sum to 100.
    percentage_data = season_means.div(season_means.sum(axis=1), axis=0) * 100

    segment_colors = ['#2ca02c', '#1f77b4']  # Summer, Winter

    percentage_data.plot(kind='bar', stacked=True, color=segment_colors, figsize=(10, 6))

    plt.title('Percentage Distribution of Average Demand during Summer and Winter by Region', fontweight="bold", fontsize=12, color='black')
    plt.xlabel('Region', fontsize=11, color='black')
    plt.ylabel('Percentage', fontsize=11, color='black')

    # Legend goes outside the axes on the right.
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), ncol=1)

    plt.xticks(rotation=360, ha='right', fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    plt.grid(axis='y', linestyle='-', alpha=0.7)

    # A segment's top is the running total across seasons; its centre is the
    # top minus half of its own height.
    segment_tops = percentage_data.cumsum(axis=1)
    for season in percentage_data.columns:
        for bar_position, (top, share) in enumerate(zip(segment_tops[season], percentage_data[season])):
            plt.text(bar_position, top - share / 2, str(np.round(share, 1)) + '%',
                     ha='center', va='center', fontweight="bold", fontsize=10)

    plt.tight_layout()
    plt.show()

def hourly_demand_region_step_plot(filtered_data1):
    """Draw a step chart of mean ``TotalDemand`` per ``Hour`` for each region.

    The regions NSW, SA, VIC, QLD and TAS each get their own panel on a 3x2
    grid; the unused sixth panel is removed before the figure is shown.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must provide the columns ``Region``, ``Hour`` and ``TotalDemand``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']

    # Mean TotalDemand per (Hour, Region), as a wide table with one column per region.
    average_daytime_demand = (
        filtered_data1[filtered_data1['Region'].isin(regions)]
        .groupby(['Hour', 'Region'])['TotalDemand']
        .mean()
        .unstack()
    )

    # Flip so regions are rows, expose 'Region' as a column, then melt to
    # long form: one row per (Region, Hour) pair.
    average_demand_daytime_melted = (
        average_daytime_demand
        .transpose()
        .reset_index()
        .melt(id_vars='Region', var_name='Hour', value_name='Average Demand')
    )

    # Layout is handled by the tight_layout() call below; requesting
    # constrained_layout here as well would only conflict with it.
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
    axes = axes.flatten()

    region_colors = {'NSW': 'blue', 'SA': 'orange', 'VIC': 'green', 'QLD': 'red', 'TAS': 'purple'}

    for ax, region in zip(axes, regions):
        region_data = average_demand_daytime_melted[average_demand_daytime_melted['Region'] == region]

        # where='mid' centres each step on its hour value.
        ax.step(region_data['Hour'], region_data['Average Demand'], label=region, color=region_colors[region], where='mid')

        ax.set_title(f'{region} - Average Hourly Energy Demand', fontweight="bold", fontsize=14, color='black')
        ax.set_xlabel('Hour', fontsize=12, color='black')
        ax.set_ylabel('Energy Demand (MW)', fontsize=12, color='black')

        ax.grid(True, linestyle='-', alpha=0.7)
        ax.tick_params(axis='both', which='major', labelsize=11, color='black')

    # Remove the panels that have no region assigned.
    for spare_ax in axes[len(regions):]:
        fig.delaxes(spare_ax)

    plt.tight_layout()
    plt.show()

def hourly_demand_weekdays_heatmap(filtered_data1):
    """Show an annotated heatmap of mean ``TotalDemand`` by hour and weekday.

    Rows are the values of ``Hour``; columns are the weekday names from
    ``DayOfWeek`` in Monday..Sunday order.  All seven names must occur in the
    data, since the columns are selected by name.
    """
    weekday_sequence = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # Hours as rows, weekdays as columns, mean demand in the cells.
    demand_grid = filtered_data1.pivot_table(index='Hour', columns='DayOfWeek', values='TotalDemand', aggfunc='mean')
    demand_grid = demand_grid[weekday_sequence]

    plt.figure(figsize=(12, 8))
    heat_axes = sns.heatmap(demand_grid, cmap='viridis', annot=True, fmt=".1f", linewidths=0.5)

    # Shrink the cell annotations so they fit.
    for cell_label in heat_axes.texts:
        cell_label.set_fontsize(9)

    plt.title('Average Hourly Energy Demand across Days of the Week', fontsize=14, fontweight='bold', color='black')
    plt.xlabel('Weekday', fontsize=11, color='black')
    plt.ylabel('Hour of Day', fontsize=11, color='black')

    # The colour bar hangs off the heatmap's first collection.
    color_bar = heat_axes.collections[0].colorbar
    color_bar.set_label('Energy Demand (MW)', fontsize=11, color='black')

    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    plt.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)

    # Background and visible border styling on the current axes.
    current_axes = plt.gca()
    current_axes.set_facecolor('#F5F5F5')
    current_axes.spines['bottom'].set_color('black')
    current_axes.spines['left'].set_color('black')

    plt.tight_layout()
    plt.show()

def hourly_demand_months_heatmap(filtered_data1):
    """Show an annotated heatmap of mean ``TotalDemand`` by hour and month.

    Rows are the values of ``Hour`` and columns the values of ``Month``.  The
    x ticks are relabelled with the twelve abbreviated month names.
    NOTE(review): the relabelling presumably expects all twelve months to be
    present in the data - confirm.
    """
    # Hours as rows, months as columns, mean demand in the cells.
    demand_grid = filtered_data1.pivot_table(index='Hour', columns='Month', values='TotalDemand', aggfunc='mean')

    plt.figure(figsize=(12, 8))
    heat_axes = sns.heatmap(demand_grid, cmap='magma', annot=True, fmt=".1f", linewidths=0.5)

    # Shrink the cell annotations so they fit.
    for cell_label in heat_axes.texts:
        cell_label.set_fontsize(9)

    plt.title('Average Hourly Energy Demand across Months', fontsize=14, fontweight='bold', color='black')
    plt.xlabel('Month', fontsize=11, color='black')
    plt.ylabel('Hour', fontsize=11, color='black')

    color_bar = heat_axes.collections[0].colorbar
    color_bar.set_label('Energy Demand (MW)', fontsize=11, color='black')

    # Entry 0 of month_abbr is the empty string, so take entries 1..12.
    month_labels = calendar.month_abbr[1:13]
    tick_positions = np.arange(len(month_labels)) + 0.4
    plt.xticks(ticks=tick_positions, labels=month_labels, fontsize=10, color='black', ha='left')

    plt.yticks(fontsize=10, color='black')

    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

    # Background and visible border styling on the current axes.
    current_axes = plt.gca()
    current_axes.set_facecolor('#F5F5F5')
    current_axes.spines['bottom'].set_color('black')
    current_axes.spines['left'].set_color('black')

    plt.tight_layout()
    plt.show()

def weekday_demand_percentage_stacked_plot(filtered_data1):
    """Show, per region, how mean demand splits between weekdays and weekends.

    ``IsWeekend`` is used directly as a boolean mask.  For each region the
    weekday and weekend means of ``TotalDemand`` are scaled so the two add up
    to 100, and drawn as a stacked horizontal bar with the percentage written
    in the middle of each segment.
    """
    weekend_mask = filtered_data1['IsWeekend']

    weekend_means = filtered_data1[weekend_mask].groupby('Region')['TotalDemand'].mean()
    weekday_means = filtered_data1[~weekend_mask].groupby('Region')['TotalDemand'].mean()

    # Column order fixes the stacking order: Weekday first, then Weekend.
    day_type_means = pd.DataFrame({'Weekday': weekday_means, 'Weekend': weekend_means})

    # Normalise each region's row so its two parts sum to 100.
    percentage_data = day_type_means.div(day_type_means.sum(axis=1), axis=0) * 100

    percentage_data.plot(kind='barh', stacked=True, colormap="Dark2", figsize=(10, 6))

    plt.title('Percentage Distribution of Average Weekday and Weekend Demand by Region', fontweight="bold", fontsize=12, color='black')
    plt.ylabel('Region', fontsize=11, color='black')

    # Legend goes outside the axes on the right.
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), ncol=1)

    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    plt.grid(axis='x', linestyle='-', alpha=0.7)

    # A segment's right edge is the running total across columns; its centre
    # is that edge minus half of the segment's own width.
    segment_ends = percentage_data.cumsum(axis=1)
    for day_type in percentage_data.columns:
        for bar_position, (end, share) in enumerate(zip(segment_ends[day_type], percentage_data[day_type])):
            plt.text(end - share / 2, bar_position, str(np.round(share, 1)) + '%',
                     va='center', ha='center', fontweight="bold", fontsize=10)

    plt.tight_layout()
    plt.show()

def dayofweek_demand_region_area_plot(filtered_data1):
    """Draw overlapping area charts of mean ``TotalDemand`` per weekday, one per region.

    Only NSW, SA, VIC, QLD and TAS are considered.  Weekdays are ordered
    Monday..Sunday through an ordered categorical before plotting.
    """
    wanted_regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    weekday_sequence = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    fill_colors = {'NSW': 'blue', 'QLD': 'orange', 'SA': 'green', 'TAS': 'red', 'VIC': 'purple'}

    # Mean TotalDemand per (DayOfWeek, Region), reshaped into long form with
    # one row per (Region, DayOfWeek) pair.
    in_scope = filtered_data1[filtered_data1['Region'].isin(wanted_regions)]
    day_region_means = in_scope.groupby(['DayOfWeek', 'Region'])['TotalDemand'].mean().unstack()
    long_means = day_region_means.transpose().reset_index().melt(
        id_vars='Region', var_name='DayOfWeek', value_name='Average Demand')

    # The ordered categorical makes the sort follow the calendar week instead
    # of alphabetical order.
    long_means['DayOfWeek'] = pd.Categorical(long_means['DayOfWeek'], categories=weekday_sequence, ordered=True)
    long_means = long_means.sort_values(by='DayOfWeek')

    plt.figure(figsize=(10, 6))

    # One translucent filled area per region actually present in the data.
    for region in long_means['Region'].unique():
        region_rows = long_means[long_means['Region'] == region]
        plt.fill_between(region_rows['DayOfWeek'], region_rows['Average Demand'],
                         color=fill_colors[region], alpha=0.5, label=region)

    plt.xlabel('Day of the Week', fontsize=11, color='black')
    plt.ylabel('Average Demand (MW)', fontsize=11, color='black')
    plt.title('Average Energy Demand across Days of the Week by Region', fontsize=14, fontweight='bold', color='black')
    plt.legend(title='Region', loc='upper left', bbox_to_anchor=(1, 1))
    plt.grid(True, linestyle='-', linewidth=0.6, color='gray')
    plt.xticks(fontsize=10, rotation=45, color='black')
    plt.yticks(fontsize=10, color='black')
    plt.tight_layout()

    # Grey plot background; hide the top and right borders.
    current_axes = plt.gca()
    current_axes.patch.set_facecolor('lightgray')
    current_axes.spines['top'].set_visible(False)
    current_axes.spines['right'].set_visible(False)

    plt.show()

def day_of_month_demand_point_plot(filtered_data1):
    """Show a point plot of ``TotalDemand`` against ``Day``.

    The chart is produced by ``sns.catplot(kind="point")``, which builds its
    own figure sized by ``height``/``aspect``.  No separate ``plt.figure()`` is
    opened beforehand: doing so would only leave an extra, empty figure that
    ``plt.show()`` renders alongside the real one.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must provide the columns ``Day`` and ``TotalDemand``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.  The global seaborn style is
        switched to "whitegrid" as a side effect.
    """
    sns.set_style("whitegrid")

    point_color = '#4C72B0'

    # Figure-level call: creates the figure itself (height 6, aspect 2).
    sns.catplot(y="TotalDemand", x="Day", kind="point", data=filtered_data1, color=point_color, capsize=0.1, height=6, aspect=2, marker='o')

    # The pyplot calls below act on the axes catplot has just created.
    plt.title('Average Energy Demand Variation by Day of the Month', fontweight='bold', fontsize=12, color='black')
    plt.xlabel('Day', fontsize=11, color='black')
    plt.ylabel('Energy Demand (MW)', fontsize=11, color='black')

    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    plt.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)

    plt.tight_layout()
    plt.show()

def weather_correlation_plot(resampled_data):
    """Show a lower-triangle correlation heatmap of the weather columns and demand.

    The correlation is computed only over the columns that are plotted.  The
    input frame is left untouched: earlier code replaced its ``Region`` column
    with integer codes in place, which altered the caller's data even though
    ``Region`` never appears in the heatmap.

    Parameters
    ----------
    resampled_data : pandas.DataFrame
        Must provide the numeric columns ``Precipitation``,
        ``RelativeHumidity%``, ``AirTemperature``, ``WetBulbTemperature``,
        ``DewTemperature``, ``SeaPressure``, ``StationPressure`` and
        ``TotalDemand``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    columns_of_interest = ['Precipitation', 'RelativeHumidity%', 'AirTemperature', 'WetBulbTemperature', 'DewTemperature',
                           'SeaPressure', 'StationPressure', 'TotalDemand']

    # Selecting first keeps non-numeric columns (e.g. 'Region') out of corr()
    # and creates a new frame, so the caller's data is never modified.
    correlation_matrix = resampled_data[columns_of_interest].corr()

    plt.figure(figsize=(16, 9))

    # Styling of the numbers written into each cell.
    annot_kws = {'fontsize': 10,
                 'fontstyle': 'italic',
                 'fontfamily': 'serif',
                 'alpha': 1}

    # Styling of the colour bar.
    cbar_kws = {"shrink": 1,
                'extend': 'min',
                'extendfrac': 0.1,
                "drawedges": True}

    # Non-zero entries of the mask hide a cell: this masks the diagonal and
    # everything above it, leaving the lower triangle visible.
    upper_triangle_mask = np.triu(correlation_matrix)

    sns.heatmap(correlation_matrix, mask=upper_triangle_mask, cmap='rainbow', annot=True, linewidth=1.5, fmt=".3f",
                annot_kws=annot_kws, cbar_kws=cbar_kws)

    plt.title('Correlation Heatmap for Weather Parameters', fontsize=20,  fontweight="bold",  color='black')

    plt.xticks(fontsize=12, color='black')
    plt.yticks(fontsize=12, color='black')

    plt.show()

def air_temperature_region_violin_plot(filtered_data1):
    """Show horizontal violin plots of ``AirTemperature``, one violin per ``Region``.

    Drawn through ``sns.catplot``, which creates its own figure.  Switches the
    global seaborn style to "whitegrid".
    """
    sns.set(style="whitegrid")

    # inner='stick' draws every observation as a line inside its violin.
    sns.catplot(
        data=filtered_data1,
        x='AirTemperature',
        y='Region',
        kind='violin',
        palette='Set2',
        inner='stick',
        height=8,
        aspect=1.5,
    )

    plt.xlabel('Air Temperature (°C)', fontsize=11, color='black')
    plt.ylabel('Region', fontsize=11, color='black')
    plt.title('Distribution of Air Temperature by Region', fontsize=14, fontweight='bold', color='black')

    plt.xticks(fontsize=11, color='black')
    plt.yticks(fontsize=11, color='black')

    plt.grid(True, linestyle='-', linewidth=0.5, alpha=0.6)

    # Background and visible border styling on the current axes.
    current_axes = plt.gca()
    current_axes.set_facecolor('#F0F0F0')
    current_axes.spines['bottom'].set_color('black')
    current_axes.spines['left'].set_color('black')

    plt.tight_layout()
    plt.show()

def air_temperature_demand_scatter_plot(filtered_data1):
    """Scatter daily-mean ``AirTemperature`` against ``TotalDemand`` per region.

    NSW, SA, VIC, QLD and TAS each get a panel on a 3x2 grid, with a
    regression line drawn by ``sns.regplot``; the unused sixth panel is
    removed.  ``resample`` is called, so the index must support it.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    point_colors = sns.color_palette("Dark2", n_colors=len(regions))

    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    panels = axes.flatten()

    # zip stops after the last region, so panel k belongs to region k.
    for panel, region, point_color in zip(panels, regions, point_colors):
        subset = filtered_data1[filtered_data1['Region'] == region]

        # Average the float64/int64 columns per calendar day.
        numeric_cols = subset.select_dtypes(include=['float64', 'int64']).columns
        daily_means = subset[numeric_cols].resample('D').mean()

        # AirTemperature vs TotalDemand with a fitted regression line.
        sns.regplot(data=daily_means, x='AirTemperature', y='TotalDemand', ax=panel,
                    scatter_kws={'s': 50, 'color': point_color, 'alpha': 0.7, 'edgecolor': 'black'},
                    line_kws={'color': 'black', 'alpha': 1, 'linewidth': 2.0})

        panel.set_title(f'{region}- Distribution of Air Temperature vs Energy Demand', fontweight='bold', fontsize=11, color='black')
        panel.set_xlabel('Air Temperature (°C)' , fontsize=10, color='black')
        panel.set_ylabel('Total Demand (MW)', fontsize=10, color='black')
        panel.tick_params(labelsize=8)
        panel.grid(True)

    # Drop the grid cells that have no region assigned.
    for spare_panel in panels[len(regions):]:
        fig.delaxes(spare_panel)

    plt.tight_layout()
    plt.show()

def wetbulb_temperature_region_ridgeline_plot(filtered_data1):
    """Show a ridgeline (joy) plot of ``WetBulbTemperature`` grouped by ``Region``."""
    ridge_fig, ridge_axes = joypy.joyplot(
        filtered_data1,
        by='Region',
        column='WetBulbTemperature',
        colormap=cm.plasma,
        fade=True,
        linewidth=1,
        alpha=0.7,
        range_style='own',
        figsize=(12, 8),
    )

    plt.suptitle('Distribution of Wet Bulb Temperature by Region', fontsize=14, fontweight='bold', color='black')

    # Apply the same label, tick and grid styling to every axes joypy returned.
    for ridge_axis in ridge_axes:
        ridge_axis.set_xlabel('Wet Bulb Temperature (°C)', fontsize=11, color='black')
        ridge_axis.tick_params(axis='both', which='major', labelsize=11, colors='black')
        ridge_axis.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

    # Figure background and border colours.
    ridge_fig.patch.set_facecolor('#F5F5F5')
    ridge_fig.patch.set_edgecolor('black')

    # One shared y label, placed at the left edge and vertically centred.
    ridge_fig.text(0.01, 0.5, 'Density', ha='center', va='center', fontsize=11, color='black', rotation='vertical')

    plt.tight_layout()
    plt.show()

def wetbulb_temperature_demand_scatter_plot(filtered_data1):
    """Scatter daily-mean ``WetBulbTemperature`` against ``TotalDemand`` per region.

    NSW, SA, VIC, QLD and TAS each get a panel on a 3x2 grid, with a
    regression line drawn by ``sns.regplot``; the unused sixth panel is
    removed.  ``resample`` is called, so the index must support it.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    point_colors = ['red', 'blue', 'green', 'orange', 'purple']

    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    panels = axes.flatten()

    # zip stops after the last region, so panel k belongs to region k.
    for panel, region, point_color in zip(panels, regions, point_colors):
        subset = filtered_data1[filtered_data1['Region'] == region]

        # Average the float64/int64 columns per calendar day.
        numeric_cols = subset.select_dtypes(include=['float64', 'int64']).columns
        daily_means = subset[numeric_cols].resample('D').mean()

        # WetBulbTemperature vs TotalDemand with a fitted regression line.
        sns.regplot(data=daily_means, x='WetBulbTemperature', y='TotalDemand', ax=panel,
                    scatter_kws={'s': 50, 'color': point_color, 'alpha': 0.7, 'edgecolor': 'black'},
                    line_kws={'color': 'black', 'alpha': 1, 'linewidth': 2.0})

        panel.set_title(f'{region} - Distribution of Wet Bulb Temperature vs Energy Demand', fontweight='bold', fontsize=11, color='black')
        panel.set_xlabel('Wet Bulb Temperature (°C)' , fontsize=11, color='black')
        panel.set_ylabel('Total Demand (MW)', fontsize=11, color='black')
        panel.tick_params(labelsize=10, color='black')
        panel.grid(True)

    # Drop the grid cells that have no region assigned.
    for spare_panel in panels[len(regions):]:
        fig.delaxes(spare_panel)

    plt.tight_layout()
    plt.show()

def dew_temperature_region_boxenplot(filtered_data1):
    """Show a boxen plot of ``DewTemperature`` grouped by ``Region``.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region`` and ``DewTemperature``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        theme is switched to "whitegrid" via ``sns.set``.
    """
    # Theme first, so the new figure picks it up
    sns.set(style="whitegrid")

    # New 10x6 figure; boxenplot draws on (and returns) its current axes
    plt.figure(figsize=(10, 6))
    ax = sns.boxenplot(data=filtered_data1, x="Region", y="DewTemperature",
                       palette="Dark2", linewidth=1.8)

    # Title and axis labels
    ax.set_title('Distribution of Dew Point Temperature by Region', fontweight="bold", fontsize=14, color='black')
    ax.set_xlabel('Region', fontsize=11, color='black')
    ax.set_ylabel('Dew Point Temperature (°C)', fontsize=11, color='black')

    # Tick label appearance
    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    # Light grey plotting area with black bottom/left borders
    ax.set_facecolor('#F0F0F0')
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')

    # Render
    plt.tight_layout()
    plt.show()

def dew_temperature_demand_hexbin_plot(filtered_data1):
    """Draw one hexbin panel per region of ``DewTemperature`` vs ``TotalDemand``.

    Panels are laid out on a 3x2 grid; bin counts are coloured on a log
    scale (``bins='log'``) and each panel gets its own colorbar. The unused
    sixth panel is removed.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region``, ``DewTemperature`` and
        ``TotalDemand``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']

    # 3x2 grid, flattened so panels pair up with regions positionally
    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    axes = axes.flatten()

    for ax, region in zip(axes, regions):
        # Rows belonging to this region only
        subset = filtered_data1[filtered_data1['Region'] == region]

        # Hexagonal binning with log-scaled counts
        hexes = ax.hexbin(subset['DewTemperature'], subset['TotalDemand'], gridsize=20,
                          bins='log', mincnt=0, edgecolors="none", cmap="viridis")

        # Colorbar bound to the hexbin collection just drawn
        fig.colorbar(hexes, ax=ax, label='Levels of Bin')

        # Title and axis labels
        ax.set_title(f'{region} - Distribution of Dew Point Temperature vs Energy Demand', fontweight='bold', fontsize=10, color='black')
        ax.set_xlabel('Dew Point Temperature (°C)', fontsize=9, color='black')
        ax.set_ylabel('Total Demand (MW)', fontsize=9, color='black')

        # Ticks, grid and background
        ax.tick_params(axis='both', labelsize=8, colors='black')
        ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
        ax.set_facecolor('#F0F0F0')

        # Black border on all four sides
        for border in ax.spines.values():
            border.set_edgecolor('black')

    # Drop the panels that have no region assigned
    for spare_ax in axes[len(regions):]:
        fig.delaxes(spare_ax)

    plt.tight_layout()
    plt.show()

def precipitation_strip_plot(filtered_data1):
    """Show a jittered strip plot of ``Precipitation`` for each region.

    Regions appear on the x-axis in the fixed order NSW, SA, VIC, QLD, TAS.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region`` and ``Precipitation``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        theme is switched to "whitegrid" via ``sns.set``.
    """
    # Category order on the x-axis and the colours handed to seaborn
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    region_colors = {'NSW': 'blue', 'SA': 'orange', 'VIC': 'green', 'QLD': 'red', 'TAS': 'purple'}

    sns.set(style="whitegrid")

    # catplot builds its own figure; later pyplot calls act on its axes
    sns.catplot(data=filtered_data1, x='Region', y='Precipitation', kind='strip',
                order=regions, palette=region_colors.values(),
                jitter=0.25, linewidth=0.8, height=6, aspect=1.5)
    ax = plt.gca()

    # Title and axis labels
    ax.set_xlabel('Region', fontsize=11, color='black')
    ax.set_ylabel('Precipitation (mm)', fontsize=11, color='black')
    ax.set_title('Distribution of Precipitation by Region', fontsize=14, fontweight='bold', color='black')

    # Tick label appearance
    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    # Grid, background and bottom/left borders
    ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
    ax.set_facecolor('#F0F0F0')
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')

    # Render
    plt.tight_layout()
    plt.show()

def precipitation_demand_hexbin_plot(filtered_data1):
    """Draw one hexbin panel per region of ``Precipitation`` vs ``TotalDemand``.

    Panels are laid out on a 3x2 grid; bin counts are coloured on a log
    scale (``bins='log'``) with the "inferno" colormap and each panel gets
    its own colorbar. The unused sixth panel is removed.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region``, ``Precipitation`` and
        ``TotalDemand``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']

    # 3x2 grid, flattened so panels pair up with regions positionally
    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    axes = axes.flatten()

    for ax, region in zip(axes, regions):
        # Rows belonging to this region only
        subset = filtered_data1[filtered_data1['Region'] == region]

        # Hexagonal binning with log-scaled counts
        hexes = ax.hexbin(subset['Precipitation'], subset['TotalDemand'], gridsize=20,
                          bins='log', mincnt=0, edgecolors="none", cmap="inferno")

        # Colorbar bound to the hexbin collection just drawn
        fig.colorbar(hexes, ax=ax, label='Levels of Bin')

        # Title and axis labels
        ax.set_title(f'{region}- Distribution of Precipitation vs Energy Demand', fontweight='bold', fontsize=10, color='black')
        ax.set_xlabel('Precipitation (mm)', fontsize=9, color='black')
        ax.set_ylabel('Total Demand (MW)', fontsize=9, color='black')

        # Ticks, grid and background
        ax.tick_params(axis='both', labelsize=8, colors='black')
        ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
        ax.set_facecolor('#F0F0F0')

        # Black border on all four sides
        for border in ax.spines.values():
            border.set_edgecolor('black')

    # Drop the panels that have no region assigned
    for spare_ax in axes[len(regions):]:
        fig.delaxes(spare_ax)

    plt.tight_layout()
    plt.show()

def relative_humidity_demand_contour_plot(filtered_data1):
    """Draw filled contours of a 2D histogram of humidity vs demand per region.

    For each of the five hard-coded regions a 25x25 histogram of
    ``RelativeHumidity%`` against ``TotalDemand`` is computed with
    ``np.histogram2d`` and rendered with ``contourf`` in a 3x2 subplot grid.
    The unused sixth panel is removed.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region``, ``RelativeHumidity%`` and
        ``TotalDemand``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Define regions
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']

    # Create subplots
    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    axes = axes.flatten()

    # Iterate over regions
    for i, region in enumerate(regions):
        # Filter data for the region
        region_data = filtered_data1[filtered_data1['Region'] == region]

        # np.histogram2d cannot auto-detect a range from NaN values, so keep
        # only rows where both plotted columns are present.
        pairs = region_data[['RelativeHumidity%', 'TotalDemand']].dropna()
        x = pairs['RelativeHumidity%']
        y = pairs['TotalDemand']

        # Create 2D histogram for RelativeHumidity% vs TotalDemand
        nbins = 25
        H, xedges, yedges = np.histogram2d(x, y, bins=nbins)

        # Each count describes a whole bin, so place it at the bin centre.
        # Using the left edges would shift every contour by half a bin.
        xcenters = (xedges[:-1] + xedges[1:]) / 2
        ycenters = (yedges[:-1] + yedges[1:]) / 2
        X, Y = np.meshgrid(xcenters, ycenters)

        # H is indexed [x_bin, y_bin]; contourf expects [row=y, col=x]
        contour = axes[i].contourf(X, Y, H.T, cmap='viridis')

        # Set title for each subplot
        axes[i].set_title(f'{region}- Distribution of RelativeHumidity vs Energy Demand', fontweight='bold', fontsize=11, color='black')

        # Set labels
        axes[i].set_xlabel('Relative Humidity (%)', fontsize=10, color='black')
        axes[i].set_ylabel('Total Demand (MW)', fontsize=10, color='black')

        # Customize tick labels
        axes[i].tick_params(labelsize=9)

        # Add colorbar
        # NOTE: H holds raw bin counts (histogram2d is called without
        # density=True); the label text is kept as in the original figure.
        cbar = fig.colorbar(contour, ax=axes[i])
        cbar.set_label('Density')

    # Remove any unused subplots
    for unused_ax in axes[len(regions):]:
        fig.delaxes(unused_ax)

    # Adjust layout
    plt.tight_layout()
    plt.show()

def station_pressure_region_kde_plot(filtered_data1):
    """Overlay one KDE curve of ``StationPressure`` per distinct region.

    Regions are taken from the unique values of the ``Region`` column, and
    each curve is labelled with its region in a legend placed to the right
    of the axes.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region`` and ``StationPressure``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        theme is switched to "whitegrid" via ``sns.set``.
    """
    sns.set(style="whitegrid")

    plt.figure(figsize=(10, 6))

    # One unfilled density curve per region, all on the current axes
    region_column = filtered_data1['Region']
    for region in region_column.unique():
        subset = filtered_data1[region_column == region]
        sns.kdeplot(data=subset, x='StationPressure', fill=False, label=region, linewidth=2)

    ax = plt.gca()

    # Title and axis labels
    ax.set_title('Distribution of Station Pressure by Region', fontsize=14, fontweight='bold', color='black')
    ax.set_xlabel('Station Pressure (hPa)', fontsize=11, color='black')
    ax.set_ylabel('Density', fontsize=11, color='black')

    # Legend sits outside the plotting area, vertically centred on the right
    ax.legend(title='Region', loc='center left', bbox_to_anchor=(1, 0.5), fontsize=10)

    # Tick label appearance
    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    # Dashed grid, light background, black bottom/left borders
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
    ax.set_facecolor('#F5F5F5')
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')

    plt.tight_layout()

    # Render
    plt.show()

def station_pressure_demand_gaussian_kde_plot(filtered_data1):
    """Show a 2D Gaussian KDE of ``StationPressure`` vs ``TotalDemand`` per region.

    For each of the five hard-coded regions a ``scipy.stats.gaussian_kde`` is
    fitted to the two columns, evaluated on a 100x100 grid spanning the
    observed min/max of each column, and drawn as an image with a colorbar.
    Panels sit on a 3x2 grid; the unused sixth panel is removed.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region``, ``StationPressure`` and
        ``TotalDemand``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    kde_columns = ['StationPressure', 'TotalDemand']

    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    axes = axes.flatten()

    for ax, region in zip(axes, regions):
        subset = filtered_data1[filtered_data1['Region'] == region]

        # gaussian_kde expects shape (n_dims, n_points): row 0 = pressure, row 1 = demand
        samples = subset[kde_columns].values.T
        estimator = gaussian_kde(samples)

        # Observed bounds, reused for both the evaluation grid and the image extent
        x_lo, x_hi = samples[0].min(), samples[0].max()
        y_lo, y_hi = samples[1].min(), samples[1].max()

        # Evaluate the density on a regular 100x100 grid
        Xgrid, Ygrid = np.meshgrid(np.linspace(x_lo, x_hi, 100),
                                   np.linspace(y_lo, y_hi, 100))
        grid_points = np.vstack([Xgrid.ravel(), Ygrid.ravel()])
        density = estimator(grid_points).reshape(Xgrid.shape)

        # origin='lower' so increasing demand runs upwards
        im = ax.imshow(density, origin='lower', aspect='auto',
                       extent=[x_lo, x_hi, y_lo, y_hi], cmap='cividis')

        # Title and axis labels
        ax.set_title(f'{region}- Distribution of Station Pressure vs Energy Demand', fontweight='bold', fontsize=10, color='black')
        ax.set_xlabel('Station Pressure (hPa)', fontsize=9, color='black')
        ax.set_ylabel('Total Demand (MW)', fontsize=9, color='black')

        # Ticks, grid and background
        ax.tick_params(axis='both', which='major', labelsize=8)
        ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
        ax.set_facecolor('#F0F0F0')

        # Black bottom/left borders
        for side in ('bottom', 'left'):
            ax.spines[side].set_color('black')

        # Per-panel colorbar
        cbar = fig.colorbar(im, ax=ax)
        cbar.set_label('Density', fontsize=8, color='black')
        cbar.ax.tick_params(labelsize=8, colors='black')

    # Drop the panels that have no region assigned
    for spare_ax in axes[len(regions):]:
        fig.delaxes(spare_ax)

    plt.tight_layout()
    plt.show()

def sea_pressure_region_kde_plot(filtered_data1):
    """Overlay one KDE curve of ``SeaPressure`` per distinct region.

    Regions are taken from the unique values of the ``Region`` column. The
    five known regions are drawn in fixed colours; any other region value is
    drawn in seaborn's default colour instead of raising ``KeyError``.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region`` and ``SeaPressure``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        theme is switched to "whitegrid" via ``sns.set``.
    """
    # Define custom colors for regions
    region_colors = {'NSW': 'blue', 'SA': 'orange', 'VIC': 'green', 'QLD': 'red', 'TAS': 'purple'}

    # Set the style
    sns.set(style="whitegrid")

    # Create the KDE plot
    plt.figure(figsize=(10, 6))

    # Loop through unique regions to plot KDE curves and create legend entries
    for region in filtered_data1['Region'].unique():
        # .get() returns None for a region missing from the colour map, which
        # lets kdeplot choose a colour itself rather than failing the figure.
        sns.kdeplot(data=filtered_data1[filtered_data1['Region'] == region], x='SeaPressure', fill=False,
                    label=region, linewidth=2, color=region_colors.get(region))

    # Set the title and labels
    plt.title('Distribution of Sea Pressure by Region', fontsize=14, fontweight='bold', color='black')
    plt.xlabel('Sea Pressure (hPa)', fontsize=11, color='black')
    plt.ylabel('Density', fontsize=11, color='black')

    # Customize legend (placed outside the axes, centred on the right)
    plt.legend(title='Region', loc='center left', bbox_to_anchor=(1, 0.5), fontsize=10)

    # Customize tick labels
    plt.xticks(fontsize=10, color='black')
    plt.yticks(fontsize=10, color='black')

    # Customize grid lines
    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

    # Customize background color
    plt.gca().set_facecolor('#F5F5F5')

    # Customize border color
    plt.gca().spines['bottom'].set_color('black')
    plt.gca().spines['left'].set_color('black')

    plt.tight_layout()

    # Show plot
    plt.show()

def sea_pressure_demand_gaussian_kde_plot(filtered_data1):
    """Show a 2D Gaussian KDE of ``SeaPressure`` vs ``TotalDemand`` per region.

    For each of the five hard-coded regions a ``scipy.stats.gaussian_kde`` is
    fitted to the two columns, evaluated on a 100x100 grid spanning the
    observed min/max of each column, and drawn as an image with a colorbar.
    Panels sit on a 3x2 grid; the unused sixth panel is removed.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``Region``, ``SeaPressure`` and
        ``TotalDemand``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    regions = ['NSW', 'SA', 'VIC', 'QLD', 'TAS']
    kde_columns = ['SeaPressure', 'TotalDemand']

    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    axes = axes.flatten()

    for ax, region in zip(axes, regions):
        subset = filtered_data1[filtered_data1['Region'] == region]

        # gaussian_kde expects shape (n_dims, n_points): row 0 = pressure, row 1 = demand
        samples = subset[kde_columns].values.T
        estimator = gaussian_kde(samples)

        # Observed bounds, reused for both the evaluation grid and the image extent
        x_lo, x_hi = samples[0].min(), samples[0].max()
        y_lo, y_hi = samples[1].min(), samples[1].max()

        # Evaluate the density on a regular 100x100 grid
        Xgrid, Ygrid = np.meshgrid(np.linspace(x_lo, x_hi, 100),
                                   np.linspace(y_lo, y_hi, 100))
        grid_points = np.vstack([Xgrid.ravel(), Ygrid.ravel()])
        density = estimator(grid_points).reshape(Xgrid.shape)

        # origin='lower' so increasing demand runs upwards
        im = ax.imshow(density, origin='lower', aspect='auto',
                       extent=[x_lo, x_hi, y_lo, y_hi], cmap='inferno')

        # Title and axis labels
        ax.set_title(f'{region}- Distribution of Sea Pressure vs Energy Demand', fontweight='bold', fontsize=10, color='black')
        ax.set_xlabel('Sea Pressure (hPa)', fontsize=9, color='black')
        ax.set_ylabel('Total Demand (MW)', fontsize=9, color='black')

        # Ticks, grid and background
        ax.tick_params(axis='both', which='major', labelsize=8)
        ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
        ax.set_facecolor('#F0F0F0')

        # Black bottom/left borders
        for side in ('bottom', 'left'):
            ax.spines[side].set_color('black')

        # Per-panel colorbar
        cbar = fig.colorbar(im, ax=ax)
        cbar.set_label('Density', fontsize=8, color='black')
        cbar.ax.tick_params(labelsize=8, colors='black')

    # Drop the panels that have no region assigned
    for spare_ax in axes[len(regions):]:
        fig.delaxes(spare_ax)

    plt.tight_layout()
    plt.show()

def wetbulb_dew_temperatures_joint_plot(filtered_data1):
    """Show a joint plot of ``WetBulbTemperature`` against ``DewTemperature``.

    The joint panel is a scatter plot with a red regression line overlaid;
    the marginal panels are violin plots.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``WetBulbTemperature`` and
        ``DewTemperature``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        style is switched to "whitegrid".
    """
    x_col, y_col = 'WetBulbTemperature', 'DewTemperature'

    sns.set_style("whitegrid")

    # Scatter in the centre, violins on the margins
    grid = sns.JointGrid(data=filtered_data1, x=x_col, y=y_col)
    grid.plot(sns.scatterplot, sns.violinplot)
    ax = grid.ax_joint

    # Regression line only (scatter=False), drawn over the existing points
    sns.regplot(data=filtered_data1, x=x_col, y=y_col, ax=ax, scatter=False, color='red')

    # Black ticks on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='black')

    # Axis labels
    ax.set_xlabel('Wet Bulb Temperature (°C)', fontsize=11, color='black')
    ax.set_ylabel('Dew Point Temperature (°C)', fontsize=11, color='black')

    # Grid lines and black bottom/left borders
    ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')

    plt.tight_layout()

    plt.show()

def air_dew_temperatures_joint_plot(filtered_data1):
    """Show a joint plot of ``AirTemperature`` against ``DewTemperature``.

    Uses ``sns.jointplot`` with its default joint and marginal plots, and
    overlays a red regression line on the joint panel.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``AirTemperature`` and ``DewTemperature``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        style is switched to "whitegrid".
    """
    x_col, y_col = 'AirTemperature', 'DewTemperature'

    sns.set_style("whitegrid")

    # jointplot builds the grid and draws its default plots in one call
    grid = sns.jointplot(data=filtered_data1, x=x_col, y=y_col)
    ax = grid.ax_joint

    # Regression line only (scatter=False), drawn over the existing points
    sns.regplot(data=filtered_data1, x=x_col, y=y_col, ax=ax, scatter=False, color='red')

    # Black ticks on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='black')

    # Axis labels
    ax.set_xlabel('Air Temperature (°C)', fontsize=11, color='black')
    ax.set_ylabel('Dew Point Temperature (°C)', fontsize=11, color='black')

    # Grid lines and black bottom/left borders
    ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')

    plt.tight_layout()

    plt.show()
    
def relativehum_dew_temperatures_joint_plot(filtered_data1):
    """Show a joint plot of ``RelativeHumidity%`` against ``DewTemperature``.

    The joint panel is a scatter plot with a red regression line overlaid;
    the marginal panels are boxen plots.

    Parameters
    ----------
    filtered_data1 : pandas.DataFrame
        Must contain the columns ``RelativeHumidity%`` and
        ``DewTemperature``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Note that the seaborn
        style is switched to "whitegrid".
    """
    x_col, y_col = 'RelativeHumidity%', 'DewTemperature'

    sns.set_style("whitegrid")

    # Scatter in the centre, boxen plots on the margins
    grid = sns.JointGrid(data=filtered_data1, x=x_col, y=y_col)
    grid.plot(sns.scatterplot, sns.boxenplot)
    ax = grid.ax_joint

    # Regression line only (scatter=False), drawn over the existing points
    sns.regplot(data=filtered_data1, x=x_col, y=y_col, ax=ax, scatter=False, color='red')

    # Black ticks on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='black')

    # Axis labels
    ax.set_xlabel('RelativeHumidity (%)', fontsize=11, color='black')
    ax.set_ylabel('Dew Point Temperature (°C)', fontsize=11, color='black')

    # Grid lines and black bottom/left borders
    ax.grid(True, linestyle='-', linewidth=0.5, alpha=0.7)
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')

    plt.tight_layout()

    plt.show()