In [None]:
import json
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from scipy import stats
from plotly.subplots import make_subplots
import os

# Base names of the JSON files under "jsons/". Each name is also the key under
# which the processed DataFrame is stored in `dfs`.
files = [
    "wind_produced",
    "wind_capacity",
    "electricity_consumed",
    "nuclear_production",
    "hydro_production",
    "CO2",
    "industrial_cogeneration",
    "district_heating",
    "electricity_produced",
]

# Processed DataFrames, keyed by dataset name
dfs = {}

for name in files:
    # Read the raw records for this dataset
    json_path = os.path.join("jsons", f"{name}.json")
    with open(json_path, 'r') as handle:
        records = json.load(handle)

    # Parse the timestamps and derive a helper 'year' column
    frame = pd.DataFrame.from_dict(records)
    frame = frame.assign(startTime=pd.to_datetime(frame['startTime']))
    frame = frame.assign(year=frame['startTime'].dt.year)

    # Keep only the years 2018 through 2023 (upper bound 2024 is exclusive)
    in_window = (frame['year'] >= 2018) & (frame['year'] < 2024)

    # Index by timestamp, in chronological order, and store the result
    dfs[name] = frame[in_window].set_index('startTime').sort_index()

# Collapse every series to one row per calendar year. The frames in `dfs`
# were filtered to 2018-2023 when they were loaded.
def _yearly_value(name, reduce="mean"):
    """Resample dfs[name]['value'] into yearly bins and reduce each bin.

    `reduce` selects the per-year reduction: "max" takes the yearly maximum,
    anything else takes the yearly mean. The result is a DataFrame with the
    former index ('startTime') restored as a column next to 'value'.
    """
    yearly_bins = dfs[name]['value'].resample('Y')
    reduced = yearly_bins.max() if reduce == "max" else yearly_bins.mean()
    return reduced.reset_index()


# Wind capacity is the only series reduced with the yearly maximum;
# everything else uses the yearly mean.
wind_produced_yearly = _yearly_value('wind_produced')
wind_capacity_yearly = _yearly_value('wind_capacity', reduce="max")
electricity_consumed_yearly = _yearly_value('electricity_consumed')
hydro_production_yearly = _yearly_value('hydro_production')
nuclear_production_yearly = _yearly_value('nuclear_production')
co2_yearly = _yearly_value('CO2')
industrial_cogeneration_yearly = _yearly_value('industrial_cogeneration')
district_heating_yearly = _yearly_value('district_heating')
electricity_produced_yearly = _yearly_value('electricity_produced')

In [None]:
# Constant inputs for the future predictions.
# Nuclear production is pinned to a hard-coded maximum rather than being
# computed from the loaded data.
max_nuclear_2023 = 4033


def _mean_value_2023(name):
    """Return the mean of 'value' over the rows of dfs[name] whose year is 2023."""
    frame = dfs[name]
    return frame.loc[frame['year'] == 2023, 'value'].mean()


# 2023 averages of the series that are later held constant
mean_industrial_cogeneration_2023 = _mean_value_2023('industrial_cogeneration')
mean_district_heating_2023 = _mean_value_2023('district_heating')
mean_CO2_2023 = _mean_value_2023('CO2')

In [None]:
# Average of the last four yearly hydro production values
mean_hydro = hydro_production_yearly['value'].tail(4).mean()

In [None]:
# Forecast horizon: every year after the last historical one, up to and
# including 2045, stored as a column vector of shape (n_years, 1).
first_future_year = wind_produced_yearly['startTime'].dt.year.max() + 1
future_years = np.arange(first_future_year, 2046).reshape(-1, 1)

# One timestamp per forecast year, placed on 31 December
future_dates = pd.to_datetime([f'{year}-12-31' for year in future_years[:, 0]])

# Series that are held constant over the whole horizon
horizon_shape = future_years.shape
nuclear_predictions = np.full(horizon_shape, max_nuclear_2023)
industrial_cogeneration_predictions = np.full(horizon_shape, mean_industrial_cogeneration_2023)
district_heating_predictions = np.full(horizon_shape, mean_district_heating_2023)
co2_predictions = np.full(horizon_shape, mean_CO2_2023)

In [None]:
# Factor applied to the projected wind capacity to obtain projected wind
# production (production = factor * capacity).
WIND_CAPACITY_FACTOR = 0.31

# Years used as x-values for the hydro trend: the last 4 yearly hydro points.
# They are read from the hydro series itself so that the x and y of the fit
# always describe the same rows, even if another series covers different years.
recent_years = hydro_production_yearly['startTime'].dt.year.values[-4:].reshape(-1, 1)

# Linear trend of wind capacity, fitted on its last 3 yearly points only
capacity_slope, capacity_intercept, _, _, _ = stats.linregress(
    wind_capacity_yearly['startTime'].dt.year.values[-3:],
    wind_capacity_yearly['value'].values[-3:],
)

# Linear trend of hydro production, fitted on its last 4 yearly points
hydro_slope, hydro_intercept, _, _, _ = stats.linregress(
    recent_years.flatten(),
    hydro_production_yearly['value'].values[-4:],
)

# Extrapolate wind capacity over the future years and derive wind production
capacity_predictions = capacity_slope * future_years + capacity_intercept
production_predictions = WIND_CAPACITY_FACTOR * capacity_predictions

# Extrapolate hydro production over the future years
hydro_predictions = hydro_slope * future_years + hydro_intercept

# Assume electricity consumption will double by 2045: evenly spaced values
# from the latest historical yearly value up to twice that value. linspace
# includes both endpoints, so the first future year equals the latest value.
latest_consumption = electricity_consumed_yearly['value'].iloc[-1]
electricity_consumptions = np.linspace(latest_consumption, latest_consumption * 2, num=len(future_years))

# Collect the predictions in one frame; the (n, 1) arrays are flattened to 1-D
future_df = pd.DataFrame({
    'startTime': future_dates,
    'wind_capacity': capacity_predictions.flatten(),
    'wind_production': production_predictions.flatten(),
    'nuclear_production': nuclear_predictions.flatten(),
    'hydro_production': hydro_predictions.flatten(),
    'electricity_consumed': electricity_consumptions
})


In [None]:
# Yearly historical frames, keyed by the column each one fills in
# `historical_df`. The insertion order here is the resulting column order.
_historical_sources = {
    'wind_capacity': wind_capacity_yearly,
    'wind_production': wind_produced_yearly,
    'nuclear_production': nuclear_production_yearly,
    'hydro_production': hydro_production_yearly,
    'industrial_cogeneration_production': industrial_cogeneration_yearly,
    'district_heating_production': district_heating_yearly,
    'electricity_production': electricity_produced_yearly,
    'electricity_consumed': electricity_consumed_yearly,
    'co2': co2_yearly,
}

# One row per historical year; timestamps are taken from the wind production series
historical_df = pd.DataFrame({
    'startTime': wind_produced_yearly['startTime'],
    **{column: yearly['value'] for column, yearly in _historical_sources.items()},
})

# Historical rows first, future predictions after, with a fresh 0..n-1 index
combined_df = pd.concat([historical_df, future_df], ignore_index=True)


In [None]:
# Flatten the constant (n, 1) prediction arrays to 1-D
future_co2 = co2_predictions.flatten()
future_industrial_cogeneration = industrial_cogeneration_predictions.flatten()
future_district_heating = district_heating_predictions.flatten()

# (column name, yearly historical frame, future values) for each constant series
_constant_series = [
    ('co2', co2_yearly, future_co2),
    ('industrial_cogeneration_production', industrial_cogeneration_yearly, future_industrial_cogeneration),
    ('district_heating_production', district_heating_yearly, future_district_heating),
]

for column, yearly, projected in _constant_series:
    # Add the future values to the prediction frame
    future_df[column] = projected
    # Rebuild the combined column: historical values followed by future values
    combined_df[column] = pd.concat(
        [yearly['value'], pd.Series(projected)],
        ignore_index=True,
    )

# Future electricity production is the sum of wind, nuclear, hydro,
# industrial cogeneration and district heating production
future_df['electricity_production'] = (
    future_df['wind_production']
    + future_df['nuclear_production']
    + future_df['hydro_production']
    + future_df['industrial_cogeneration_production']
    + future_df['district_heating_production']
)

# Combined production: historical values followed by the future sums
combined_df['electricity_production'] = pd.concat(
    [historical_df['electricity_production'], future_df['electricity_production']],
    ignore_index=True,
)



In [None]:
# The first row of future_df is taken to be the first future prediction date.
# Both boundary dates are made timezone-naive so they can be subtracted.
last_historical_date = historical_df['startTime'].iloc[-1].tz_localize(None)
first_future_date = future_df['startTime'].iloc[0].tz_localize(None)

# Midpoint between the last historical date and the first future date;
# used as the x position of the "Future Predictions" divider.
midpoint = last_historical_date + (first_future_date - last_historical_date) / 2

# Create a figure with a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

x_values = combined_df['startTime']

# Line traces on the primary y-axis: (column, legend label, colour).
# Line and markers share one colour per trace.
line_traces = [
    ('wind_capacity', 'Wind Capacity (MWh/h)', 'lightblue'),
    ('electricity_production', 'Electricity Produced (MWh/h)', 'darkgreen'),
    ('electricity_consumed', 'Electricity Consumed (MWh/h)', 'orange'),
]
for column, label, color in line_traces:
    fig.add_trace(go.Scatter(x=x_values, y=combined_df[column],
                             mode='lines+markers', name=label,
                             line=dict(color=color), marker=dict(color=color)))

# Stacked bar traces on the primary y-axis: (column, legend label, colour).
# The order of this list is the stacking order.
bar_traces = [
    ('nuclear_production', 'Nuclear Production (MWh/h)', 'yellow'),
    ('industrial_cogeneration_production', 'Cogeneration Industry (MWh/h)', 'darkred'),
    ('district_heating_production', 'Cogeneration district heating (MWh/h)', 'red'),
    ('hydro_production', 'Hydro Production (MWh/h)', 'blue'),
    ('wind_production', 'Wind Production (MWh/h)', 'green'),
]
for column, label, color in bar_traces:
    fig.add_trace(go.Bar(x=x_values, y=combined_df[column], name=label,
                         marker_color=color, opacity=0.5,
                         textposition='auto', texttemplate='%{y}'))

# CO2 emission factor on the secondary y-axis. Without this trace the
# secondary axis configured in the layout below has nothing to display.
fig.add_trace(
    go.Scatter(x=x_values, y=combined_df['co2'],
               mode='lines+markers', name='CO2 Emission factor (gCO2/kWh)',
               line=dict(color='black', dash='dot'), marker=dict(color='black')),
    secondary_y=True,
)

# Add a vertical line to indicate the start of future predictions using add_shape
fig.add_shape(
    type="line",
    x0=midpoint,
    y0=0,  # Starting at the bottom of the y-axis
    x1=midpoint,
    y1=1,  # Ending at the top of the y-axis
    line=dict(color="red", width=3, dash="dash"),
    xref="x",  # Reference to the x-axis
    yref="paper"  # Reference to the whole paper (0 to 1) in y-axis
)

# Add annotation for clarity
fig.add_annotation(
    x=midpoint,
    y=1,  # Position at the top
    text="Future Predictions",
    showarrow=False,
    xref="x",
    yref="paper",
    yshift=10  # Slightly shift it upwards for better visibility
)

# Update layout
fig.update_layout(
    title='Energy Production, Consumption, and CO2 Emissions Over Time',
    xaxis_title='Time',
    yaxis_title='Avg Electricity (MWh/h)',
    legend_title='Legend',
    barmode='stack',  # This makes the bars stack
    bargap=0.15,
    bargroupgap=0.1,
    yaxis2=dict(title='CO2 Emission factor (gCO2/kWh)', overlaying='y', side='right'),
    # One tick per row of combined_df, labelled with the year only
    xaxis=dict(
        tickmode='array',
        tickvals=combined_df['startTime'],
        ticktext=[str(date.year) for date in combined_df['startTime']]
    )
)

# Show the plot
fig.show()

In [None]:
# Save the figure to an HTML file (relative path, resolved against the
# current working directory)
html_output_path = 'fig.html'
fig.write_html(html_output_path)