# LIBRERÍAS

In [1]:
import pandas as pd
import numpy as np
import streamlit as st
import seaborn as sns
import plotly.express as px

from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

from prophet.plot import plot_cross_validation_metric

import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import itertools

import plotly.graph_objects as go
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots

# CONTINENTES

In [2]:
# Continent name -> list of country names assigned to that continent.
# The aggregation cell looks every dataset 'country' value up in these lists by exact
# string match and drops the rows that match none of them, so any 'country' value not
# listed here is excluded from all later tables.
# NOTE(review): spellings must match the CSV's 'country' values exactly. Entries such as
# 'Czech Republic', 'Cabo Verde', 'Brunei Darussalam' or 'Democratic Republic of the Congo'
# may be spelled differently in the dataset — TODO confirm against dataset['country'].unique().
# NOTE(review): 'North America' and 'Oceania' list only four countries each, so Central
# American, Caribbean and most Pacific states are left out — confirm this is intended.
CONTINENTS = {
    'Africa': ['Algeria', 'Angola', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi', 'Cabo Verde',
               'Cameroon', 'Central African Republic', 'Chad', 'Comoros', 'Congo', 'Cote d\'Ivoire',
               'Democratic Republic of the Congo', 'Djibouti', 'Egypt', 'Equatorial Guinea', 'Eritrea',
               'Eswatini', 'Ethiopia', 'Gabon', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Kenya',
               'Lesotho', 'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',
               'Mauritius', 'Morocco', 'Mozambique', 'Namibia', 'Niger', 'Nigeria', 'Rwanda',
               'Sao Tome and Principe', 'Senegal', 'Seychelles', 'Sierra Leone', 'Somalia',
               'South Africa', 'South Sudan', 'Sudan', 'Tanzania', 'Togo', 'Tunisia',
               'Uganda', 'Zambia', 'Zimbabwe'],
    'Asia': ['Afghanistan', 'Armenia', 'Azerbaijan', 'Bahrain', 'Bangladesh', 'Bhutan',
             'Brunei Darussalam', 'Cambodia', 'China', 'Cyprus', 'Georgia', 'India',
             'Indonesia', 'Iran', 'Iraq', 'Israel', 'Japan', 'Jordan', 'Kazakhstan', 'Kuwait',
             'Kyrgyzstan', 'Laos', 'Lebanon', 'Malaysia','Maldives', 'Mongolia', 'Myanmar', 'Nepal',
             'North Korea', 'Oman', 'Pakistan', 'Palestine', 'Philippines', 'Qatar', 'Saudi Arabia',
             'Singapore', 'South Korea', 'Sri Lanka', 'Syria', 'Taiwan', 'Tajikistan',
             'Thailand', 'Timor-Leste', 'Turkey', 'Turkmenistan', 'United Arab Emirates',
             'Uzbekistan', 'Vietnam', 'Yemen'],
    'Europe': ['Albania', 'Andorra', 'Austria', 'Belarus', 'Belgium', 'Bosnia and Herzegovina',
               'Bulgaria', 'Croatia', 'Czech Republic', 'Denmark', 'Estonia', 'Finland',
               'France', 'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy',
               'Kosovo', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malta',
               'Moldova', 'Monaco', 'Montenegro', 'Netherlands', 'North Macedonia', 'Norway',
               'Poland', 'Portugal', 'Romania', 'Russia', 'San Marino', 'Serbia', 'Slovakia',
               'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Ukraine', 'United Kingdom', 'Vatican City'],
    'North America': ['Canada', 'Greenland', 'Mexico', 'United States'],
    'Oceania': ['Australia', 'Fiji', 'New Zealand', 'Papua New Guinea'],
    'South America': ['Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador',
                  'Guyana', 'Paraguay', 'Peru', 'Suriname', 'Uruguay', 'Venezuela'],
}

# DATASET

In [3]:
# Read the raw CSV (path is relative to the notebook's working directory) into `dataset`.
dataset = pd.read_csv(filepath_or_buffer="World Energy Consumption.csv")

In [4]:
# Per-source columns that are summed into 'electricity_generation'.
col_electricity = [
    'nuclear_electricity',
    'oil_electricity',
    'coal_electricity',
    'gas_electricity',
    'hydro_electricity',
    'solar_electricity',
    'wind_electricity',
    'biofuel_electricity',
]

# Per-source columns that are summed into 'electricity_consumption'.
col_cons = [
    'nuclear_consumption',
    'oil_consumption',
    'coal_consumption',
    'gas_consumption',
    'hydro_consumption',
    'solar_consumption',
    'wind_consumption',
    'biofuel_consumption',
]

In [5]:
# Total generation per (year, country): sum each source column within the group, then add
# the sources together row-wise. 'country' is moved back to a column; 'year' stays as index.
dataset_exog_1 = (
    dataset
    .groupby(['year', 'country'])[col_electricity]
    .sum()
    .sum(axis=1)
    .to_frame('electricity_generation')
    .reset_index('country')
)

# Same aggregation for the consumption columns.
dataset_exog_2 = (
    dataset
    .groupby(['year', 'country'])[col_cons]
    .sum()
    .sum(axis=1)
    .to_frame('electricity_consumption')
    .reset_index('country')
)

# Put generation and consumption side by side, matching on the 'year' index level and
# the 'country' column.
dataset_exog_3 = pd.merge(dataset_exog_1, dataset_exog_2, on=['year', 'country'])

# Tag every row with the first continent whose country list contains its country
# (None when no list contains it), then keep only the rows that received a continent.
dataset_exog_3['continent'] = dataset_exog_3['country'].map(
    lambda nombre: next(
        (continente for continente, paises in CONTINENTS.items() if nombre in paises),
        None,
    )
)
dataset_exog = dataset_exog_3.loc[dataset_exog_3['continent'].notna()]

# Keep the years from 1986 onwards and turn the 'year' index into a regular column.
data_1986 = dataset_exog.loc[dataset_exog.index > 1985].reset_index()

In [6]:
# Show the filtered table as the cell output (the notebook renders a truncated preview).
data_1986

Unnamed: 0,year,country,electricity_generation,electricity_consumption,continent
0,1986,Afghanistan,0.000,0.000,Asia
1,1986,Albania,0.000,0.000,Europe
2,1986,Algeria,0.250,277.490,Africa
3,1986,Angola,0.000,0.000,Africa
4,1986,Argentina,48.898,508.488,South America
...,...,...,...,...,...
5652,2022,Uzbekistan,5.498,586.240,Asia
5653,2022,Venezuela,66.646,613.667,South America
5654,2022,Vietnam,263.330,1273.608,Asia
5655,2022,Yemen,0.000,0.000,Asia


In [9]:
# World-level dataset: yearly totals across every row of data_1986, labelled with the
# pseudo-continent 'Mundial' so it can be selected like any other continent.
mundial_data = (
    data_1986
    .groupby('year')[['electricity_generation', 'electricity_consumption']]
    .sum()
    .assign(continent='Mundial')
    .reset_index()
)

# MODELO PROPHET

In [12]:
def data_lugar(dato, lugar):
    """Return the yearly generation/consumption totals for one continent or country.

    ``lugar`` is looked up first in the 'continent' column and then in the 'country'
    column of ``dato``; a column that is absent from the frame is simply skipped, so
    frames that only carry one of the two (for example a world-level table with just
    'continent') can be queried for any name without raising ``KeyError``.

    Parameters
    ----------
    dato : pandas.DataFrame
        Must contain 'year', 'electricity_generation' and 'electricity_consumption',
        plus at least one of 'continent' / 'country'.
    lugar : str
        Continent or country name to select.

    Returns
    -------
    pandas.DataFrame or None
        One row per year with the columns 'year', the matched key column
        ('continent' or 'country'), 'electricity_generation' and
        'electricity_consumption', indexed 0..n-1. ``None`` when ``lugar`` is found
        in neither column.
    """
    value_cols = ['electricity_generation', 'electricity_consumption']

    # Continent is checked before country, so a name present in both resolves to the
    # continent.
    for key in ('continent', 'country'):
        if key in dato.columns and lugar in dato[key].values:
            # Filter first so only the requested place is aggregated.
            seleccion = dato[dato[key] == lugar]
            return seleccion.groupby(['year', key], as_index=False)[value_cols].sum()

    return None

In [13]:

def _build_prophet_model(params):
    """Create an unfitted Prophet model for one hyper-parameter combination."""
    model = Prophet(
        yearly_seasonality=True,
        changepoint_prior_scale=params['changepoint_prior_scale'],
        seasonality_prior_scale=params['seasonality_prior_scale'],
        seasonality_mode=params['seasonality_mode']
    )
    model.add_regressor('electricity_consumption')
    return model


def _future_with_consumption(model, history, periods):
    """Build the prediction frame of a fitted model and attach the consumption regressor."""
    # 'YS' (year start) keeps the generated dates on 1 January, the same day used for the
    # 'ds' values derived from the 'year' column. A year-end frequency would generate
    # 31 December dates that never match in the merge below.
    future = model.make_future_dataframe(periods=periods, freq='YS')
    future = future.merge(history[['ds', 'electricity_consumption']], on='ds', how='left')
    # Years with no observed consumption carry the last known value forward.
    future['electricity_consumption'] = future['electricity_consumption'].ffill()
    return future


def prophet(dato, lugar):
    """Tune, fit and plot a Prophet forecast of electricity generation for one place.

    Steps:
      1. Select the yearly series for ``lugar`` with ``data_lugar``.
      2. Grid-search Prophet hyper-parameters on a chronological 70/30 split, using
         'electricity_consumption' as an extra regressor and RMSE on the hold-out years
         as the selection criterion.
      3. Refit on the full series with the best parameters and forecast 5 further years
         (future consumption is taken as the last observed value).
      4. Show a plotly figure with the observed series and the forecast, print hold-out
         MAE / RMSE / MAPE for the selected parameters, and run Prophet's rolling
         cross-validation on the final model.

    Parameters
    ----------
    dato : pandas.DataFrame
        Table accepted by ``data_lugar`` (columns 'year', 'electricity_generation',
        'electricity_consumption' and 'continent' and/or 'country').
    lugar : str
        Continent or country to model.

    Returns
    -------
    None
        Results are printed and plotted.

    Raises
    ------
    ValueError
        If ``lugar`` is not present in ``dato`` or the series is too short to split.
    RuntimeError
        If no parameter combination yields a finite hold-out RMSE.
    """
    dato = data_lugar(dato, lugar)
    if dato is None:
        raise ValueError(f"'{lugar}' was not found as a continent or country in the data")

    # Work on a copy so the caller's frame does not gain the Prophet columns.
    prophet_data = dato.copy()
    prophet_data['ds'] = pd.to_datetime(prophet_data['year'], format='%Y')
    prophet_data['y'] = prophet_data['electricity_generation']

    # Hyper-parameter grid explored exhaustively below.
    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5, 1],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
        'seasonality_mode': ['additive', 'multiplicative']
    }
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

    # Chronological split: first 70% of the years for training, the rest held out.
    train_size = int(len(prophet_data) * 0.7)
    train_data = prophet_data.iloc[:train_size]
    test_data = prophet_data.iloc[train_size:]
    if train_size < 2 or test_data.empty:
        raise ValueError(
            f"Not enough yearly observations for '{lugar}' to build a train/test split "
            f"({len(prophet_data)} rows)"
        )
    # Plain arrays: the metrics must compare values by position, not by index label.
    y_test = test_data['y'].to_numpy(dtype=float)

    print("Finding best parameters...")
    best_rmse = float('inf')
    best_params = None
    best_yhat = None

    for params in all_params:
        model = _build_prophet_model(params)
        model.fit(train_data)

        # History plus one row per hold-out year, with the observed consumption attached.
        future = _future_with_consumption(model, prophet_data, periods=len(test_data))
        forecast = model.predict(future)

        # The last len(test_data) rows are the hold-out years.
        yhat = forecast['yhat'].to_numpy()[-len(test_data):]
        rmse = np.sqrt(mean_squared_error(y_test, yhat))

        if rmse < best_rmse:
            best_rmse = rmse
            best_params = params
            # Keep the winning predictions so MAE/MAPE describe the same model as the RMSE.
            best_yhat = yhat

    if best_params is None:
        raise RuntimeError("No parameter combination produced a finite RMSE")

    print("\nBest parameters:")
    for param, value in best_params.items():
        print(f"{param}: {value}")
    print(f"Best RMSE: {best_rmse:.2f}")

    # Final model: best parameters, fitted on the whole series.
    final_model = _build_prophet_model(best_params)
    final_model.fit(prophet_data)

    # Five extra years; their consumption is the forward-filled last observation.
    future = _future_with_consumption(final_model, prophet_data, periods=5)
    forecast = final_model.predict(future)

    fig = go.Figure()

    ultima_fecha = prophet_data['ds'].max()

    # Forecast rows from the last observed date onwards, so the red line starts where
    # the observed line ends.
    forecast_pred = forecast[forecast['ds'] >= ultima_fecha]

    # Observed series.
    fig.add_trace(go.Scatter(
        x=prophet_data['ds'],
        y=prophet_data['y'],
        mode='lines',
        name="Datos reales",
        line=dict(color='blue')
    ))

    # Forecast for the future stretch only.
    fig.add_trace(go.Scatter(
        x=forecast_pred['ds'],
        y=forecast_pred['yhat'],
        mode='lines',
        name="Predicción",
        line=dict(color='red')
    ))

    # Labels and layout.
    fig.update_layout(
        width=900,
        height=700,
        title=f"Predicción de Generación de Electricidad con Prophet {lugar}",
        xaxis_title="Año",
        yaxis_title="Generación de electricidad",
        template="plotly_white"
    )

    fig.show()

    # Hold-out metrics for the selected parameters.
    mae = mean_absolute_error(y_test, best_yhat)
    # MAPE is undefined where the actual value is 0, so those years are left out.
    nonzero = y_test != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_test[nonzero] - best_yhat[nonzero]) / y_test[nonzero])) * 100
    else:
        mape = float('nan')

    # Rolling-origin cross-validation of the final model.
    f_cv = cross_validation(final_model, initial='730 days', period='365 days', horizon='365 days')

    print("\nTest Set Metrics:")
    print(f"MAE: {mae:.2f}")
    print(f"RMSE: {best_rmse:.2f}")
    print(f"MAPE: {mape:.2f}%")

    df_p = performance_metrics(f_cv)
    print(df_p.head(10))
    # Separate name: this is a matplotlib figure, unrelated to the plotly `fig` above.
    cv_fig = plot_cross_validation_metric(f_cv, metric='mape')

