In [1]:
import pandas as pd
#Load file
df = pd.read_excel('/content/Base_FINAL.xlsx')

In [2]:
# Filters
df = df[(df['AÑO'] >= 2018) & (~df['TIPO'].isin(['EST', 'OTRAS']))]

In [3]:
import time
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
from sklearn.utils import resample
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Ensure the data is in numpy array format and correct dtype
X_features = df.drop(columns=['TIPO', 'AÑO', 'MORENA', 'PRIANPRD', 'OTROS'])
y = df[['MORENA', 'PRIANPRD', 'OTROS']]

# Convert categorical 'EDO' column to numeric using one-hot encoding without dropping first level
X_features = pd.get_dummies(X_features, columns=['EDO']).values.astype(np.float32)
y = y.values.astype(np.float32)

# Verify the data types
print(f"X_features dtype: {X_features.dtype}")
print(f"y dtype: {y.dtype}")

# Define function to calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred, threshold=1e-2):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true > threshold
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Define and train the Sequential Neural Network model
def build_sequential_nn(input_shape):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=input_shape))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Define and train the LSTM model
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(1, input_shape)))
    model.add(Dense(3, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Train and evaluate models
input_shape = (X_features.shape[1],)
X_features_reshaped = X_features.reshape((X_features.shape[0], 1, X_features.shape[1]))

models = {
    'Sequential_NN': build_sequential_nn(input_shape),
    'LSTM': build_lstm_model(X_features.shape[1])
}

trained_models = {}
mse_scores = {}
mae_scores = {}
mape_scores = {}

# Training models
for name, model in models.items():
    if name == 'LSTM':
        model.fit(X_features_reshaped, y, epochs=50, batch_size=32, verbose=0)
    else:
        model.fit(X_features, y, epochs=50, batch_size=32, verbose=0)

    preds = model.predict(X_features if name != 'LSTM' else X_features_reshaped)
    mse_scores[name] = mean_squared_error(y, preds)
    mae_scores[name] = mean_absolute_error(y, preds)
    mape_scores[name] = mean_absolute_percentage_error(y, preds)
    trained_models[name] = model

# Select the models for predictions
selected_models = ['Sequential_NN', 'LSTM']

# Create a DataFrame to display the errors of the selected models
errors_df = pd.DataFrame(
    [(model, mse_scores[model], mae_scores[model], mape_scores[model]) for model in selected_models],
    columns=['Model', 'MSE', 'MAE', 'MAPE']
)

# Display the DataFrame with the selected models and their errors
print("Selected Models with Errors:")
print(errors_df)

# Timing the process
start_time = time.time()

# Initialize dictionary to store predictions per EDO
predictions_per_edo = {}
ci_per_edo = {}

# Get unique labels in the 'EDO' column
unique_edos = df['EDO'].unique()

# Ensure the one-hot encoded columns exist
encoded_columns = [f'EDO_{edo}' for edo in unique_edos]
encoded_columns_in_df = [col for col in encoded_columns if col in df.columns]

# Number of bootstrap samples
n_bootstrap_samples = 100  # Reduced to 100 for faster computation

# General prediction
general_predictions = {}
general_ci = {}

# Calculate general predictions and confidence intervals
for name in selected_models:
    model = trained_models[name]
    preds_samples = []

    for _ in tqdm(range(n_bootstrap_samples), desc=f"Bootstrap sampling for general {name}"):
        X_resampled = resample(X_features).astype(np.float32)
        if name == 'LSTM':
            X_resampled = X_resampled.reshape((X_resampled.shape[0], 1, X_resampled.shape[1]))
        preds_resampled = model.predict(X_resampled)
        preds_samples.append(preds_resampled.mean(axis=0))

    preds_samples = np.array(preds_samples)
    preds_mean = preds_samples.mean(axis=0)
    preds_lower = np.percentile(preds_samples, 2.5, axis=0)
    preds_upper = np.percentile(preds_samples, 97.5, axis=0)

    general_predictions[name] = preds_mean
    general_ci[name] = (preds_lower, preds_upper)

# Iterate over each unique 'EDO' label
for edo in tqdm(unique_edos, desc="Processing EDOs"):
    # Create a dummy data point for prediction for the given EDO
    X_new_period_edo = X_features.mean(axis=0).reshape((1, -1)).astype(np.float32)

    # Check if the one-hot encoded column for this EDO exists
    col_name = f'EDO_{edo}'
    if col_name in encoded_columns_in_df:
        col_index = df.columns.get_loc(col_name)
        X_new_period_edo[0, col_index] = 1  # Add one-hot encoded EDO column

    # Ensure the new period data has the same columns as the training set
    X_new_period_edo_reshaped = X_new_period_edo.reshape((X_new_period_edo.shape[0], 1, X_new_period_edo.shape[1]))

    # Store predictions and confidence intervals for the selected models
    for name in selected_models:
        model = trained_models[name]

        # Bootstrap sampling to calculate confidence intervals
        preds_samples = []
        for _ in range(n_bootstrap_samples):
            preds_resampled = model.predict(X_new_period_edo if name != 'LSTM' else X_new_period_edo_reshaped)
            preds_samples.append(preds_resampled.flatten())

        preds_samples = np.array(preds_samples)
        preds_mean = preds_samples.mean(axis=0)
        preds_lower = np.percentile(preds_samples, 2.5, axis=0)
        preds_upper = np.percentile(preds_samples, 97.5, axis=0)

        predictions_per_edo.setdefault(name, []).append(preds_mean)
        ci_per_edo.setdefault(name, []).append((preds_lower, preds_upper))

# Timing end
end_time = time.time()
total_time = end_time - start_time

# Convert general predictions to DataFrames and add labels
general_predictions_df = {name: pd.DataFrame([preds], columns=['MORENA', 'PRIANPRD', 'OTROS']) for name, preds in general_predictions.items()}

# Convert EDO predictions to DataFrames and add labels
predictions_df_per_edo = {name: pd.DataFrame(preds, index=unique_edos, columns=['MORENA', 'PRIANPRD', 'OTROS']) for name, preds in predictions_per_edo.items()}

# Display the general predictions and confidence intervals
for name, df in general_predictions_df.items():
    print(f"\n{name} general predictions for the next period:")
    print(df)
    lower_ci = general_ci[name][0]
    upper_ci = general_ci[name][1]
    print(f"95% Confidence Intervals for {name}:")
    print(f"Lower - MORENA: {lower_ci[0]:.4f}, PRIANPRD: {lower_ci[1]:.4f}, OTROS: {lower_ci[2]:.4f}")
    print(f"Upper - MORENA: {upper_ci[0]:.4f}, PRIANPRD: {upper_ci[1]:.4f}, OTROS: {upper_ci[2]:.4f}")

# Display the DataFrames with EDO predictions and confidence intervals
for name, df in predictions_df_per_edo.items():
    print(f"\n{name} predictions per EDO for the next period:")
    print(df)
    print(f"95% Confidence Intervals for {name}:")
    for i, edo in enumerate(unique_edos):
        lower_ci = ci_per_edo[name][i][0]
        upper_ci = ci_per_edo[name][i][1]
        print(f"{edo}: Lower - MORENA: {lower_ci[0]:.4f}, PRIANPRD: {lower_ci[1]:.4f}, OTROS: {lower_ci[2]:.4f}")
        print(f"       Upper - MORENA: {upper_ci[0]:.4f}, PRIANPRD: {upper_ci[1]:.4f}, OTROS: {upper_ci[2]:.4f}")

print(f"Total execution time: {total_time:.2f} seconds")




X_features dtype: float32
y dtype: float32
Selected Models with Errors:
           Model       MSE       MAE       MAPE
0  Sequential_NN  0.004148  0.049625  24.513263
1           LSTM  0.004266  0.050149  24.223070


Bootstrap sampling for general Sequential_NN:   0%|          | 0/100 [00:00<?, ?it/s]



Bootstrap sampling for general Sequential_NN:   2%|▏         | 2/100 [00:00<00:07, 13.34it/s]



Bootstrap sampling for general Sequential_NN:   4%|▍         | 4/100 [00:00<00:07, 13.56it/s]



Bootstrap sampling for general Sequential_NN:   6%|▌         | 6/100 [00:00<00:06, 13.79it/s]



Bootstrap sampling for general Sequential_NN:   8%|▊         | 8/100 [00:00<00:06, 13.85it/s]



Bootstrap sampling for general Sequential_NN:  10%|█         | 10/100 [00:00<00:06, 13.10it/s]



Bootstrap sampling for general Sequential_NN:  12%|█▏        | 12/100 [00:00<00:06, 13.39it/s]



Bootstrap sampling for general Sequential_NN:  14%|█▍        | 14/100 [00:01<00:06, 13.60it/s]



Bootstrap sampling for general Sequential_NN:  16%|█▌        | 16/100 [00:01<00:06, 13.58it/s]



Bootstrap sampling for general Sequential_NN:  18%|█▊        | 18/100 [00:01<00:06, 13.48it/s]



Bootstrap sampling for general Sequential_NN:  20%|██        | 20/100 [00:01<00:05, 13.64it/s]



Bootstrap sampling for general Sequential_NN:  22%|██▏       | 22/100 [00:01<00:05, 13.52it/s]



Bootstrap sampling for general Sequential_NN:  24%|██▍       | 24/100 [00:01<00:05, 13.33it/s]



Bootstrap sampling for general Sequential_NN:  26%|██▌       | 26/100 [00:01<00:05, 13.43it/s]



Bootstrap sampling for general Sequential_NN:  28%|██▊       | 28/100 [00:02<00:05, 13.11it/s]



Bootstrap sampling for general Sequential_NN:  30%|███       | 30/100 [00:02<00:05, 13.15it/s]



Bootstrap sampling for general Sequential_NN:  32%|███▏      | 32/100 [00:02<00:05, 13.34it/s]



Bootstrap sampling for general Sequential_NN:  34%|███▍      | 34/100 [00:02<00:04, 13.42it/s]



Bootstrap sampling for general Sequential_NN:  36%|███▌      | 36/100 [00:02<00:04, 13.28it/s]



Bootstrap sampling for general Sequential_NN:  38%|███▊      | 38/100 [00:02<00:04, 13.23it/s]



Bootstrap sampling for general Sequential_NN:  40%|████      | 40/100 [00:02<00:04, 13.33it/s]



Bootstrap sampling for general Sequential_NN:  42%|████▏     | 42/100 [00:03<00:04, 13.17it/s]



Bootstrap sampling for general Sequential_NN:  44%|████▍     | 44/100 [00:03<00:04, 12.72it/s]



Bootstrap sampling for general Sequential_NN:  46%|████▌     | 46/100 [00:03<00:04, 12.73it/s]



Bootstrap sampling for general Sequential_NN:  48%|████▊     | 48/100 [00:03<00:04, 12.82it/s]



Bootstrap sampling for general Sequential_NN:  50%|█████     | 50/100 [00:03<00:03, 13.06it/s]



Bootstrap sampling for general Sequential_NN:  52%|█████▏    | 52/100 [00:03<00:03, 13.26it/s]



Bootstrap sampling for general Sequential_NN:  54%|█████▍    | 54/100 [00:04<00:03, 13.11it/s]



Bootstrap sampling for general Sequential_NN:  56%|█████▌    | 56/100 [00:04<00:03, 13.26it/s]



Bootstrap sampling for general Sequential_NN:  58%|█████▊    | 58/100 [00:04<00:03, 13.40it/s]



Bootstrap sampling for general Sequential_NN:  60%|██████    | 60/100 [00:04<00:02, 13.39it/s]



Bootstrap sampling for general Sequential_NN:  62%|██████▏   | 62/100 [00:04<00:02, 13.48it/s]



Bootstrap sampling for general Sequential_NN:  64%|██████▍   | 64/100 [00:04<00:02, 13.47it/s]



Bootstrap sampling for general Sequential_NN:  66%|██████▌   | 66/100 [00:04<00:02, 13.51it/s]



Bootstrap sampling for general Sequential_NN:  68%|██████▊   | 68/100 [00:05<00:02, 13.41it/s]



Bootstrap sampling for general Sequential_NN:  70%|███████   | 70/100 [00:05<00:02, 13.44it/s]



Bootstrap sampling for general Sequential_NN:  72%|███████▏  | 72/100 [00:05<00:02, 13.34it/s]



Bootstrap sampling for general Sequential_NN:  74%|███████▍  | 74/100 [00:05<00:01, 13.28it/s]



Bootstrap sampling for general Sequential_NN:  76%|███████▌  | 76/100 [00:05<00:01, 13.35it/s]



Bootstrap sampling for general Sequential_NN:  78%|███████▊  | 78/100 [00:05<00:01, 13.14it/s]



Bootstrap sampling for general Sequential_NN:  80%|████████  | 80/100 [00:06<00:01, 13.14it/s]



Bootstrap sampling for general Sequential_NN:  82%|████████▏ | 82/100 [00:06<00:01, 13.04it/s]



Bootstrap sampling for general Sequential_NN:  84%|████████▍ | 84/100 [00:06<00:01, 13.32it/s]



Bootstrap sampling for general Sequential_NN:  86%|████████▌ | 86/100 [00:06<00:01, 13.24it/s]



Bootstrap sampling for general Sequential_NN:  88%|████████▊ | 88/100 [00:06<00:00, 13.27it/s]



Bootstrap sampling for general Sequential_NN:  90%|█████████ | 90/100 [00:06<00:00, 13.24it/s]



Bootstrap sampling for general Sequential_NN:  92%|█████████▏| 92/100 [00:06<00:00, 13.25it/s]



Bootstrap sampling for general Sequential_NN:  94%|█████████▍| 94/100 [00:07<00:00, 13.22it/s]



Bootstrap sampling for general Sequential_NN:  96%|█████████▌| 96/100 [00:07<00:00, 13.39it/s]



Bootstrap sampling for general Sequential_NN:  98%|█████████▊| 98/100 [00:07<00:00, 13.57it/s]



Bootstrap sampling for general Sequential_NN: 100%|██████████| 100/100 [00:07<00:00, 13.30it/s]
Bootstrap sampling for general LSTM:   0%|          | 0/100 [00:00<?, ?it/s]



Bootstrap sampling for general LSTM:   2%|▏         | 2/100 [00:00<00:07, 13.72it/s]



Bootstrap sampling for general LSTM:   4%|▍         | 4/100 [00:00<00:06, 13.83it/s]



Bootstrap sampling for general LSTM:   6%|▌         | 6/100 [00:00<00:07, 13.21it/s]



Bootstrap sampling for general LSTM:   8%|▊         | 8/100 [00:00<00:07, 13.05it/s]



Bootstrap sampling for general LSTM:  10%|█         | 10/100 [00:00<00:06, 13.21it/s]



Bootstrap sampling for general LSTM:  12%|█▏        | 12/100 [00:00<00:06, 13.11it/s]



Bootstrap sampling for general LSTM:  14%|█▍        | 14/100 [00:01<00:06, 13.16it/s]



Bootstrap sampling for general LSTM:  16%|█▌        | 16/100 [00:01<00:06, 13.28it/s]



Bootstrap sampling for general LSTM:  18%|█▊        | 18/100 [00:01<00:06, 12.79it/s]



Bootstrap sampling for general LSTM:  20%|██        | 20/100 [00:01<00:06, 12.82it/s]



Bootstrap sampling for general LSTM:  22%|██▏       | 22/100 [00:01<00:05, 13.00it/s]



Bootstrap sampling for general LSTM:  24%|██▍       | 24/100 [00:01<00:05, 13.04it/s]



Bootstrap sampling for general LSTM:  26%|██▌       | 26/100 [00:01<00:05, 12.93it/s]



Bootstrap sampling for general LSTM:  28%|██▊       | 28/100 [00:02<00:05, 13.14it/s]



Bootstrap sampling for general LSTM:  30%|███       | 30/100 [00:02<00:05, 13.28it/s]



Bootstrap sampling for general LSTM:  32%|███▏      | 32/100 [00:02<00:05, 12.90it/s]



Bootstrap sampling for general LSTM:  34%|███▍      | 34/100 [00:02<00:05, 12.99it/s]



Bootstrap sampling for general LSTM:  36%|███▌      | 36/100 [00:02<00:04, 12.94it/s]



Bootstrap sampling for general LSTM:  38%|███▊      | 38/100 [00:02<00:04, 12.89it/s]



Bootstrap sampling for general LSTM:  40%|████      | 40/100 [00:03<00:04, 13.08it/s]



Bootstrap sampling for general LSTM:  42%|████▏     | 42/100 [00:03<00:04, 13.34it/s]



Bootstrap sampling for general LSTM:  44%|████▍     | 44/100 [00:03<00:04, 13.17it/s]



Bootstrap sampling for general LSTM:  46%|████▌     | 46/100 [00:03<00:04, 13.14it/s]



Bootstrap sampling for general LSTM:  48%|████▊     | 48/100 [00:03<00:03, 13.34it/s]



Bootstrap sampling for general LSTM:  50%|█████     | 50/100 [00:03<00:03, 13.32it/s]



Bootstrap sampling for general LSTM:  52%|█████▏    | 52/100 [00:03<00:03, 13.32it/s]



Bootstrap sampling for general LSTM:  54%|█████▍    | 54/100 [00:04<00:03, 13.29it/s]



Bootstrap sampling for general LSTM:  56%|█████▌    | 56/100 [00:04<00:03, 13.33it/s]



Bootstrap sampling for general LSTM:  58%|█████▊    | 58/100 [00:04<00:03, 13.28it/s]



Bootstrap sampling for general LSTM:  60%|██████    | 60/100 [00:04<00:03, 13.15it/s]



Bootstrap sampling for general LSTM:  62%|██████▏   | 62/100 [00:04<00:02, 13.20it/s]



Bootstrap sampling for general LSTM:  64%|██████▍   | 64/100 [00:04<00:02, 13.15it/s]



Bootstrap sampling for general LSTM:  66%|██████▌   | 66/100 [00:05<00:02, 13.26it/s]



Bootstrap sampling for general LSTM:  68%|██████▊   | 68/100 [00:05<00:02, 13.48it/s]



Bootstrap sampling for general LSTM:  70%|███████   | 70/100 [00:05<00:02, 13.45it/s]



Bootstrap sampling for general LSTM:  72%|███████▏  | 72/100 [00:05<00:02, 13.26it/s]



Bootstrap sampling for general LSTM:  74%|███████▍  | 74/100 [00:05<00:01, 13.36it/s]



Bootstrap sampling for general LSTM:  76%|███████▌  | 76/100 [00:05<00:01, 13.41it/s]



Bootstrap sampling for general LSTM:  78%|███████▊  | 78/100 [00:05<00:01, 13.32it/s]



Bootstrap sampling for general LSTM:  80%|████████  | 80/100 [00:06<00:01, 13.32it/s]



Bootstrap sampling for general LSTM:  82%|████████▏ | 82/100 [00:06<00:01, 13.02it/s]



Bootstrap sampling for general LSTM:  84%|████████▍ | 84/100 [00:06<00:01, 12.88it/s]



Bootstrap sampling for general LSTM:  86%|████████▌ | 86/100 [00:06<00:01, 13.11it/s]



Bootstrap sampling for general LSTM:  88%|████████▊ | 88/100 [00:06<00:00, 13.25it/s]



Bootstrap sampling for general LSTM:  90%|█████████ | 90/100 [00:06<00:00, 12.67it/s]



Bootstrap sampling for general LSTM:  92%|█████████▏| 92/100 [00:07<00:00, 12.77it/s]



Bootstrap sampling for general LSTM:  94%|█████████▍| 94/100 [00:07<00:00, 12.87it/s]



Bootstrap sampling for general LSTM:  96%|█████████▌| 96/100 [00:07<00:00, 12.72it/s]



Bootstrap sampling for general LSTM:  98%|█████████▊| 98/100 [00:07<00:00, 12.71it/s]



Bootstrap sampling for general LSTM: 100%|██████████| 100/100 [00:07<00:00, 13.08it/s]
Processing EDOs:   0%|          | 0/33 [00:00<?, ?it/s]



Processing EDOs:   3%|▎         | 1/33 [00:12<06:40, 12.50s/it]



Processing EDOs:   6%|▌         | 2/33 [00:24<06:23, 12.37s/it]



Processing EDOs:   9%|▉         | 3/33 [00:36<06:08, 12.29s/it]



Processing EDOs:  12%|█▏        | 4/33 [00:49<05:55, 12.27s/it]



Processing EDOs:  15%|█▌        | 5/33 [01:01<05:43, 12.26s/it]



Processing EDOs:  18%|█▊        | 6/33 [01:14<05:34, 12.37s/it]



Processing EDOs:  21%|██        | 7/33 [01:26<05:22, 12.41s/it]



Processing EDOs:  24%|██▍       | 8/33 [01:38<05:10, 12.41s/it]



Processing EDOs:  27%|██▋       | 9/33 [01:51<04:56, 12.37s/it]



Processing EDOs:  30%|███       | 10/33 [02:03<04:44, 12.35s/it]



Processing EDOs:  33%|███▎      | 11/33 [02:16<04:36, 12.57s/it]



Processing EDOs:  36%|███▋      | 12/33 [02:29<04:23, 12.56s/it]



Processing EDOs:  39%|███▉      | 13/33 [02:41<04:10, 12.53s/it]



Processing EDOs:  42%|████▏     | 14/33 [02:53<03:57, 12.49s/it]



Processing EDOs:  45%|████▌     | 15/33 [03:06<03:44, 12.45s/it]



Processing EDOs:  48%|████▊     | 16/33 [03:19<03:33, 12.57s/it]



Processing EDOs:  52%|█████▏    | 17/33 [03:31<03:21, 12.60s/it]



Processing EDOs:  55%|█████▍    | 18/33 [03:44<03:08, 12.55s/it]



Processing EDOs:  58%|█████▊    | 19/33 [03:56<02:55, 12.53s/it]



Processing EDOs:  61%|██████    | 20/33 [04:09<02:42, 12.48s/it]



Processing EDOs:  64%|██████▎   | 21/33 [04:22<02:31, 12.62s/it]



Processing EDOs:  67%|██████▋   | 22/33 [04:34<02:19, 12.67s/it]



Processing EDOs:  70%|██████▉   | 23/33 [04:47<02:06, 12.62s/it]



Processing EDOs:  73%|███████▎  | 24/33 [04:59<01:53, 12.58s/it]



Processing EDOs:  76%|███████▌  | 25/33 [05:12<01:40, 12.52s/it]



Processing EDOs:  79%|███████▉  | 26/33 [05:25<01:28, 12.64s/it]



Processing EDOs:  82%|████████▏ | 27/33 [05:38<01:16, 12.74s/it]



Processing EDOs:  85%|████████▍ | 28/33 [05:50<01:03, 12.71s/it]



Processing EDOs:  88%|████████▊ | 29/33 [06:03<00:50, 12.67s/it]



Processing EDOs:  91%|█████████ | 30/33 [06:15<00:37, 12.61s/it]



Processing EDOs:  94%|█████████▍| 31/33 [06:28<00:25, 12.72s/it]



Processing EDOs:  97%|█████████▋| 32/33 [06:42<00:12, 12.89s/it]



Processing EDOs: 100%|██████████| 33/33 [06:54<00:00, 12.57s/it]


Sequential_NN general predictions for the next period:
     MORENA  PRIANPRD     OTROS
0  0.523954  0.402746  0.070216
95% Confidence Intervals for Sequential_NN:
Lower - MORENA: 0.5080, PRIANPRD: 0.3908, OTROS: 0.0670
Upper - MORENA: 0.5349, PRIANPRD: 0.4206, OTROS: 0.0732

LSTM general predictions for the next period:
     MORENA  PRIANPRD     OTROS
0  0.526276  0.405451  0.066633
95% Confidence Intervals for LSTM:
Lower - MORENA: 0.5137, PRIANPRD: 0.3903, OTROS: 0.0636
Upper - MORENA: 0.5394, PRIANPRD: 0.4190, OTROS: 0.0697

Sequential_NN predictions per EDO for the next period:
      MORENA  PRIANPRD     OTROS
1   0.367633  0.362346  0.010018
2   0.367633  0.362346  0.010018
3   0.367633  0.362346  0.010018
4   0.367633  0.362346  0.010018
5   0.367633  0.362346  0.010018
6   0.367633  0.362346  0.010018
7   0.367633  0.362346  0.010018
8   0.367633  0.362346  0.010018
9   0.367633  0.362346  0.010018
10  0.367633  0.362346  0.010018
11  0.367633  0.362346  0.010018
12  0.367633  




In [3]:
import time
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
from sklearn.utils import resample
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Ensure the data is in numpy array format and correct dtype
X_features = df.drop(columns=['TIPO', 'AÑO', 'MORENA', 'PRIANPRD', 'OTROS'])
y = df[['MORENA', 'PRIANPRD', 'OTROS']]

# Convert categorical 'EDO' column to numeric using one-hot encoding without dropping the first level
X_features = pd.get_dummies(X_features, columns=['EDO']).values.astype(np.float32)
y = y.values.astype(np.float32)

# Verify the data types
print(f"X_features dtype: {X_features.dtype}")
print(f"y dtype: {y.dtype}")

# Define function to calculate MAPE
def mean_absolute_percentage_error_custom(y_true, y_pred, threshold=1e-2):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true > threshold
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Define and train the Sequential Neural Network model
def build_sequential_nn(input_shape):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=input_shape))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Define and train the LSTM model
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(1, input_shape)))
    model.add(Dense(3, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Train and evaluate models
input_shape = (X_features.shape[1],)
X_features_reshaped = X_features.reshape((X_features.shape[0], 1, X_features.shape[1]))

models = {
    'Sequential_NN': build_sequential_nn(input_shape),
    'LSTM': build_lstm_model(X_features.shape[1])
}

trained_models = {}
mse_scores = {}
mae_scores = {}
mape_scores = {}

# Training models
for name, model in models.items():
    if name == 'LSTM':
        model.fit(X_features_reshaped, y, epochs=50, batch_size=32, verbose=0)
    else:
        model.fit(X_features, y, epochs=50, batch_size=32, verbose=0)

    preds = model.predict(X_features if name != 'LSTM' else X_features_reshaped)
    mse_scores[name] = mean_squared_error(y, preds)
    mae_scores[name] = mean_absolute_error(y, preds)
    mape_scores[name] = mean_absolute_percentage_error_custom(y, preds)
    trained_models[name] = model

# Create a DataFrame to display the errors of the selected models
errors_df = pd.DataFrame(
    [(model, mse_scores[model], mae_scores[model], mape_scores[model]) for model in models.keys()],
    columns=['Model', 'MSE', 'MAE', 'MAPE']
)

# Display the DataFrame with the errors
print("Selected Models with Errors:")
print(errors_df)

# Timing the process
start_time = time.time()

# Initialize dictionary to store predictions per EDO
predictions_per_edo = {}
ci_per_edo = {}

# Get unique labels in the 'EDO' column
unique_edos = df['EDO'].unique()

# Ensure the one-hot encoded columns exist
encoded_columns = [f'EDO_{edo}' for edo in unique_edos]
encoded_columns_in_df = [col for col in encoded_columns if col in df.columns]

# Number of bootstrap samples
n_bootstrap_samples = 100  # Reduced to 100 for faster computation

# General prediction
general_predictions = {}
general_ci = {}

# Calculate general predictions and confidence intervals
for name in models.keys():
    model = trained_models[name]
    preds_samples = []

    for _ in tqdm(range(n_bootstrap_samples), desc=f"Bootstrap sampling for general {name}"):
        X_resampled = resample(X_features).astype(np.float32)
        if name == 'LSTM':
            X_resampled = X_resampled.reshape((X_resampled.shape[0], 1, X_resampled.shape[1]))
        preds_resampled = model.predict(X_resampled)
        preds_samples.append(preds_resampled.mean(axis=0))

    preds_samples = np.array(preds_samples)
    preds_mean = preds_samples.mean(axis=0)
    preds_lower = np.percentile(preds_samples, 2.5, axis=0)
    preds_upper = np.percentile(preds_samples, 97.5, axis=0)

    general_predictions[name] = preds_mean
    general_ci[name] = (preds_lower, preds_upper)

# Iterate over each unique 'EDO' label
for edo in tqdm(unique_edos, desc="Processing EDOs"):
    # Create a dummy data point for prediction for the given EDO
    X_new_period_edo = X_features.mean(axis=0).reshape((1, -1)).astype(np.float32)

    # Check if the one-hot encoded column for this EDO exists
    col_name = f'EDO_{edo}'
    if col_name in encoded_columns_in_df:
        col_index = df.columns.get_loc(col_name)
        X_new_period_edo[0, col_index] = 1  # Add one-hot encoded EDO column

    # Ensure the new period data has the same columns as the training set
    X_new_period_edo_reshaped = X_new_period_edo.reshape((X_new_period_edo.shape[0], 1, X_new_period_edo.shape[1]))

    # Store predictions and confidence intervals for the selected models
    for name in models.keys():
        model = trained_models[name]

        # Bootstrap sampling to calculate confidence intervals
        preds_samples = []
        for _ in range(n_bootstrap_samples):
            preds_resampled = model.predict(X_new_period_edo if name != 'LSTM' else X_new_period_edo_reshaped)
            preds_samples.append(preds_resampled.flatten())

        preds_samples = np.array(preds_samples)
        preds_mean = preds_samples.mean(axis=0)
        preds_lower = np.percentile(preds_samples, 2.5, axis=0)
        preds_upper = np.percentile(preds_samples, 97.5, axis=0)

        predictions_per_edo.setdefault(name, []).append(preds_mean)
        ci_per_edo.setdefault(name, []).append((preds_lower, preds_upper))

# Timing end
end_time = time.time()
total_time = end_time - start_time

# Convert general predictions to DataFrames and add labels
general_predictions_df = {name: pd.DataFrame([preds], columns=['MORENA', 'PRIANPRD', 'OTROS']) for name, preds in general_predictions.items()}

# Convert EDO predictions to DataFrames and add labels
predictions_df_per_edo = {name: pd.DataFrame(preds, index=unique_edos, columns=['MORENA', 'PRIANPRD', 'OTROS']) for name, preds in predictions_per_edo.items()}

# Display the general predictions and confidence intervals
for name, df in general_predictions_df.items():
    print(f"\n{name} general predictions for the next period:")
    print(df)
    lower_ci = general_ci[name][0]
    upper_ci = general_ci[name][1]
    print(f"95% Confidence Intervals for {name}:")
    print(f"Lower - MORENA: {lower_ci[0]:.4f}, PRIANPRD: {lower_ci[1]:.4f}, OTROS: {lower_ci[2]:.4f}")
    print(f"Upper - MORENA: {upper_ci[0]:.4f}, PRIANPRD: {upper_ci[1]:.4f}, OTROS: {upper_ci[2]:.4f}")

# Display the DataFrames with EDO predictions and confidence intervals
for name, df in predictions_df_per_edo.items():
    print(f"\n{name} predictions per EDO for the next period:")
    print(df)
    print(f"95% Confidence Intervals for {name}:")
    for i, edo in enumerate(unique_edos):
        lower_ci = ci_per_edo[name][i][0]
        upper_ci = ci_per_edo[name][i][1]
        print(f"{edo}: Lower - MORENA: {lower_ci[0]:.4f}, PRIANPRD: {lower_ci[1]:.4f}, OTROS: {lower_ci[2]:.4f}")
        print(f"       Upper - MORENA: {upper_ci[0]:.4f}, PRIANPRD: {upper_ci[1]:.4f}, OTROS: {upper_ci[2]:.4f}")

print(f"Total execution time: {total_time:.2f} seconds")


X_features dtype: float32
y dtype: float32




Selected Models with Errors:
           Model       MSE       MAE       MAPE
0  Sequential_NN  0.004155  0.049426  24.274854
1           LSTM  0.004342  0.050548  24.143195


Bootstrap sampling for general Sequential_NN:   0%|          | 0/100 [00:00<?, ?it/s]



Bootstrap sampling for general Sequential_NN:   2%|▏         | 2/100 [00:00<00:07, 13.43it/s]



Bootstrap sampling for general Sequential_NN:   4%|▍         | 4/100 [00:00<00:07, 13.15it/s]



Bootstrap sampling for general Sequential_NN:   6%|▌         | 6/100 [00:00<00:06, 13.44it/s]



Bootstrap sampling for general Sequential_NN:   8%|▊         | 8/100 [00:00<00:06, 13.33it/s]



Bootstrap sampling for general Sequential_NN:  10%|█         | 10/100 [00:00<00:06, 13.38it/s]



Bootstrap sampling for general Sequential_NN:  12%|█▏        | 12/100 [00:00<00:06, 13.31it/s]



Bootstrap sampling for general Sequential_NN:  14%|█▍        | 14/100 [00:01<00:06, 13.54it/s]



Bootstrap sampling for general Sequential_NN:  16%|█▌        | 16/100 [00:01<00:06, 13.07it/s]



Bootstrap sampling for general Sequential_NN:  18%|█▊        | 18/100 [00:01<00:06, 13.21it/s]



Bootstrap sampling for general Sequential_NN:  20%|██        | 20/100 [00:01<00:05, 13.38it/s]



Bootstrap sampling for general Sequential_NN:  22%|██▏       | 22/100 [00:01<00:05, 13.53it/s]



Bootstrap sampling for general Sequential_NN:  24%|██▍       | 24/100 [00:01<00:05, 13.63it/s]



Bootstrap sampling for general Sequential_NN:  26%|██▌       | 26/100 [00:01<00:05, 13.36it/s]



Bootstrap sampling for general Sequential_NN:  28%|██▊       | 28/100 [00:02<00:05, 13.58it/s]



Bootstrap sampling for general Sequential_NN:  30%|███       | 30/100 [00:02<00:05, 13.32it/s]



Bootstrap sampling for general Sequential_NN:  32%|███▏      | 32/100 [00:02<00:05, 13.50it/s]



Bootstrap sampling for general Sequential_NN:  34%|███▍      | 34/100 [00:02<00:04, 13.68it/s]



Bootstrap sampling for general Sequential_NN:  36%|███▌      | 36/100 [00:02<00:04, 13.53it/s]



Bootstrap sampling for general Sequential_NN:  38%|███▊      | 38/100 [00:02<00:04, 13.45it/s]



Bootstrap sampling for general Sequential_NN:  40%|████      | 40/100 [00:02<00:04, 13.42it/s]



Bootstrap sampling for general Sequential_NN:  42%|████▏     | 42/100 [00:03<00:04, 13.44it/s]



Bootstrap sampling for general Sequential_NN:  44%|████▍     | 44/100 [00:03<00:04, 13.35it/s]



Bootstrap sampling for general Sequential_NN:  46%|████▌     | 46/100 [00:03<00:04, 13.33it/s]



Bootstrap sampling for general Sequential_NN:  48%|████▊     | 48/100 [00:03<00:03, 13.11it/s]



Bootstrap sampling for general Sequential_NN:  50%|█████     | 50/100 [00:03<00:03, 13.12it/s]



Bootstrap sampling for general Sequential_NN:  52%|█████▏    | 52/100 [00:03<00:03, 13.25it/s]



Bootstrap sampling for general Sequential_NN:  54%|█████▍    | 54/100 [00:04<00:03, 13.28it/s]



Bootstrap sampling for general Sequential_NN:  56%|█████▌    | 56/100 [00:04<00:03, 13.17it/s]



Bootstrap sampling for general Sequential_NN:  58%|█████▊    | 58/100 [00:04<00:03, 12.67it/s]



Bootstrap sampling for general Sequential_NN:  60%|██████    | 60/100 [00:04<00:03, 12.98it/s]



Bootstrap sampling for general Sequential_NN:  62%|██████▏   | 62/100 [00:04<00:02, 13.02it/s]



Bootstrap sampling for general Sequential_NN:  64%|██████▍   | 64/100 [00:04<00:02, 13.30it/s]



Bootstrap sampling for general Sequential_NN:  66%|██████▌   | 66/100 [00:04<00:02, 13.44it/s]



Bootstrap sampling for general Sequential_NN:  68%|██████▊   | 68/100 [00:05<00:02, 13.42it/s]



Bootstrap sampling for general Sequential_NN:  70%|███████   | 70/100 [00:05<00:02, 13.39it/s]



Bootstrap sampling for general Sequential_NN:  72%|███████▏  | 72/100 [00:05<00:02, 13.33it/s]



Bootstrap sampling for general Sequential_NN:  74%|███████▍  | 74/100 [00:05<00:01, 13.44it/s]



Bootstrap sampling for general Sequential_NN:  76%|███████▌  | 76/100 [00:05<00:01, 13.29it/s]



Bootstrap sampling for general Sequential_NN:  78%|███████▊  | 78/100 [00:05<00:01, 13.60it/s]



Bootstrap sampling for general Sequential_NN:  80%|████████  | 80/100 [00:05<00:01, 13.83it/s]



Bootstrap sampling for general Sequential_NN:  82%|████████▏ | 82/100 [00:06<00:01, 13.74it/s]



Bootstrap sampling for general Sequential_NN:  84%|████████▍ | 84/100 [00:06<00:01, 13.59it/s]



Bootstrap sampling for general Sequential_NN:  86%|████████▌ | 86/100 [00:06<00:01, 13.37it/s]



Bootstrap sampling for general Sequential_NN:  88%|████████▊ | 88/100 [00:06<00:00, 13.42it/s]



Bootstrap sampling for general Sequential_NN:  90%|█████████ | 90/100 [00:06<00:00, 13.58it/s]



Bootstrap sampling for general Sequential_NN:  92%|█████████▏| 92/100 [00:06<00:00, 13.81it/s]



Bootstrap sampling for general Sequential_NN:  94%|█████████▍| 94/100 [00:07<00:00, 13.64it/s]



Bootstrap sampling for general Sequential_NN:  96%|█████████▌| 96/100 [00:07<00:00, 13.62it/s]



Bootstrap sampling for general Sequential_NN:  98%|█████████▊| 98/100 [00:07<00:00, 13.80it/s]



Bootstrap sampling for general Sequential_NN: 100%|██████████| 100/100 [00:07<00:00, 13.42it/s]
Bootstrap sampling for general LSTM:   0%|          | 0/100 [00:00<?, ?it/s]



Bootstrap sampling for general LSTM:   2%|▏         | 2/100 [00:00<00:07, 12.59it/s]



Bootstrap sampling for general LSTM:   4%|▍         | 4/100 [00:00<00:07, 13.22it/s]



Bootstrap sampling for general LSTM:   6%|▌         | 6/100 [00:00<00:06, 13.64it/s]



Bootstrap sampling for general LSTM:   8%|▊         | 8/100 [00:00<00:06, 13.40it/s]



Bootstrap sampling for general LSTM:  10%|█         | 10/100 [00:00<00:06, 13.35it/s]



Bootstrap sampling for general LSTM:  12%|█▏        | 12/100 [00:00<00:06, 13.04it/s]



Bootstrap sampling for general LSTM:  14%|█▍        | 14/100 [00:01<00:06, 13.10it/s]



Bootstrap sampling for general LSTM:  16%|█▌        | 16/100 [00:01<00:06, 13.14it/s]



Bootstrap sampling for general LSTM:  18%|█▊        | 18/100 [00:01<00:06, 13.37it/s]



Bootstrap sampling for general LSTM:  20%|██        | 20/100 [00:01<00:06, 13.16it/s]



Bootstrap sampling for general LSTM:  22%|██▏       | 22/100 [00:01<00:05, 13.04it/s]



Bootstrap sampling for general LSTM:  24%|██▍       | 24/100 [00:01<00:05, 13.21it/s]



Bootstrap sampling for general LSTM:  26%|██▌       | 26/100 [00:01<00:05, 13.32it/s]



Bootstrap sampling for general LSTM:  28%|██▊       | 28/100 [00:02<00:05, 13.21it/s]



Bootstrap sampling for general LSTM:  30%|███       | 30/100 [00:02<00:05, 13.12it/s]



Bootstrap sampling for general LSTM:  32%|███▏      | 32/100 [00:02<00:05, 13.30it/s]



Bootstrap sampling for general LSTM:  34%|███▍      | 34/100 [00:02<00:04, 13.24it/s]



Bootstrap sampling for general LSTM:  36%|███▌      | 36/100 [00:02<00:04, 13.32it/s]



Bootstrap sampling for general LSTM:  38%|███▊      | 38/100 [00:02<00:04, 13.26it/s]



Bootstrap sampling for general LSTM:  40%|████      | 40/100 [00:03<00:04, 12.92it/s]



Bootstrap sampling for general LSTM:  42%|████▏     | 42/100 [00:03<00:04, 12.94it/s]



Bootstrap sampling for general LSTM:  44%|████▍     | 44/100 [00:03<00:04, 13.01it/s]



Bootstrap sampling for general LSTM:  46%|████▌     | 46/100 [00:03<00:04, 13.19it/s]



Bootstrap sampling for general LSTM:  48%|████▊     | 48/100 [00:03<00:04, 12.91it/s]



Bootstrap sampling for general LSTM:  50%|█████     | 50/100 [00:03<00:03, 13.10it/s]



Bootstrap sampling for general LSTM:  52%|█████▏    | 52/100 [00:03<00:03, 13.15it/s]



Bootstrap sampling for general LSTM:  54%|█████▍    | 54/100 [00:04<00:03, 13.12it/s]



Bootstrap sampling for general LSTM:  56%|█████▌    | 56/100 [00:04<00:03, 13.09it/s]



Bootstrap sampling for general LSTM:  58%|█████▊    | 58/100 [00:04<00:03, 13.27it/s]



Bootstrap sampling for general LSTM:  60%|██████    | 60/100 [00:04<00:03, 13.27it/s]



Bootstrap sampling for general LSTM:  62%|██████▏   | 62/100 [00:04<00:02, 13.32it/s]



Bootstrap sampling for general LSTM:  64%|██████▍   | 64/100 [00:04<00:02, 13.35it/s]



Bootstrap sampling for general LSTM:  66%|██████▌   | 66/100 [00:05<00:02, 13.10it/s]



Bootstrap sampling for general LSTM:  68%|██████▊   | 68/100 [00:05<00:02, 13.14it/s]



Bootstrap sampling for general LSTM:  70%|███████   | 70/100 [00:05<00:02, 13.24it/s]



Bootstrap sampling for general LSTM:  72%|███████▏  | 72/100 [00:05<00:02, 13.37it/s]



Bootstrap sampling for general LSTM:  74%|███████▍  | 74/100 [00:05<00:01, 13.15it/s]



Bootstrap sampling for general LSTM:  76%|███████▌  | 76/100 [00:05<00:01, 13.32it/s]



Bootstrap sampling for general LSTM:  78%|███████▊  | 78/100 [00:05<00:01, 13.46it/s]



Bootstrap sampling for general LSTM:  80%|████████  | 80/100 [00:06<00:01, 13.15it/s]



Bootstrap sampling for general LSTM:  82%|████████▏ | 82/100 [00:06<00:01, 13.28it/s]



Bootstrap sampling for general LSTM:  84%|████████▍ | 84/100 [00:06<00:01, 13.26it/s]



Bootstrap sampling for general LSTM:  86%|████████▌ | 86/100 [00:06<00:01, 13.24it/s]



Bootstrap sampling for general LSTM:  88%|████████▊ | 88/100 [00:06<00:00, 13.25it/s]



Bootstrap sampling for general LSTM:  90%|█████████ | 90/100 [00:06<00:00, 13.43it/s]



Bootstrap sampling for general LSTM:  92%|█████████▏| 92/100 [00:06<00:00, 13.03it/s]



Bootstrap sampling for general LSTM:  94%|█████████▍| 94/100 [00:07<00:00, 12.93it/s]



Bootstrap sampling for general LSTM:  96%|█████████▌| 96/100 [00:07<00:00, 13.05it/s]



Bootstrap sampling for general LSTM:  98%|█████████▊| 98/100 [00:07<00:00, 13.10it/s]



Bootstrap sampling for general LSTM: 100%|██████████| 100/100 [00:07<00:00, 13.17it/s]
Processing EDOs:   0%|          | 0/33 [00:00<?, ?it/s]



Processing EDOs:   3%|▎         | 1/33 [00:12<06:41, 12.56s/it]



Processing EDOs:   6%|▌         | 2/33 [00:24<06:24, 12.39s/it]



Processing EDOs:   9%|▉         | 3/33 [00:37<06:10, 12.34s/it]



Processing EDOs:  12%|█▏        | 4/33 [00:49<05:55, 12.27s/it]



Processing EDOs:  15%|█▌        | 5/33 [01:01<05:42, 12.23s/it]



Processing EDOs:  18%|█▊        | 6/33 [01:14<05:33, 12.35s/it]



Processing EDOs:  21%|██        | 7/33 [01:26<05:21, 12.38s/it]



Processing EDOs:  24%|██▍       | 8/33 [01:38<05:08, 12.36s/it]



Processing EDOs:  27%|██▋       | 9/33 [01:51<04:56, 12.34s/it]



Processing EDOs:  30%|███       | 10/33 [02:03<04:43, 12.32s/it]



Processing EDOs:  33%|███▎      | 11/33 [02:15<04:33, 12.42s/it]



Processing EDOs:  36%|███▋      | 12/33 [02:28<04:21, 12.43s/it]



Processing EDOs:  39%|███▉      | 13/33 [02:40<04:08, 12.42s/it]



Processing EDOs:  42%|████▏     | 14/33 [02:53<03:56, 12.42s/it]



Processing EDOs:  45%|████▌     | 15/33 [03:05<03:45, 12.51s/it]



Processing EDOs:  48%|████▊     | 16/33 [03:18<03:34, 12.62s/it]



Processing EDOs:  52%|█████▏    | 17/33 [03:31<03:21, 12.61s/it]



Processing EDOs:  55%|█████▍    | 18/33 [03:43<03:08, 12.56s/it]



Processing EDOs:  58%|█████▊    | 19/33 [03:56<02:55, 12.51s/it]



Processing EDOs:  61%|██████    | 20/33 [04:08<02:42, 12.47s/it]



Processing EDOs:  64%|██████▎   | 21/33 [04:21<02:31, 12.59s/it]



Processing EDOs:  67%|██████▋   | 22/33 [04:34<02:18, 12.62s/it]



Processing EDOs:  70%|██████▉   | 23/33 [04:46<02:05, 12.59s/it]



Processing EDOs:  73%|███████▎  | 24/33 [04:59<01:53, 12.57s/it]



Processing EDOs:  76%|███████▌  | 25/33 [05:11<01:40, 12.53s/it]



Processing EDOs:  79%|███████▉  | 26/33 [05:24<01:28, 12.68s/it]



Processing EDOs:  82%|████████▏ | 27/33 [05:37<01:16, 12.76s/it]



Processing EDOs:  85%|████████▍ | 28/33 [05:50<01:03, 12.71s/it]



Processing EDOs:  88%|████████▊ | 29/33 [06:02<00:50, 12.65s/it]



Processing EDOs:  91%|█████████ | 30/33 [06:15<00:37, 12.59s/it]



Processing EDOs:  94%|█████████▍| 31/33 [06:28<00:25, 12.75s/it]



Processing EDOs:  97%|█████████▋| 32/33 [06:41<00:12, 12.84s/it]



Processing EDOs: 100%|██████████| 33/33 [06:54<00:00, 12.55s/it]


Sequential_NN general predictions for the next period:
     MORENA  PRIANPRD     OTROS
0  0.525546  0.406202  0.067395
95% Confidence Intervals for Sequential_NN:
Lower - MORENA: 0.5124, PRIANPRD: 0.3920, OTROS: 0.0639
Upper - MORENA: 0.5386, PRIANPRD: 0.4218, OTROS: 0.0708

LSTM general predictions for the next period:
    MORENA  PRIANPRD     OTROS
0  0.52857   0.40329  0.064108
95% Confidence Intervals for LSTM:
Lower - MORENA: 0.5129, PRIANPRD: 0.3875, OTROS: 0.0611
Upper - MORENA: 0.5440, PRIANPRD: 0.4206, OTROS: 0.0671

Sequential_NN predictions per EDO for the next period:
      MORENA  PRIANPRD     OTROS
1   0.382152  0.325424  0.064671
2   0.382152  0.325424  0.064671
3   0.382152  0.325424  0.064671
4   0.382152  0.325424  0.064671
5   0.382152  0.325424  0.064671
6   0.382152  0.325424  0.064671
7   0.382152  0.325424  0.064671
8   0.382152  0.325424  0.064671
9   0.382152  0.325424  0.064671
10  0.382152  0.325424  0.064671
11  0.382152  0.325424  0.064671
12  0.382152  0.




In [4]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Function to predict 'MORENA' or 'PRIANPRD' given a specific level of 'PART'
def predict_morena_prianprd(part_value, edo, model_name, target='MORENA'):
    X_new_period_edo = X_features.mean(axis=0).reshape((1, -1)).astype(np.float32)
    col_name_edo = f'EDO_{edo}'
    col_name_part = 'PART'

    if col_name_edo in df.columns:
        col_index_edo = df.columns.get_loc(col_name_edo)
        X_new_period_edo[0, col_index_edo] = 1  # Add one-hot encoded EDO column

    # Set the value of 'PART'
    if col_name_part in df.columns:
        col_index_part = df.columns.get_loc(col_name_part)
        X_new_period_edo[0, col_index_part] = part_value

    X_new_period_edo_reshaped = X_new_period_edo.reshape((X_new_period_edo.shape[0], 1, X_new_period_edo.shape[1]))

    model = trained_models[model_name]
    if model_name == 'LSTM':
        preds = model.predict(X_new_period_edo_reshaped)
    else:
        preds = model.predict(X_new_period_edo)

    preds = preds.flatten()

    if target == 'MORENA':
        return -preds[0]  # Negative for maximization
    elif target == 'PRIANPRD':
        return -preds[1]  # Negative for maximization
    else:
        return 0

# Function to find the optimal 'PART' level for a specific EDO
def optimize_part_for_edo(edo, model_name, target='MORENA'):
    result = minimize(predict_morena_prianprd, x0=[0.5], args=(edo, model_name, target), bounds=[(0, 1)])
    return result.x[0], -result.fun

# Run optimization for each EDO
optimal_part_per_edo = {}

for edo in unique_edos:
    opt_part_morena, max_morena = optimize_part_for_edo(edo, 'Sequential_NN', 'MORENA')
    opt_part_prianprd, max_prianprd = optimize_part_for_edo(edo, 'Sequential_NN', 'PRIANPRD')

    optimal_part_per_edo[edo] = {
        'Optimal_PART_Morena': opt_part_morena,
        'Max_Morena': max_morena,
        'Optimal_PART_Prianprd': opt_part_prianprd,
        'Max_Prianprd': max_prianprd
    }

# Convert results to DataFrame for easy visualization
optimal_part_df = pd.DataFrame(optimal_part_per_edo).T
print(optimal_part_df)


    Optimal_PART_Morena  Max_Morena  Optimal_PART_Prianprd  Max_Prianprd
1                   0.5    0.382152                    0.5      0.325424
2                   0.5    0.382152                    0.5      0.325424
3                   0.5    0.382152                    0.5      0.325424
4                   0.5    0.382152                    0.5      0.325424
5                   0.5    0.382152                    0.5      0.325424
6                   0.5    0.382152                    0.5      0.325424
7                   0.5    0.382152                    0.5      0.325424
8                   0.5    0.382152                    0.5      0.325424
9                   0.5    0.382152                    0.5      0.325424
10                  0.5    0.382152                    0.5      0.325424
11                  0.5    0.382152                    0.5      0.325424
12                  0.5    0.382152                    0.5      0.325424
13                  0.5    0.382152                

##...............................................................
## **All together**


In [3]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.utils import resample
from sklearn.metrics import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Ensure the data is in numpy array format and correct dtype
X_features = df.drop(columns=['TIPO', 'AÑO', 'MORENA', 'PRIANPRD', 'OTROS'])
y = df[['MORENA', 'PRIANPRD', 'OTROS']]

# Convert categorical 'EDO' column to numeric using one-hot encoding without dropping the first level
X_features = pd.get_dummies(X_features, columns=['EDO']).values.astype(np.float32)
y = y.values.astype(np.float32)

# Verify the data types
print(f"X_features dtype: {X_features.dtype}")
print(f"y dtype: {y.dtype}")

# Define function to calculate MAPE
def mean_absolute_percentage_error_custom(y_true, y_pred, threshold=1e-2):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true > threshold
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Define and train the Sequential Neural Network model
def build_sequential_nn(input_shape):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=input_shape))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Define and train the LSTM model
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(1, input_shape)))
    model.add(Dense(3, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Train and evaluate models
input_shape = (X_features.shape[1],)
X_features_reshaped = X_features.reshape((X_features.shape[0], 1, X_features.shape[1]))

models = {
    'Sequential_NN': build_sequential_nn(input_shape),
    'LSTM': build_lstm_model(X_features.shape[1])
}

trained_models = {}
mse_scores = {}
mae_scores = {}
mape_scores = {}

# Training models
for name, model in models.items():
    if name == 'LSTM':
        model.fit(X_features_reshaped, y, epochs=50, batch_size=32, verbose=0)
    else:
        model.fit(X_features, y, epochs=50, batch_size=32, verbose=0)

    preds = model.predict(X_features if name != 'LSTM' else X_features_reshaped)
    mse_scores[name] = mean_squared_error(y, preds)
    mae_scores[name] = mean_absolute_error(y, preds)
    mape_scores[name] = mean_absolute_percentage_error_custom(y, preds)
    trained_models[name] = model

# Create a DataFrame to display the errors of the selected models
errors_df = pd.DataFrame(
    [(model, mse_scores[model], mae_scores[model], mape_scores[model]) for model in models.keys()],
    columns=['Model', 'MSE', 'MAE', 'MAPE']
)

# Display the DataFrame with the errors
print("Selected Models with Errors:")
print(errors_df)

# Initialize dictionary to store predictions per EDO
predictions_per_edo = {}
ci_per_edo = {}

# Get unique labels in the 'EDO' column
unique_edos = df['EDO'].unique()

# Ensure the one-hot encoded columns exist
encoded_columns = [f'EDO_{edo}' for edo in unique_edos]
encoded_columns_in_df = [col for col in encoded_columns if col in df.columns]

# Define PART levels to evaluate
part_levels = [0.40, 0.50, 0.60]

# Initialize dictionary to store general predictions and confidence intervals
general_predictions = {}
general_ci = {}

# Calculate general predictions and confidence intervals for each PART level
for part_value in part_levels:
    for name in models.keys():
        preds_samples = []

        for _ in range(100):  # Reduced to 100 for faster computation
            X_resampled = resample(X_features).astype(np.float32)
            col_name_part = 'PART'
            if col_name_part in df.columns:
                col_index_part = df.columns.get_loc(col_name_part)
                X_resampled[:, col_index_part] = part_value

            if name == 'LSTM':
                X_resampled = X_resampled.reshape((X_resampled.shape[0], 1, X_resampled.shape[1]))
            preds_resampled = trained_models[name].predict(X_resampled)
            preds_samples.append(preds_resampled.mean(axis=0))

        preds_samples = np.array(preds_samples)
        preds_mean = preds_samples.mean(axis=0)
        preds_lower = np.percentile(preds_samples, 2.5, axis=0)
        preds_upper = np.percentile(preds_samples, 97.5, axis=0)

        general_predictions[(name, part_value)] = preds_mean
        general_ci[(name, part_value)] = (preds_lower, preds_upper)

# Iterate over each unique 'EDO' label and PART levels
for edo in unique_edos:
    for part_value in part_levels:
        # Create a dummy data point for prediction for the given EDO
        X_new_period_edo = X_features.mean(axis=0).reshape((1, -1)).astype(np.float32)

        # Check if the one-hot encoded column for this EDO exists
        col_name_edo = f'EDO_{edo}'
        col_name_part = 'PART'
        if col_name_edo in encoded_columns_in_df:
            col_index_edo = df.columns.get_loc(col_name_edo)
            X_new_period_edo[0, col_index_edo] = 1  # Add one-hot encoded EDO column

        # Set the value of PART
        if col_name_part in df.columns:
            col_index_part = df.columns.get_loc(col_name_part)
            X_new_period_edo[0, col_index_part] = part_value

        X_new_period_edo_reshaped = X_new_period_edo.reshape((X_new_period_edo.shape[0], 1, X_new_period_edo.shape[1]))

        # Store predictions and confidence intervals for the selected models
        for name in models.keys():
            preds_samples = []
            for _ in range(100):
                preds_resampled = trained_models[name].predict(X_new_period_edo if name != 'LSTM' else X_new_period_edo_reshaped)
                preds_samples.append(preds_resampled.flatten())

            preds_samples = np.array(preds_samples)
            preds_mean = preds_samples.mean(axis=0)
            preds_lower = np.percentile(preds_samples, 2.5, axis=0)
            preds_upper = np.percentile(preds_samples, 97.5, axis=0)

            predictions_per_edo.setdefault((edo, name, part_value), []).append(preds_mean)
            ci_per_edo.setdefault((edo, name, part_value), []).append((preds_lower, preds_upper))

# Convert general predictions to DataFrames and add labels
general_predictions_df = {key: pd.DataFrame([preds], columns=['MORENA', 'PRIANPRD', 'OTROS']) for key, preds in general_predictions.items()}

# Convert EDO predictions to DataFrames and add labels
predictions_df_per_edo = {key: pd.DataFrame(preds, columns=['MORENA', 'PRIANPRD', 'OTROS']) for key, preds in predictions_per_edo.items()}

# Display the general predictions and confidence intervals
for (model_name, part_value), df in general_predictions_df.items():
    print(f"\n{model_name} general predictions for PART = {part_value}:")
    print(df)
    lower_ci = general_ci[(model_name, part_value)][0]
    upper_ci = general_ci[(model_name, part_value)][1]
    print(f"95% Confidence Intervals for {model_name} at PART = {part_value}:")
    print(f"Lower - MORENA: {lower_ci[0]:.4f}, PRIANPRD: {lower_ci[1]:.4f}, OTROS: {lower_ci[2]:.4f}")
    print(f"Upper - MORENA: {upper_ci[0]:.4f}, PRIANPRD: {upper_ci[1]:.4f}, OTROS: {upper_ci[2]:.4f}")

# Display the DataFrames with EDO predictions and confidence intervals
for (edo, model_name, part_value), df in predictions_df_per_edo.items():
    print(f"\n{model_name} predictions per EDO = {edo} for PART = {part_value}:")
    print(df)
    # Debugging prints to check ci_per_edo content
    print(f"ci_per_edo[{(edo, model_name, part_value)}]: {ci_per_edo[(edo, model_name, part_value)]}")
    if len(ci_per_edo[(edo, model_name, part_value)]) < 2:
        print(f"Error: ci_per_edo[{(edo, model_name, part_value)}] has less than 2 elements.")
        continue
    lower_ci = ci_per_edo[(edo, model_name, part_value)][0]
    upper_ci = ci_per_edo[(edo, model_name, part_value)][1]
    print(f"95% Confidence Intervals for {model_name} at PART = {part_value}, EDO = {edo}:")
    print(f"Lower - MORENA: {lower_ci[0]:.4f}, PRIANPRD: {lower_ci[1]:.4f}, OTROS: {lower_ci[2]:.4f}")
    print(f"Upper - MORENA: {upper_ci[0]:.4f}, PRIANPRD: {upper_ci[1]:.4f}, OTROS: {upper_ci[2]:.4f}")


X_features dtype: float32
y dtype: float32




[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Sequential_NN general predictions for PART = 0.4:
     MORENA  PRIANPRD    OTROS
0  0.558886  0.414394  0.08102
95% Confidence Intervals for Sequential_NN at PART = 0.4:
Lower - MORENA: 0.5446, PRIANPRD: 0.4024, OTROS: 0.0777
Upper - MORENA: 0.5717, PRIANPRD: 0.4283, OTROS: 0.0841

LSTM general predictions for PART = 0.4:
     MORENA  PRIANPRD    OTROS
0  0.545843  0.414387  0.07113
95% Confidence Intervals for LSTM at PART = 0.4:
Lower - MORENA: 0.5344, PRIANPRD: 0.4015, OTROS: 0.0674
Upper - MORENA: 0.5581, PRIANPRD: 0.4256, OTROS: 0.0751

Sequential_NN general predictions for PART = 0.5:
     MORENA  PRIANPRD     OTROS
0  0.576058   0.41382  0.087863
95% Confidence Intervals for Sequential_NN at PART = 0.5:
Lower - MORENA: 0.5646, PRIANPRD: 0.4021, OTROS: 0.0849
Upper - MORENA: 0.5894, PRIANPRD: 0.4263, OTROS: 0.0918

LSTM general predictions for PART = 0.5:
     MORENA  PRIANPRD    OTROS
0  0.551527  0.415718  0.0728