# Análisis de la demanda y producción de energía en Iraq

## Introducción y carga de datos

In [3]:

import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load dataset
data = pd.read_csv('datosFinales.csv', index_col=0, parse_dates=True)
data.index = pd.to_datetime(data.index)
data.sort_index(inplace=True)
# Split data into training and validation sets
train = data.loc['2019-01-01':'2021-01-01']
valid = data.loc['2021-01-02':]

# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

# Normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(train)

# Create the data to train our model on:
time_steps = 30
X_train, y_train = create_dataset(scaled_data, time_steps)

# Reshape it [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Build the model
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=32)


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by MinMaxScaler.

In [None]:


# 2. Análisis en época de COVID
# Comparing data before and after the COVID period
pre_covid = data.loc['2019-01-01':'2020-02-23']
during_covid = data.loc['2020-02-24':'2021-01-01']

# Print the average demand and production before and during COVID (This will be printed when the script is run on the local machine)
print("Average demand and production before COVID:")
print(pre_covid[['Demanda', 'Produccion']].mean())

print("\nAverage demand and production during COVID:")
print(during_covid[['Demanda', 'Produccion']].mean())

# 3. Festivos nacionales
# Analyzing data on national holidays and training a model to identify these days based on demand and production patterns
holidays = ["2019-01-01", "2019-06-01", "2019-05-01", "2019-07-14", "2019-10-03",
            "2020-01-01", "2020-06-01", "2020-05-01", "2020-07-14", "2020-10-03",
            "2021-01-01", "2021-06-01", "2021-05-01", "2021-07-14", "2021-10-03"]

data['Holiday'] = data.index.isin(holidays).astype(int)

# Define the dataset for the LSTM model to predict holidays
X_holiday, Y_holiday = prepare_data(data[['Demanda', 'Produccion']].values, data['Holiday'].values, look_back=30)

# Define and train an LSTM model to predict national holidays
model_holiday = Sequential()
model_holiday.add(LSTM(100, input_shape=(30, 2)))
model_holiday.add(Dense(1, activation='sigmoid'))
model_holiday.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model (This will be executed on the local machine)
# model_holiday.fit(X_holiday, Y_holiday, epochs=50, batch_size=32, verbose=1)



In [None]:


# 4. Shaaban vs Ramadan
# Analyzing data during Shaaban and Ramadan periods and training a model to identify these periods
shaaban_ramadan_dates = {
    "2019": {"Shaaban": ("2019-04-06", "2019-05-05"), "Ramadan": ("2019-05-06", "2019-06-04")},
    "2020": {"Shaaban": ("2020-03-25", "2020-04-23"), "Ramadan": ("2020-04-24", "2020-05-23")},
    "2021": {"Shaaban": ("2021-03-14", "2021-04-12"), "Ramadan": ("2021-04-13", "2021-05-12")}
}

def period_identifier(row):
    for year, periods in shaaban_ramadan_dates.items():
        for period, dates in periods.items():
            if dates[0] <= row.name <= dates[1]:
                return period
    return "Other"

data['Period'] = data.apply(period_identifier, axis=1)

# Convert period names to integers for model training
period_to_int = {'Other': 0, 'Shaaban': 1, 'Ramadan': 2}
data['Period_encoded'] = data['Period'].map(period_to_int)

# Define the dataset for the LSTM model
X_period, Y_period = prepare_data(data[['Demanda', 'Produccion']].values, data['Period_encoded'].values, look_back=30)

# Define and train an LSTM model to predict Shaaban and Ramadan
model_period = Sequential()
model_period.add(LSTM(100, input_shape=(30, 2)))
model_period.add(Dense(3, activation='softmax'))  # 3 classes for Other, Shaaban, and Ramadan
model_period.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Convert Y_period to one-hot encoding
Y_period_one_hot = pd.get_dummies(Y_period).values

# Train the model (This will be executed on the local machine)
# model_period.fit(X_period, Y_period_one_hot, epochs=50, batch_size=32, verbose=1)

# 5. Dickey-Fuller Test
from statsmodels.tsa.stattools import adfuller

# Check if the time series is stationary using the Dickey-Fuller test
result = adfuller(data['Demanda'])
print("ADF Statistic for Demanda:", result[0])
print("p-value:", result[1])
print("Critical Values:", result[4])

result = adfuller(data['Produccion'])
print("\nADF Statistic for Produccion:", result[0])
print("p-value:", result[1])
print("Critical Values:", result[4])



In [None]:


# 6. Modelo ARIMA
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model to the 'Demanda' series
model_arima_demanda = ARIMA(data['Demanda'], order=(5,1,0))
model_arima_demanda_fit = model_arima_demanda.fit(disp=0)

# Summary of the ARIMA model for Demanda (This will be printed when the script is run on the local machine)
print(model_arima_demanda_fit.summary())

# Fit an ARIMA model to the 'Produccion' series
model_arima_produccion = ARIMA(data['Produccion'], order=(5,1,0))
model_arima_produccion_fit = model_arima_produccion.fit(disp=0)

# Summary of the ARIMA model for Produccion (This will be printed when the script is run on the local machine)
print(model_arima_produccion_fit.summary())

# 7. LSTM for daily and monthly analysis
# For daily analysis, we can use the same data preparation, model definition, and training steps as above

# For monthly analysis, we'll first create a monthly dataset
data_monthly = data.resample('M').mean()

# Define the dataset for the LSTM model for monthly analysis
X_monthly, Y_monthly = prepare_data(data_monthly[['Demanda', 'Produccion']].values, look_back=3)

# Define and train an LSTM model for monthly analysis
model_monthly = Sequential()
model_monthly.add(LSTM(100, input_shape=(3, 2)))
model_monthly.add(Dense(2))
model_monthly.compile(loss='mean_squared_error', optimizer='adam')

# Train the model for monthly analysis (This will be executed on the local machine)
# model_monthly.fit(X_monthly, Y_monthly, epochs=50, batch_size=32, verbose=1)

