# Energy study with machine learning

## Library loading

In [98]:
import re
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt

### Clean energy

In [None]:
df = pd.read_csv('./data/energia_mensual.txt',
                 sep='\t', header=None, names=['Fecha', 'Nombre', 'Valor'])

df['Valor'] = df['Valor'].str.replace(',', '.').astype(float)
final_df = pd.DataFrame(columns=['fecha'])
final_df['fecha'] = df['Fecha'].unique()

clusters = {
    'hidraulica': ['BONETE', "BAYGORRIA", 'PALMAR'],
    'sg': ['SALTO GRAND'],
    'eolica': ['ARTILLEROS', 'CARACOLES', 'C. PERALTA', 'CERRO GRANDE', 'COLONIA ARIAS', 'CORFRISA', 'ENGRAW', 'J.P. TERRA', 'KENTILUX', 'LUZ', 'MALDONADO', 'MARYSTAY', 'MELOWIND'
               'MINAS', 'MANANTIAL', '18 JULIO', 'CARAPE', 'E. FLORIDA', 'JULIETA', 'E.KIY', 'E. LIBERTAD', 'E. ROSA', 'P.E.', 'PALOMAS', 'PAMPA', 'PASTORALE', 'V.RODRI', 'PERALTA'
               'MACIEL', 'VALENTINES'],
    'biomasa': ['ALUR', 'ZENDA'],
    'termica': ['CTR 1', 'CTR 2', 'MOTORES', 'PTI A', 'PTI B'],
    'solar': ['ABRIL', 'ALBISU', 'ALTO CIELO', 'SOLAR', 'ALKO', 'LITORAL', 'DICANO', 'NARANJAL', 'FENIMA', 'JACINTA', 'NATELU', 'PETILCORAN', 'RADITION', 'TS', 'VINGANO', 'YARNEL'],
    'importacion': ['IMPORTA'],
    'exportacion': ['EXPORTA']
}

def contains_cluster(value, names):
    if pd.notna(value):
        for name in names:
            if name in value.upper():
                return True
    return False

for cluster, names in clusters.items():
    temp_df = df[df['Nombre'].apply(lambda x: contains_cluster(
        x, names))]['Valor'].groupby(df['Fecha']).sum().reset_index()
    temp_df.rename(columns={'Valor': cluster}, inplace=True)
    final_df = pd.concat([final_df, temp_df[cluster]], axis=1)

print(final_df)
final_df.to_excel('./data/energia_clean.xlsx')

### Clean meteorology

In [None]:
mag = ''
df = pd.read_excel('./data/meteorologia.xlsx', sheet_name=None)['Sheet1']

final_df = pd.DataFrame(
    {'mes': ['ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic']})

for index, row in df.iterrows():
    if row[1] == 'Ene.':
        mag = row[0]
    else:
        loc = row[0]
        col_name = mag + '_' + loc
        for i in range(1, 13):
            val_str = str(row[i])
            num_str = re.search(r'[-+]?\d*[.,]?\d+', val_str).group()
            num_str = num_str.replace(',', '.')
            if num_str:
                final_df.at[i - 1, col_name] = float(num_str)

print(final_df)
final_df.to_excel('./data/meteorologia_clean.xlsx')

## Production study

### Energy preprocess

In [60]:
df_energy = pd.read_excel('./data/energia_clean.xlsx')

print(df_energy.columns)
print(df_energy.tail(2))

#Preprocessing
df_energy = df_energy[:-3]                           #Drop last 3 rows year 2024
df_energy = df_energy.drop(columns=['Unnamed: 0'])
df_energy['fecha'] = pd.to_datetime(df_energy['fecha'])
df_energy['numeric_date'] = df_energy['fecha'].dt.year * 100 + df_energy['fecha'].dt.month
df_energy = df_energy.drop(columns=['fecha'])

#Scaling
scaler = MinMaxScaler(feature_range=(0, 1))
array_energy = scaler.fit_transform(df_energy) 

print("ARRAY: ",array_energy.shape)
#print("DATAFRAME: ",df_energy)




Index(['Unnamed: 0', 'fecha', 'hidraulica', 'sg', 'eolica', 'biomasa',
       'termica', 'solar', 'importacion', 'exportacion'],
      dtype='object')
    Unnamed: 0    fecha  hidraulica        sg      eolica  biomasa    termica  \
49          49  2024-02  231215.698  329641.9  289302.661      0.0  3315.7458   
50          50  2024-03  114431.676  101196.9  146111.504      0.0     0.4528   

        solar  importacion  exportacion  
49  39704.042       1025.1     78702.34  
50  19253.228          0.0     33545.09  
ARRAY:  (48, 9)


### Meteorology preprocess

In [61]:
df_met = pd.read_excel('./data/meteorologia_clean.xlsx')
#print(df_met.columns)

# Preprocessing
df_met = df_met.drop(columns=['Unnamed: 0'])
month_dict = {'ene': '01', 'feb': '02', 'mar': '03', 'abr': '04', 'may': '05', 'jun': '06', 'jul': '07', 'ago': '08', 'sep': '09', 'oct': '10', 'nov': '11', 'dic': '12'}
df_met['mes'] = df_met['mes'].map(month_dict)
df_met = df_met.loc[df_met.index.repeat(4)].reset_index(drop=True)
df_met['year'] = pd.Series(np.tile(['2020', '2021', '2022', '2023'], 12))
#df_met['numeric_date'] = df_met['year']  + df_met['mes']
df_met = df_met.drop(columns=['year', 'mes'])

# Scaling
scaler = MinMaxScaler(feature_range=(0, 1))
array_met = scaler.fit_transform(df_met)


print("ARRAY: ",array_met.shape)


ARRAY:  (48, 141)


## Train and test data

In [122]:
X = array_met
y = array_energy[:, 1:2]

X_train, y_train = X[:36], y[:36]
X_test, y_test = X[36:], y[36:]

# Remodelar los datos de entrada para que sean un array 3D
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

print(X_train[2][0].shape)


(36, 1, 141) (36, 1)
(12, 1, 141) (12, 1)
(141,)


## Define model

In [138]:
model = Sequential()
model.add(LSTM(10, input_shape=(1, 141), activation='relu', return_sequences=True))
model.add(Dense(y_train.shape[1]))
model.compile(loss='mae', optimizer='adam')

history = model.fit(X_train, y_train, epochs=300, batch_size=12, validation_data=(X_test, y_test), verbose=2, shuffle=False)
y_pred = model.predict(X_test)
print(y_pred.shape)

Epoch 1/300
3/3 - 2s - loss: 0.3943 - val_loss: 0.1251 - 2s/epoch - 528ms/step
Epoch 2/300
3/3 - 0s - loss: 0.3644 - val_loss: 0.1090 - 26ms/epoch - 9ms/step
Epoch 3/300
3/3 - 0s - loss: 0.3531 - val_loss: 0.1127 - 28ms/epoch - 9ms/step
Epoch 4/300
3/3 - 0s - loss: 0.3409 - val_loss: 0.1253 - 27ms/epoch - 9ms/step
Epoch 5/300
3/3 - 0s - loss: 0.3289 - val_loss: 0.1398 - 27ms/epoch - 9ms/step
Epoch 6/300
3/3 - 0s - loss: 0.3211 - val_loss: 0.1582 - 26ms/epoch - 9ms/step
Epoch 7/300
3/3 - 0s - loss: 0.3126 - val_loss: 0.1648 - 30ms/epoch - 10ms/step
Epoch 8/300
3/3 - 0s - loss: 0.3071 - val_loss: 0.1608 - 29ms/epoch - 10ms/step
Epoch 9/300
3/3 - 0s - loss: 0.3001 - val_loss: 0.1520 - 28ms/epoch - 9ms/step
Epoch 10/300
3/3 - 0s - loss: 0.2962 - val_loss: 0.1606 - 27ms/epoch - 9ms/step
Epoch 11/300
3/3 - 0s - loss: 0.2903 - val_loss: 0.1722 - 29ms/epoch - 10ms/step
Epoch 12/300
3/3 - 0s - loss: 0.2838 - val_loss: 0.1824 - 28ms/epoch - 9ms/step
Epoch 13/300
3/3 - 0s - loss: 0.2776 - val_los

In [139]:
y_pred2 = y_pred.reshape((y_pred.shape[0], y_pred.shape[2]))
print(y_pred2.shape)
print(y_test.shape)

np.set_printoptions(suppress=True, formatter={'float_kind':'{:f}'.format})
error_percent = np.abs((y_pred2 - y_test) / (y_test + 1e-7)) * 100

avg_error = np.mean(error_percent, axis=0)
print(avg_error)

(12, 1)
(12, 1)
[287.088375]
