In [None]:
![spam](img/EDA.png)

In [None]:
import warnings
warnings.filterwarnings('ignore')

## 1.   Exploratory Data Analysis 

<summary>
    <font size="4" color="orange"><b>1.1 Importing libraries and functions</b></font>
</summary>

In [None]:
# Basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Easy graphs with plotly
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.offline import iplot

# Matplotlib defaults: ggplot theme and a wide (15 x 7) default figure size
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15,7)
# NOTE(review): `pk` is not referenced in the visible cells — confirm it is still needed
import pickle as pk
# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

<summary>
    <font size="4" color="orange"><b>1.2 Loading CENACE database: 49 input variables </b></font>
</summary>

<img src="img/calendarsymbol.png" alt="calendar" width="40" align="left" />  

<font size="3" color="palevioletred"><b>Exogenous Calendar Features </b></font>

* **FECHA** (yy-mm-dd): Date

"Holiday" (0|1) indicator:

* **Lunes_Festivo**: Holiday Monday

* **Martes_PostFestivo**: Day after holiday Monday

* **Semana_Santa**: Holy Week

* **1_Mayo**: May 1

* **10_Mayo**: May 10

* **16_Sep**: September 16

* **2_Nov.**: November 2

* **Pre-Navidad_y_new_year**: Day before Christmas or New Year

* **Navidad_y_new_year**: Christmas or New Year

* **Post-Navidad_y_new_year**: Day after Christmas or New Year

<img src="img/lightsymbol.png" alt="drawing" width="25" align="left" />  

<font size="3" color="palevioletred"><b>Endogenous Feature</b></font>

* **DEM_GCRNO_H$i$** (MW): Load energy demand in the GCRNO (Gerencia de Control Regional Noroeste) zone from hour $i$ to hour $i+1$ of the corresponding date, for $i=0,\dots,23$.

<img src="img/meteosymbol.png" alt="drawing" width="60" align="left" />

<font size="3" color="palevioletred"><b>Exogenous Meteorological Features</b></font>

* **TMAX-CAB**, **TMIN-CAB**, **TMAX-HMO**, **TMIN-HMO**, **TMAX-OBR**, **TMIN-OBR**,**TMAX-LMO**, **TMIN-LMO**, **TMAX-CUL**, **TMIN-CUL** ($^\circ$C): Maximum and minimum temperature in Caborca, Hermosillo, Ciudad Obregón, Los Mochis and Culiacán, respectively.

* **PREC_HMO_mm**, **PREC_OBR_mm**, **PREC_LMO_mm**, **PREC_CUL_mm**  (mm/h): Precipitation in Hermosillo, Ciudad Obregón, Los Mochis and Culiacán, respectively.



In [None]:
# Importing load energy consumption CENACE database
# Despite its name, `url` is a path relative to the notebook's working directory.
url = "./inputs/Dataset GCRNO120522 DF.xlsx" #data
gcrno = pd.read_excel(url)
# Show the column labels that were read from the workbook
gcrno.columns

In [None]:
# Summary statistics (pandas `describe`) of the raw table
gcrno.describe()

#### Plotting the 24 "DEM_GCRNO_HX" features

In [None]:
# Looking for outliers in the 24 hourly demand features (DEM_GCRNO_H0 .. DEM_GCRNO_H23)
# with one box plot per column.
fig = go.Figure()

# Traces are added from H23 down to H0, which is the order the boxes appear in the figure.
for hour in range(23, -1, -1):
    demand_column = f'DEM_GCRNO_H{hour}'
    fig.add_trace(go.Box(y=gcrno[demand_column], name=demand_column))

# The box names already label the x axis, so the legend is hidden.
fig.update_layout(title_text="DEM_GCRNO Features Description", height=1000, showlegend=False)
fig.show()
#iplot(fig, image='svg', filename='DEM_GCRNO_features', image_width=960, image_height=640)

<summary>
    <font size="4" color="orange"><b>1.3. Dataframe rearrangement</b></font>
</summary>

The above dataframe will be transformed into a new one with:

* *INSTANCES* (index):

    **FECHA-HORA** (Date-Hour) specified in the format yyyy-mm-dd hh:00:00
    
    
* *FEATURES*: 

    **DEMANDA** Load energy demand
    
    **DIA** (Day)
       0 Monday 
       1 Tuesday 
       2 Wednesday 
       3 Thursday 
       4 Friday 
       5 Saturday 
       6 Sunday
    
    **HORA** (Hour 0–23)
      
    **MES** (Month)
       1 January
       2 February
       3 March
       4 April
       5 May
       6 June
       7 July
       8 August
       9 September
       10 October
       11 November
       12 December
    
    And the following characteristics with constant value with respect to the day **TMAX-CAB**, **TMIN-CAB**, **TMAX-HMO**, **TMIN-HMO**, **TMAX-OBR**, **TMIN-OBR**,**TMAX-LMO**, **TMIN-LMO**, **TMAX-CUL**, **TMIN-CUL**, **PREC_HMO_MM**, **PREC_OBR_MM**, **PREC_LMO_MM**, **PREC_CUL_MM**, **LUNES_FESTIVO**, **MARTES_POSTFESTIVO**, **SEMANA_SANTA**, **1_MAYO**, **10_MAYO**, **16_SEP**, **2_NOV.**, **PRE-NAVIDAD_Y_NEW_YEAR**, **NAVIDAD_Y_NEW_YEAR**, **POST-NAVIDAD_Y_NEW_YEAR**.

In [None]:
# Reshape the wide table (one column per hour) into a long one (one row per date and hour)
hour_columns = [f'DEM_GCRNO_H{i}' for i in range(24)]
hour_of_column = {f'DEM_GCRNO_H{i}': i for i in range(24)}

consumo_data = gcrno.melt(
    id_vars=['FECHA'],
    value_vars=hour_columns,
    var_name="HORA",
    value_name="DEMANDA",
).replace(hour_of_column)  # column label -> integer hour 0..23

# Index every row by its date plus its hour offset
consumo_data.index = consumo_data['FECHA'] + pd.to_timedelta(consumo_data['HORA'], unit='h')

# Put the rows in time order, drop the helper hour column and enforce an hourly
# grid; timestamps missing from the grid are filled with the previous row ('pad').
consumo_data = (
    consumo_data
    .sort_index()
    .drop(columns=['HORA'])
    .asfreq('h', method='pad')
)

# Calendar columns derived from the hourly index
consumo_data = consumo_data.assign(
    FECHAHORA=lambda frame: frame.index,
    DIA=lambda frame: frame.index.weekday,
    HORA=lambda frame: frame.index.hour,
    MES=lambda frame: frame.index.month,
)

In [None]:
# Attach the daily exogenous variables to every hourly row of the same date
temperature_columns = ['TMAX-CAB', 'TMAX-HMO', 'TMAX-OBR', 'TMAX-LMO', 'TMAX-CUL',
                       'TMIN-CAB', 'TMIN-HMO', 'TMIN-OBR', 'TMIN-LMO', 'TMIN-CUL']
rainfall_columns = ['PREC_HMO_MM', 'PREC_OBR_MM', 'PREC_LMO_MM', 'PREC_CUL_MM']
holiday_columns = ['LUNES_FESTIVO', 'MARTES_POSTFESTIVO', 'SEMANA_SANTA',
                   '1_MAYO', '10_MAYO', '16_SEP', '2_NOV.',
                   'PRE-NAVIDAD_Y_NEW_YEAR', 'NAVIDAD_Y_NEW_YEAR',
                   'POST-NAVIDAD_Y_NEW_YEAR']

exogenas = gcrno[['FECHA'] + temperature_columns + rainfall_columns + holiday_columns]
# Left join: every hourly row is kept even when its date has no exogenous record
consumo = consumo_data.merge(exogenas, on='FECHA', how='left')

In [None]:
# Use the date-hour stamp as the index and enforce an hourly frequency on it
consumo = consumo.set_index("FECHAHORA").asfreq('h')

In [None]:
# Display the merged hourly table
consumo

In [None]:
# Distinct values present in the weekday column DIA
consumo['DIA'].unique()

In [None]:
# Verifying existence of missing data: info() lists the non-null count and dtype of each column
consumo.info()

<summary>
    <font size="4" color="orange"><b>2. Exploring variables</b></font>
</summary>

<br/>

<summary>
    <img src="img/lightsymbol.png" alt="drawing" width="15" align="left" /> 
    <font size="3" color="palevioletred"><b>Energy Demand</b></font>
</summary>

In [None]:
# Summary statistics of the endogenous feature DEMANDA
consumo['DEMANDA'].describe()

#### The chart below shows a similar pattern year after year; an upward trend is also visible

In [None]:
# Full hourly demand series.
# The period in the title is read from the index (formatted dd/mm/yy) so that it
# always matches the data actually plotted.
demand_period = f"{consumo.index.min():%d/%m/%y}-{consumo.index.max():%d/%m/%y}"

fig = go.Figure()
fig.add_trace(go.Scatter(x=consumo.index, y=consumo['DEMANDA'],
                    mode='lines',
                    name='Energy Demand'))

fig.update_layout(title_text=f"Energy Demand period {demand_period}", height=600)

fig.show()
#iplot(fig, image='svg', filename='energy_demand_time_series', image_width=960, image_height=640)

In [None]:
# One-week example: rows from 2022-05-02 00:00 up to, but not including, 2022-05-09 00:00
week_start = '2022-05-02 00:00:00'
week_end = '2022-05-09 00:00:00'
in_week = (consumo.index >= week_start) & (consumo.index < week_end)
consumo_week = consumo[in_week]

week_trace = go.Scatter(
    x=consumo_week.index,
    y=consumo_week['DEMANDA'],
    mode='lines',
    name='Energy Demand',
)
fig = go.Figure()
fig.add_trace(week_trace)
fig.update_layout(title_text="Energy Demand: week example 02/05/22-08/05/22", height=600)
fig.show()
#iplot(fig, image='svg', filename='week_energy_demand_time_series', image_width=960, image_height=640)

In [None]:
# One-day example: rows from 2022-05-08 00:00 up to, but not including, 2022-05-09 00:00
day_start = '2022-05-08 00:00:00'
day_end = '2022-05-09 00:00:00'
in_day = (consumo.index >= day_start) & (consumo.index < day_end)
consumo_day = consumo[in_day]

day_trace = go.Scatter(
    x=consumo_day.index,
    y=consumo_day['DEMANDA'],
    mode='lines',
    name='Energy Demand',
)
fig = go.Figure()
fig.add_trace(day_trace)
fig.update_layout(title_text="Energy Demand: day example 08/05/22", height=600)
fig.show()
#iplot(fig, image='svg', filename='day_energy_demand_time_series', image_width=960, image_height=640)

In [None]:
# Looking for outliers in DEMANDA with a horizontal box plot.
# The period in the title is read from the index (formatted dd/mm/yy) so that it
# always matches the data actually plotted.
demand_period = f"{consumo.index.min():%d/%m/%y}-{consumo.index.max():%d/%m/%y}"

fig = go.Figure()
fig.add_trace(go.Box(x=consumo['DEMANDA'], name='Energy Demand'))
fig.update_layout(title_text=f"Box Plot Energy Demand period {demand_period}", height=600)
fig.show()

In [None]:
# Histogram of DEMANDA (at most 12 bins).
# The period in the title is read from the index (formatted dd/mm/yy) so that it
# always matches the data actually plotted.
demand_period = f"{consumo.index.min():%d/%m/%y}-{consumo.index.max():%d/%m/%y}"

fig = px.histogram(
    consumo,
    x="DEMANDA",
    nbins=12,
    title=f"Histogram Energy Demand period {demand_period}",
)
fig.show()

<summary>
    <img src="img/meteosymbol.png" alt="drawing" width="60" align="left" />
    <font size="3" color="palevioletred"><b>Exogenous Meteorological Features</b></font>
</summary>

In [None]:
# Summary statistics of the exogenous temperature features (TMAX and TMIN per city)
temperature_columns = [
    f'{extreme}-{city}'
    for extreme in ('TMAX', 'TMIN')
    for city in ('CAB', 'HMO', 'OBR', 'LMO', 'CUL')
]
consumo[temperature_columns].describe()

#### Temperature features
In order to understand the behavior of the energy demand data, we divided our data into the 4 seasons of the year.

In [None]:
# Split the hourly table into the four seasons using fixed calendar boundaries:
#   spring 21 Mar - 20 Jun, summer 21 Jun - 22 Sep,
#   autumn 23 Sep - 20 Dec, winter 21 Dec - 20 Mar.
#
# Each row is classified by the month and day of its FECHA, so every year in
# the table contributes to every season it covers (no year list to maintain),
# and no frame is grown by repeated concatenation.
# Rows keep their original (chronological) order inside each seasonal frame.
fecha = consumo['FECHA']
month_day = fecha.dt.month * 100 + fecha.dt.day  # e.g. 21 Jun -> 621, 23 Sep -> 923

consumo_spring = consumo[(month_day >= 321) & (month_day < 621)]
consumo_summer = consumo[(month_day >= 621) & (month_day < 923)]
consumo_autumn = consumo[(month_day >= 923) & (month_day < 1221)]
# Winter wraps around the new year, hence the "or".
consumo_winter = consumo[(month_day >= 1221) | (month_day < 321)]
        
    

In [None]:
#CABORCA MAX & MIN TEMP 4 SEASONS
# (label, seasonal table, fill colour, outline colour), listed in plotting order
season_boxes = (
    ('SPRING', consumo_spring, "rgba(158, 202, 57,0.5)", "#9eca39"),
    ('SUMMER', consumo_summer, "rgba(249, 190, 4,0.5)", "#F9BE04"),
    ('AUTUMN', consumo_autumn, "rgba(198, 111, 66,0.5)", "#C66F42"),
    ('WINTER', consumo_winter, "rgba(112, 163, 187,0.5)", "#70A3BB"),
)

fig = go.Figure()
for season_label, season_data, fill_rgba, outline_hex in season_boxes:
    # Maximum first, then minimum: each season is drawn as a pair of boxes
    for temp_column in ('TMAX-CAB', 'TMIN-CAB'):
        fig.add_trace(go.Box(
            y=season_data[temp_column],
            name=f'{season_label} {temp_column}',
            fillcolor=fill_rgba,
            line=dict(color=outline_hex),
        ))

fig.update_layout(title_text="CABORCA MAX & MIN TEMP 4 SEASONS", height=600)
fig.show()
#iplot(fig, image='svg', filename='caborca_tmax_tmin', image_width=960, image_height=640)

In [None]:
#HERMOSILLO MAX & MIN TEMP 4 SEASONS
# (label, seasonal table, fill colour, outline colour), listed in plotting order
season_boxes = (
    ('SPRING', consumo_spring, "rgba(158, 202, 57,0.5)", "#9eca39"),
    ('SUMMER', consumo_summer, "rgba(249, 190, 4,0.5)", "#F9BE04"),
    ('AUTUMN', consumo_autumn, "rgba(198, 111, 66,0.5)", "#C66F42"),
    ('WINTER', consumo_winter, "rgba(112, 163, 187,0.5)", "#70A3BB"),
)

fig = go.Figure()
for season_label, season_data, fill_rgba, outline_hex in season_boxes:
    # Maximum first, then minimum: each season is drawn as a pair of boxes
    for temp_column in ('TMAX-HMO', 'TMIN-HMO'):
        fig.add_trace(go.Box(
            y=season_data[temp_column],
            name=f'{season_label} {temp_column}',
            fillcolor=fill_rgba,
            line=dict(color=outline_hex),
        ))

fig.update_layout(title_text="HERMOSILLO MAX & MIN TEMP 4 SEASONS", height=600)
fig.show()
#iplot(fig, image='svg', filename='hillo_tmax_tmin', image_width=960, image_height=640)

In [None]:
#OBREGON MAX & MIN TEMP 4 SEASONS
# (label, seasonal table, fill colour, outline colour), listed in plotting order
season_boxes = (
    ('SPRING', consumo_spring, "rgba(158, 202, 57,0.5)", "#9eca39"),
    ('SUMMER', consumo_summer, "rgba(249, 190, 4,0.5)", "#F9BE04"),
    ('AUTUMN', consumo_autumn, "rgba(198, 111, 66,0.5)", "#C66F42"),
    ('WINTER', consumo_winter, "rgba(112, 163, 187,0.5)", "#70A3BB"),
)

fig = go.Figure()
for season_label, season_data, fill_rgba, outline_hex in season_boxes:
    # Maximum first, then minimum: each season is drawn as a pair of boxes
    for temp_column in ('TMAX-OBR', 'TMIN-OBR'):
        fig.add_trace(go.Box(
            y=season_data[temp_column],
            name=f'{season_label} {temp_column}',
            fillcolor=fill_rgba,
            line=dict(color=outline_hex),
        ))

fig.update_layout(title_text="OBREGON MAX & MIN TEMP 4 SEASONS", height=600)
fig.show()
#iplot(fig, image='svg', filename='obregon_tmax_tmin', image_width=960, image_height=640)

In [None]:
#LOS MOCHIS MAX & MIN TEMP 4 SEASONS
# (label, seasonal table, fill colour, outline colour), listed in plotting order
season_boxes = (
    ('SPRING', consumo_spring, "rgba(158, 202, 57,0.5)", "#9eca39"),
    ('SUMMER', consumo_summer, "rgba(249, 190, 4,0.5)", "#F9BE04"),
    ('AUTUMN', consumo_autumn, "rgba(198, 111, 66,0.5)", "#C66F42"),
    ('WINTER', consumo_winter, "rgba(112, 163, 187,0.5)", "#70A3BB"),
)

fig = go.Figure()
for season_label, season_data, fill_rgba, outline_hex in season_boxes:
    # Maximum first, then minimum: each season is drawn as a pair of boxes
    for temp_column in ('TMAX-LMO', 'TMIN-LMO'):
        fig.add_trace(go.Box(
            y=season_data[temp_column],
            name=f'{season_label} {temp_column}',
            fillcolor=fill_rgba,
            line=dict(color=outline_hex),
        ))

fig.update_layout(title_text="LOS MOCHIS MAX & MIN TEMP 4 SEASONS", height=600)
fig.show()

In [None]:
#CULIACAN MAX & MIN TEMP 4 SEASONS
# (label, seasonal table, fill colour, outline colour), listed in plotting order
season_boxes = (
    ('SPRING', consumo_spring, "rgba(158, 202, 57,0.5)", "#9eca39"),
    ('SUMMER', consumo_summer, "rgba(249, 190, 4,0.5)", "#F9BE04"),
    ('AUTUMN', consumo_autumn, "rgba(198, 111, 66,0.5)", "#C66F42"),
    ('WINTER', consumo_winter, "rgba(112, 163, 187,0.5)", "#70A3BB"),
)

fig = go.Figure()
for season_label, season_data, fill_rgba, outline_hex in season_boxes:
    # Maximum first, then minimum: each season is drawn as a pair of boxes
    for temp_column in ('TMAX-CUL', 'TMIN-CUL'):
        fig.add_trace(go.Box(
            y=season_data[temp_column],
            name=f'{season_label} {temp_column}',
            fillcolor=fill_rgba,
            line=dict(color=outline_hex),
        ))

fig.update_layout(title_text="CULIACAN MAX & MIN TEMP 4 SEASONS", height=600)
fig.show()
#iplot(fig, image='svg', filename='culiacan_tmax_tmin', image_width=960, image_height=640)

#### Precipitation

In [None]:
# Summary statistics of the exogenous rainfall features (PREC_XX_MM)
consumo[['PREC_HMO_MM','PREC_OBR_MM', 'PREC_LMO_MM', 'PREC_CUL_MM']].describe()

In [None]:
#HERMOSILLO precipitation data

# `consumo` is hourly and repeats each day's rainfall on all of that day's rows.
# Keep one row per date so that the counts below are numbers of days.
consumo_daily = consumo.drop_duplicates(subset='FECHA')

# days with precipitation <= 4.9 mm
gcrno_prec_hillo = consumo_daily[consumo_daily['PREC_HMO_MM']<=4.9]
gcrno_prec_hillo = pd.melt(gcrno_prec_hillo, value_vars=['PREC_HMO_MM'])

# days with precipitation > 4.9 mm, relabelled so both groups can be told apart
prec_hillo = consumo_daily[consumo_daily['PREC_HMO_MM']>4.9]
prec_hillo = pd.melt(prec_hillo, value_vars=['PREC_HMO_MM'])
prec_hillo = prec_hillo.replace({'PREC_HMO_MM':'PREC_HMO_MM_>4.9'})
# Relabelled heavy-rain rows only (before they are stacked with the other group)
prec_hillo_violin = prec_hillo

#precipitation dataframe: both groups stacked
prec_hillo = pd.concat([gcrno_prec_hillo, prec_hillo])

prec_hillo["color"] = prec_hillo["variable"].map(
{"PREC_HMO_MM":"#F7C672", 
"PREC_HMO_MM_>4.9":"#17219C"}
)

#qtys dataframe: number of days per group and their share of all days.
# 'percent' is a fraction in [0, 1]; it is divided by the actual number of
# daily rows instead of a hard-coded total.
prec_hillo_status = prec_hillo['color'].value_counts().rename_axis('color').reset_index(name='qty_secs')
prec_hillo_status['percent'] = prec_hillo_status['qty_secs']/len(consumo_daily)
prec_hillo_status

In [None]:
# Plotting histogram of HERMOSILLO precipitation, one overlaid series per rainfall group.
# (A `labels` mapping for a column named "Close" was dropped: the plotted frame
# has no such column, so it had no effect.)
fig = px.histogram(
    prec_hillo,
    x="value",
    title="Rainfall Hermosillo PREC_HMO_MM vs PREC_HMO_MM_>_4.9",
    barmode="overlay",
    color="variable",
    color_discrete_map={
        "PREC_HMO_MM":"#F7C672", 
        "PREC_HMO_MM_>4.9":"#29A5D0"},
)
# Counts are drawn on a logarithmic axis
fig.update_yaxes(type="log")
fig.update_layout(yaxis_title="Number of Days  (Log)", xaxis_title="Precipitation mm")
fig.show()
#iplot(fig, image='svg', filename='hillo_rainfall', image_width=960, image_height=640)

In [None]:
#CD. OBREGÓN precipitation data

# `consumo` is hourly and repeats each day's rainfall on all of that day's rows.
# Keep one row per date so that the counts below are numbers of days.
consumo_daily = consumo.drop_duplicates(subset='FECHA')

# days with precipitation <= 4.9 mm
gcrno_prec_obr = consumo_daily[consumo_daily['PREC_OBR_MM']<=4.9]
gcrno_prec_obr = pd.melt(gcrno_prec_obr, value_vars=['PREC_OBR_MM'])

# days with precipitation > 4.9 mm, relabelled so both groups can be told apart
prec_obr = consumo_daily[consumo_daily['PREC_OBR_MM']>4.9]
prec_obr = pd.melt(prec_obr, value_vars=['PREC_OBR_MM'])
prec_obr = prec_obr.replace({'PREC_OBR_MM':'PREC_OBR_MM_>4.9'})
# Relabelled heavy-rain rows only (before they are stacked with the other group)
prec_obr_violin = prec_obr

#precipitation dataframe: both groups stacked
prec_obr = pd.concat([gcrno_prec_obr, prec_obr])

prec_obr["color"] = prec_obr["variable"].map(
{"PREC_OBR_MM":"#F7C672", 
"PREC_OBR_MM_>4.9":"#17219C"}
)

#qtys dataframe: number of days per group and their share of all days.
# 'percent' is a fraction in [0, 1]; it is divided by the actual number of
# daily rows instead of a hard-coded total.
prec_obr_status = prec_obr['color'].value_counts().rename_axis('color').reset_index(name='qty_secs')
prec_obr_status['percent'] = prec_obr_status['qty_secs']/len(consumo_daily)
prec_obr_status


In [None]:
# Plotting histogram of CD. OBREGÓN precipitation, one overlaid series per rainfall group.
# (A `labels` mapping for a column named "Close" was dropped: the plotted frame
# has no such column, so it had no effect.)
fig = px.histogram(
    prec_obr,
    x="value",
    title="Rainfall Cd. Obregón PREC_OBR_MM vs. PREC_OBR_MM_>4.9",
    barmode="overlay",
    color="variable",
    color_discrete_map={
        "PREC_OBR_MM":"#F7C672", 
        "PREC_OBR_MM_>4.9":"#29A5D0"},
)
# Counts are drawn on a logarithmic axis
fig.update_yaxes(type="log")
fig.update_layout(yaxis_title="Number of Days (Log)", xaxis_title="Precipitation mm")
fig.show()
#iplot(fig, image='svg', filename='obregon_rainfall', image_width=960, image_height=640)

In [None]:
#LOS MOCHIS precipitation data

# `consumo` is hourly and repeats each day's rainfall on all of that day's rows.
# Keep one row per date so that the counts below are numbers of days.
consumo_daily = consumo.drop_duplicates(subset='FECHA')

# days with precipitation <= 4.9 mm
gcrno_prec = consumo_daily[consumo_daily['PREC_LMO_MM']<=4.9]
gcrno_prec = pd.melt(gcrno_prec, value_vars=['PREC_LMO_MM'])

# days with precipitation > 4.9 mm, relabelled so both groups can be told apart
prec = consumo_daily[consumo_daily['PREC_LMO_MM']>4.9]
prec = pd.melt(prec, value_vars=['PREC_LMO_MM'])
prec = prec.replace({'PREC_LMO_MM':'PREC_LMO_MM_>4.9'})
# Relabelled heavy-rain rows only (before they are stacked with the other group)
prec_lmo_violin = prec

#precipitation dataframe: both groups stacked
prec = pd.concat([gcrno_prec, prec])

prec["color"] = prec["variable"].map(
{"PREC_LMO_MM":"#F7C672", 
 "PREC_LMO_MM_>4.9":"#29A5D0"}
)

#qtys dataframe: number of days per group and their share of all days.
# 'percent' is a fraction in [0, 1]; it is divided by the actual number of
# daily rows instead of a hard-coded total.
prec_status = prec['color'].value_counts().rename_axis('color').reset_index(name='qty_secs')
prec_status['percent'] = prec_status['qty_secs']/len(consumo_daily)
prec_status

In [None]:
# Plotting histogram of LOS MOCHIS precipitation, one overlaid series per rainfall group.
# NOTE: `prec` must be the Los Mochis table; the name is reused by the Culiacán cell.
# (A `labels` mapping for a column named "Close" was dropped: the plotted frame
# has no such column, so it had no effect.)
fig = px.histogram(
    prec,
    x="value",
    title="Rainfall Los Mochis PREC_LMO_MM vs PREC_LMO_MM_>_4.9",
    barmode="overlay",
    color="variable",
    color_discrete_map={
        "PREC_LMO_MM":"#F7C672", 
        "PREC_LMO_MM_>4.9":"#29A5D0"},
)
# Counts are drawn on a logarithmic axis, which the axis title now states
fig.update_yaxes(type="log")
fig.update_layout(yaxis_title="Number of Days (Log)", xaxis_title="Precipitation mm")
fig.show()
#iplot(fig, image='svg', filename='los_mochis_rainfall', image_width=960, image_height=640)

In [None]:
#CULIACAN precipitation data

# `consumo` is hourly and repeats each day's rainfall on all of that day's rows.
# Keep one row per date so that the counts below are numbers of days.
consumo_daily = consumo.drop_duplicates(subset='FECHA')

# days with precipitation <= 4.9 mm
gcrno_prec = consumo_daily[consumo_daily['PREC_CUL_MM']<=4.9]
gcrno_prec = pd.melt(gcrno_prec, value_vars=['PREC_CUL_MM'])

# days with precipitation > 4.9 mm, relabelled so both groups can be told apart
prec = consumo_daily[consumo_daily['PREC_CUL_MM']>4.9]
prec = pd.melt(prec, value_vars=['PREC_CUL_MM'])
prec = prec.replace({'PREC_CUL_MM':'PREC_CUL_MM>4.9'})
# Relabelled heavy-rain rows only (before they are stacked with the other group)
prec_cul_violin = prec

#precipitation dataframe: both groups stacked
prec = pd.concat([gcrno_prec, prec])

prec["color"] = prec["variable"].map(
{"PREC_CUL_MM":"#F7C672", 
"PREC_CUL_MM>4.9":"#17219C"}
)

#qtys dataframe: number of days per group and their share of all days.
# 'percent' is a fraction in [0, 1]; it is divided by the actual number of
# daily rows instead of a hard-coded total.
prec_status = prec['color'].value_counts().rename_axis('color').reset_index(name='qty_secs')
prec_status['percent'] = prec_status['qty_secs']/len(consumo_daily)
prec_status

In [None]:
# Plotting histogram of CULIACAN precipitation, one overlaid series per rainfall group.
# NOTE: `prec` must be the Culiacán table; the name is also used by the Los Mochis cell.
# (A `labels` mapping for a column named "Close" was dropped: the plotted frame
# has no such column, so it had no effect.)
fig = px.histogram(
    prec,
    x="value",
    title="Rainfall Culiacán PREC_CUL_MM vs. PREC_CUL_MM_>_4.9",
    barmode="overlay",
    color="variable",
    color_discrete_map={
        "PREC_CUL_MM":"#F7C672", 
        "PREC_CUL_MM>4.9":"#29A5D0"},
)
# Counts are drawn on a logarithmic axis, which the axis title now states
fig.update_yaxes(type="log")
fig.update_layout(yaxis_title="Number of Days (Log)", xaxis_title="Precipitation mm")
fig.show()
#iplot(fig, image='svg', filename='culiacan_rainfall', image_width=960, image_height=640)

In [None]:
# Correlation between DEMANDA and the exogenous meteorological features
meteo_columns = [
    'TMAX-CAB', 'TMAX-HMO', 'TMAX-OBR', 'TMAX-LMO', 'TMAX-CUL',
    'TMIN-CAB', 'TMIN-HMO', 'TMIN-OBR', 'TMIN-LMO', 'TMIN-CUL',
    'PREC_HMO_MM', 'PREC_OBR_MM', 'PREC_LMO_MM', 'PREC_CUL_MM',
]
# DEMANDA goes first so that it is the first row and column of the matrix
corrMatrix = consumo.loc[:, ['DEMANDA'] + meteo_columns].corr()
sns.heatmap(corrMatrix, annot=True)
plt.show()

In [None]:
# Scatter matrix of the five maximum-temperature features, points coloured by DEMANDA
tmax_columns = [f'TMAX-{city}' for city in ('CAB', 'HMO', 'OBR', 'LMO', 'CUL')]
fig = px.scatter_matrix(consumo, dimensions=tmax_columns, color="DEMANDA")
fig.update_layout(
    title_text="Multiple scatter Matrix DEMANDA vs TMAX Exogenuos Meteorological Features",
    height=1200,
)
fig.show()

In [None]:
# Scatter matrix of the five minimum-temperature features, points coloured by DEMANDA
tmin_columns = [f'TMIN-{city}' for city in ('CAB', 'HMO', 'OBR', 'LMO', 'CUL')]
fig = px.scatter_matrix(consumo, dimensions=tmin_columns, color="DEMANDA")
fig.update_layout(
    title_text="Multiple scatter Matrix DEMANDA vs TMIN Exogenuos Meteorological Features",
    height=1200,
)
fig.show()

In [None]:
# Scatter matrix of the four rainfall features, points coloured by DEMANDA
rainfall_columns = [f'PREC_{city}_MM' for city in ('HMO', 'OBR', 'LMO', 'CUL')]
fig = px.scatter_matrix(consumo, dimensions=rainfall_columns, color="DEMANDA")
fig.update_layout(
    title_text="Multiple scatter Matrix DEMANDA vs Rainfall Exogenuos Meteorological Features",
    height=1200,
)
fig.show()

<summary>
    <img src="img/calendarsymbol.png" alt="calendar" width="25" align="left" />  
    <font size="3" color="palevioletred"><b>Exogenous Calendar Features </b></font>
</summary>    

In [None]:
# Correlation between DEMANDA and the exogenous calendar (holiday) features.
# DEMANDA is part of the selection so that the matrix actually contains the
# demand-vs-holiday correlations announced above; it comes first, so it is the
# first row and column of the heatmap.
calendar_columns = [
    'LUNES_FESTIVO', 'MARTES_POSTFESTIVO', 'SEMANA_SANTA',
    '1_MAYO', '10_MAYO', '16_SEP', '2_NOV.',
    'PRE-NAVIDAD_Y_NEW_YEAR', 'NAVIDAD_Y_NEW_YEAR', 'POST-NAVIDAD_Y_NEW_YEAR',
]
corrMatrix = consumo[['DEMANDA'] + calendar_columns].corr()
sns.heatmap(corrMatrix, annot=True)
plt.show()

In [None]:
# Programme logo: loaded from a web address and embedded in the notebook output
# (embed=True), so running this cell needs network access.
from IPython import display
display.Image("https://mcd.unison.mx/wp-content/themes/awaken/img/logo_mcd.png", embed = True)

<summary>
    <font size="4" color="gray"> Maestría en Ciencia de Datos | Universidad de Sonora </font>
</summary>
<font size="1" color="gray"> Blvd. Luis Encinas y Rosales s/n Col. Centro. Edificio 3K1 planta baja C.P. 83000, Hermosillo, Sonora, México </font>
<font size="1" color="gray"> mcd@unison.mx </font>
<font size="1" color="gray"> Tel: +52 (662) 259 2155  </font>