In [2]:
import glob
import os

import numpy as np
import pandas as pd

### Carregando a base de dados de consumo de energia elétrica

In [52]:
def load_and_concat_files_on_dir_energy(path: str, sep : str) -> pd.DataFrame:
    """
    Load every csv file found directly inside a directory and stack them
    row-wise into a single dataframe.

    Files are read in sorted filename order so the row order of the result is
    reproducible across machines and runs. The name of each file is printed as
    it is read, followed by the number of files loaded.

    Parameters
    ----------
    path : str
        Path to the directory
    sep : str
        Separator of the files

    Returns
    -------
    pd.DataFrame
        Dataframe with all the files concatenated (fresh 0..n-1 index)

    Raises
    ------
    ValueError
        If the directory contains no csv file.
    """
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the concatenation order deterministic.
    all_files : list = sorted(glob.glob(path + "/*.csv"))
    if not all_files:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # raise the same exception type with an actionable message instead.
        raise ValueError(f"No csv files found in '{path}'")

    li : list = []
    for filename in all_files:
        print(filename)
        li.append(pd.read_csv(filename, index_col=None, header=0, sep=sep))

    df_carga_full : pd.DataFrame = pd.concat(li, axis=0, ignore_index=True)
    print(f"{len(li)} files loaded")
    return df_carga_full

# Directory holding the CURVA_CARGA_*.csv load-curve files (';'-separated).
path : str = './data/curva_carga'
df_carga_full : pd.DataFrame = load_and_concat_files_on_dir_energy(path, ';')
# Last expression of the cell: display (rows, columns) of the combined frame.
df_carga_full.shape


./data/curva_carga\CURVA_CARGA_2012.csv
./data/curva_carga\CURVA_CARGA_2013.csv
./data/curva_carga\CURVA_CARGA_2014.csv
./data/curva_carga\CURVA_CARGA_2015.csv
./data/curva_carga\CURVA_CARGA_2016.csv
./data/curva_carga\CURVA_CARGA_2017.csv
./data/curva_carga\CURVA_CARGA_2018.csv
./data/curva_carga\CURVA_CARGA_2019.csv
./data/curva_carga\CURVA_CARGA_2020.csv
./data/curva_carga\CURVA_CARGA_2021.csv
./data/curva_carga\CURVA_CARGA_2022.csv
11 files loaded


(357136, 4)

In [53]:
def create_time_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create time features from the timestamp.

    The 'din_instante' column is parsed to datetime and the columns 'year',
    'month', 'day' and 'hour' are derived from it. The input frame is left
    untouched: all changes are made on a copy, so calling the function again
    on the same input gives the same result. A summary of the resulting frame
    is printed via ``DataFrame.info()``.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with the timestamp in a 'din_instante' column

    Returns
    -------
    pd.DataFrame
        New dataframe with the parsed timestamp and the time features
    """
    # Work on a copy so the caller's frame is not silently mutated.
    df_features : pd.DataFrame = df.copy()

    instants : pd.Series = pd.to_datetime(df_features['din_instante'])
    df_features['din_instante'] = instants

    df_features['year'] = instants.dt.year
    df_features['month'] = instants.dt.month
    df_features['day'] = instants.dt.day
    df_features['hour'] = instants.dt.hour

    # info() writes its report itself and returns None; wrapping it in
    # print() would emit a stray "None" line after the report.
    df_features.info()

    return df_features


# Rebind df_carga_full to the frame returned by create_time_features
# (datetime 'din_instante' plus year/month/day/hour columns).
df_carga_full : pd.DataFrame = create_time_features(df_carga_full)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357136 entries, 0 to 357135
Data columns (total 8 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   id_subsistema            357136 non-null  object        
 1   nom_subsistema           357136 non-null  object        
 2   din_instante             357136 non-null  datetime64[ns]
 3   val_cargaenergiahomwmed  356397 non-null  float64       
 4   year                     357136 non-null  int64         
 5   month                    357136 non-null  int64         
 6   day                      357136 non-null  int64         
 7   hour                     357136 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(4), object(2)
memory usage: 21.8+ MB
None


In [54]:
## Keep only the rows that belong to the Southeast ('SUDESTE') subsystem

df_carga_full_sudeste : pd.DataFrame = df_carga_full.loc[
    df_carga_full['nom_subsistema'].eq('SUDESTE')
]
print(df_carga_full_sudeste.shape)
df_carga_full_sudeste.head()

(89326, 8)


Unnamed: 0,id_subsistema,nom_subsistema,din_instante,val_cargaenergiahomwmed,year,month,day,hour
3,SE,SUDESTE,2012-01-01 00:00:00,28212.46,2012,1,1,0
7,SE,SUDESTE,2012-01-01 01:00:00,27511.77,2012,1,1,1
11,SE,SUDESTE,2012-01-01 02:00:00,26339.97,2012,1,1,2
15,SE,SUDESTE,2012-01-01 03:00:00,25318.01,2012,1,1,3
19,SE,SUDESTE,2012-01-01 04:00:00,24543.74,2012,1,1,4


## Carregando os dados climáticos

Usar o mapa https://mapas.inmet.gov.br/ como referência para escolher algumas das estações meteorológicas, juntar os seus dados e depois calcular as médias.

Estações escolhidas: A740, A714, A739, A768, F501, A506, A713, A602

In [59]:
def load_and_concat_files_on_dir_wheater(path: str, sep : str, stations = None, skiprows : int = 10) -> pd.DataFrame:
    """
    Load the csv files of the selected weather stations from a directory and
    stack them row-wise into a single dataframe.

    The station code is taken from the file name, which is expected to look
    like ``dados_<STATION>_...csv`` (e.g. ``dados_A740_H_2010-01-01_2021-12-31.csv``).
    It is parsed from the base name, so the function works for any ``path``,
    not only for a directory string of one particular length. Each file name
    and its station code are printed while scanning, followed by the number
    of files loaded.

    Parameters
    ----------
    path : str
        Path to the directory
    sep : str
        Separator of the files
    stations : iterable of str, optional
        Station codes to load. Defaults to
        A740, A714, A739, A768, F501, A506, A713, A602.
    skiprows : int, optional
        Number of leading lines to skip before the column header line
        (default 10).

    Returns
    -------
    pd.DataFrame
        Dataframe with the selected station files concatenated
        (fresh 0..n-1 index)

    Raises
    ------
    ValueError
        If no csv file of the selected stations is found in ``path``.
    """
    if stations is None:
        stations = ['A740','A714','A739','A768','F501','A506','A713','A602']
    allowed_vars : set = set(stations)

    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the concatenation order deterministic.
    all_files : list = sorted(glob.glob(path + "/*.csv"))
    li : list = []

    for filename in all_files:
        print(filename)
        # Second '_'-separated token of the base name is the station code;
        # a fixed slice of the full path breaks as soon as `path` changes.
        name_parts : list = os.path.basename(filename).split('_')
        station : str = name_parts[1] if len(name_parts) > 1 else ''
        print(station)
        if station in allowed_vars:
            li.append(pd.read_csv(filename, index_col=None, sep=sep, skiprows=skiprows))

    if not li:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # raise the same exception type with an actionable message instead.
        raise ValueError(
            f"No csv files for stations {sorted(allowed_vars)} found in '{path}'"
        )

    df_wheater_full : pd.DataFrame = pd.concat(li, axis=0, ignore_index=True)
    print(f"{len(li)} files loaded")
    return df_wheater_full


# Directory with the weather-station csv files; note that this rebinds the
# `path` name used earlier for the load-curve directory.
path : str = './data/clima'
df_wheater : pd.DataFrame = load_and_concat_files_on_dir_wheater(path, ';')
# Last expression of the cell: display (rows, columns) of the combined frame.
df_wheater.shape


./data/clima\dados_A502_H_2010-01-01_2021-12-31.csv
A502
./data/clima\dados_A503_H_2010-01-01_2010-09-21.csv
A503
./data/clima\dados_A505_H_2010-01-01_2021-12-31.csv
A505
./data/clima\dados_A506_H_2010-01-01_2021-12-31.csv
A506
./data/clima\dados_A507_H_2010-01-01_2021-12-31.csv
A507
./data/clima\dados_A508_H_2010-01-01_2021-12-31.csv
A508
./data/clima\dados_A509_H_2010-01-01_2021-12-31.csv
A509
./data/clima\dados_A510_H_2010-01-01_2021-12-31.csv
A510
./data/clima\dados_A511_H_2010-01-01_2021-12-31.csv
A511
./data/clima\dados_A512_H_2010-01-01_2021-12-31.csv
A512
./data/clima\dados_A513_H_2010-01-01_2021-12-31.csv
A513
./data/clima\dados_A514_H_2010-01-01_2021-12-31.csv
A514
./data/clima\dados_A515_H_2010-01-01_2021-12-31.csv
A515
./data/clima\dados_A516_H_2010-01-01_2021-12-31.csv
A516
./data/clima\dados_A517_H_2010-01-01_2021-12-31.csv
A517
./data/clima\dados_A518_H_2010-01-01_2021-12-31.csv
A518
./data/clima\dados_A519_H_2010-01-01_2021-12-31.csv
A519
./data/clima\dados_A520_H_2010-

  df : pd.DataFrame = pd.read_csv(filename, index_col=None,sep=sep,skiprows=10)


./data/clima\dados_A740_H_2010-01-01_2021-12-31.csv
A740


  df : pd.DataFrame = pd.read_csv(filename, index_col=None,sep=sep,skiprows=10)


./data/clima\dados_A741_H_2010-01-01_2021-12-31.csv
A741
./data/clima\dados_A744_H_2017-12-19_2021-12-31.csv
A744
./data/clima\dados_A745_H_2010-01-01_2017-05-17.csv
A745
./data/clima\dados_A746_H_2010-01-01_2021-12-31.csv
A746
./data/clima\dados_A747_H_2010-01-01_2021-12-31.csv
A747
./data/clima\dados_A748_H_2010-06-18_2021-12-31.csv
A748
./data/clima\dados_A753_H_2010-01-01_2021-12-31.csv
A753
./data/clima\dados_A755_H_2011-03-28_2021-12-31.csv
A755
./data/clima\dados_A762_H_2016-11-02_2021-12-31.csv
A762
./data/clima\dados_A763_H_2017-05-14_2021-12-31.csv
A763
./data/clima\dados_A764_H_2016-10-26_2021-12-31.csv
A764
./data/clima\dados_A765_H_2017-01-31_2021-12-31.csv
A765
./data/clima\dados_A766_H_2017-02-08_2021-12-31.csv
A766
./data/clima\dados_A767_H_2017-10-24_2021-12-31.csv
A767
./data/clima\dados_A768_H_2017-05-11_2021-12-31.csv
A768
./data/clima\dados_A769_H_2017-10-19_2021-12-31.csv
A769
./data/clima\dados_A770_H_2019-07-02_2021-12-31.csv
A770
./data/clima\dados_A771_H_2018-

(742128, 23)

In [61]:
# Show the last rows of the combined weather frame.
df_wheater.tail()

Unnamed: 0,Data Medicao,Hora Medicao,"PRECIPITACAO TOTAL, HORARIO(mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)","PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)",PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB),PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB),RADIACAO GLOBAL(Kj/m²),TEMPERATURA DA CPU DA ESTACAO(°C),"TEMPERATURA DO AR - BULBO SECO, HORARIA(°C)",...,TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(°C),TENSAO DA BATERIA DA ESTACAO(V),UMIDADE REL. MAX. NA HORA ANT. (AUT)(%),UMIDADE REL. MIN. NA HORA ANT. (AUT)(%),"UMIDADE RELATIVA DO AR, HORARIA(%)","VENTO, DIRECAO HORARIA (gr)(° (gr))","VENTO, RAJADA MAXIMA(m/s)","VENTO, VELOCIDADE HORARIA(m/s)",Unnamed: 22
742123,2021-12-31,1900,0,8774,1011776218,8777,8774,641046,20.0,184,...,165,159,144,89.0,87.0,87.0,44.0,97,48,
742124,2021-12-31,2000,0,8776,1012155403,8776,8772,431974,21.0,181,...,166,159,135,87.0,85.0,87.0,40.0,89,49,
742125,2021-12-31,2100,",4",8778,1012534984,8778,8775,138383,20.0,178,...,16,154,133,88.0,87.0,87.0,54.0,9,6,
742126,2021-12-31,2200,",6",8785,1013591552,8785,8777,9571,20.0,173,...,155,152,127,88.0,87.0,88.0,35.0,12,5,
742127,2021-12-31,2300,",2",8788,1013987637,8788,8785,0,19.0,172,...,154,152,126,89.0,88.0,89.0,44.0,88,5,


### Testes e experimentos

In [50]:
# Scratch check: read a single station file (A505) with the same options the
# weather loader passes to read_csv (sep=';', skiprows=10) and inspect its
# shape and first rows.
df : pd.DataFrame = pd.read_csv('./data/clima/dados_A505_H_2010-01-01_2021-12-31.csv', index_col=None,sep=";",skiprows=10)
print(df.shape)
df.head()

(105192, 23)


Unnamed: 0,Data Medicao,Hora Medicao,"PRECIPITACAO TOTAL, HORARIO(mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)","PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)",PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB),PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB),RADIACAO GLOBAL(Kj/m²),TEMPERATURA DA CPU DA ESTACAO(°C),"TEMPERATURA DO AR - BULBO SECO, HORARIA(°C)",...,TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(°C),TENSAO DA BATERIA DA ESTACAO(V),UMIDADE REL. MAX. NA HORA ANT. (AUT)(%),UMIDADE REL. MIN. NA HORA ANT. (AUT)(%),"UMIDADE RELATIVA DO AR, HORARIA(%)","VENTO, DIRECAO HORARIA (gr)(° (gr))","VENTO, RAJADA MAXIMA(m/s)","VENTO, VELOCIDADE HORARIA(m/s)",Unnamed: 22
0,2010-01-01,0,",2",8983,1011716525,8983,8975,3974,22.0,205,...,193,186,125,91.0,87.0,89.0,104.0,48,25,
1,2010-01-01,100,22,8993,1012842781,8993,8983,-2122,22.0,205,...,192,184,125,92.0,89.0,92.0,33.0,47,17,
2,2010-01-01,200,",4",8997,1013334328,8998,8993,",21",22.0,204,...,194,191,124,94.0,92.0,94.0,27.0,59,17,
3,2010-01-01,300,",6",899,1012669128,8997,899,"-,068",21.0,201,...,194,191,124,94.0,94.0,94.0,358.0,48,11,
4,2010-01-01,400,0,8984,1012075499,899,8984,"-,05",21.0,199,...,192,189,124,94.0,94.0,94.0,277.0,22,",1",
