In [1]:
import sys
import os

# Make the project's src/ directory importable. The path is resolved relative to
# the current working directory, and the membership guard keeps a re-run of this
# cell from appending the same entry to sys.path a second time.
SRC_DIR = os.path.abspath(os.path.join('..', 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Importa os módulos
from data_fetching import fetch_inmet_data
from regression_pipeline import preprocess_data, train_and_evaluate_model, impute_missing_values, run_pipeline

In [2]:
import pandas as pd
import geopandas as gpd

# Composição do dataset

In [3]:
# Load the 2023 station dataset from the project's data folder and preview it.
caminho_meta_dados = '../dados/meta_dados_estacoes_2023.csv'
df_meta_dados = pd.read_csv(caminho_meta_dados)
df_meta_dados.head()

Unnamed: 0,ano,mes,data,hora,id_estacao,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,...,temperatura_max,temperatura_min,temperatura_orvalho_max,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade
0,2023,4,2023-04-09,03:00:00,A340,1.8,997.9,998.7,997.8,,...,24.3,24.1,24.1,23.6,99.0,96.0,99.0,287.0,4.2,1.9
1,2023,4,2023-04-09,12:00:00,A367,0.0,987.0,987.3,986.6,415.8,...,23.2,22.3,23.0,22.2,99.0,99.0,99.0,244.0,5.0,1.2
2,2023,4,2023-04-09,00:00:00,A308,0.0,1007.1,1007.1,1006.4,0.0,...,25.1,24.7,24.8,24.6,99.0,99.0,99.0,133.0,3.8,1.9
3,2023,12,2023-12-21,15:00:00,A369,0.0,989.2,990.4,989.2,2249.1,...,31.7,28.4,31.7,30.0,100.0,99.0,99.0,,,
4,2023,12,2023-12-21,18:00:00,A453,0.0,981.8,982.4,981.8,200.0,...,24.1,23.8,23.9,23.6,99.0,98.0,99.0,149.0,6.3,3.2


Vamos utilizar apenas a temperatura máxima e mínima do dia

In [4]:
# Drop the columns that are not needed for the daily max/min temperature analysis.
# DataFrame.drop returns a new frame, so df_meta_dados itself is left untouched.
colunas_descartadas = [
    'radiacao_global',
    'temperatura_bulbo_hora',
    'temperatura_orvalho_hora',
    'temperatura_orvalho_max',
    'temperatura_orvalho_min',
    'umidade_rel_max',
    'umidade_rel_min',
    'umidade_rel_hora',
    'mes',
    'ano',
]
df_meta_dados_novo = df_meta_dados.drop(columns=colunas_descartadas)

df_meta_dados_novo.head()

Unnamed: 0,data,hora,id_estacao,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,temperatura_max,temperatura_min,vento_direcao,vento_rajada_max,vento_velocidade
0,2023-04-09,03:00:00,A340,1.8,997.9,998.7,997.8,24.3,24.1,287.0,4.2,1.9
1,2023-04-09,12:00:00,A367,0.0,987.0,987.3,986.6,23.2,22.3,244.0,5.0,1.2
2,2023-04-09,00:00:00,A308,0.0,1007.1,1007.1,1006.4,25.1,24.7,133.0,3.8,1.9
3,2023-12-21,15:00:00,A369,0.0,989.2,990.4,989.2,31.7,28.4,,,
4,2023-12-21,18:00:00,A453,0.0,981.8,982.4,981.8,24.1,23.8,149.0,6.3,3.2


In [5]:
# Collapse the rows into one per station and date: the daily maximum is the largest
# temperatura_max of the group and the daily minimum the smallest temperatura_min.
agregacoes_diarias = {'temperatura_max': 'max', 'temperatura_min': 'min'}
df_meta_dados_novo = (
    df_meta_dados_novo
    .groupby(['id_estacao', 'data'])
    .agg(agregacoes_diarias)
    .reset_index()
)
df_meta_dados_novo.head()

Unnamed: 0,id_estacao,data,temperatura_max,temperatura_min
0,A305,2023-01-01,32.4,25.9
1,A305,2023-01-02,32.6,24.4
2,A305,2023-01-03,32.6,24.6
3,A305,2023-01-04,32.7,25.2
4,A305,2023-01-05,32.9,25.5


In [6]:
# Check station coverage for 2023: list the stations with fewer than 364 daily rows.
# The count is over rows of the daily frame, so a day whose temperatures are NaN
# still counts as present.
# NOTE(review): 2023 has 365 days; the 364 threshold tolerates one missing day - confirm intended.
dias_por_estacao = df_meta_dados_novo['id_estacao'].value_counts()
dias_por_estacao[dias_por_estacao < 364]

Series([], Name: id_estacao, dtype: int64)

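Todas as estações possuem pelo menos 364 dias registrados em 2023. Essa contagem considera as linhas por estação e dia, inclusive os dias em que a temperatura está nula.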

In [7]:
# Summarise the daily frame: number of rows, non-null count per column and dtypes.
df_meta_dados_novo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30576 entries, 0 to 30575
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id_estacao       30576 non-null  object 
 1   data             30576 non-null  object 
 2   temperatura_max  26142 non-null  float64
 3   temperatura_min  26142 non-null  float64
dtypes: float64(2), object(2)
memory usage: 955.6+ KB


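Temos uma quantidade muito grande de valores nulos: 4.434 dos 30.576 registros diários (cerca de 14,5%) estão sem temperatura máxima, e a mesma quantidade está sem temperatura mínima. Vamos criar um modelo para tentar inferir esses valores.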

In [8]:
# Reload the raw station dataset from disk (all original columns) and summarise
# its columns, non-null counts and dtypes.
caminho_meta_dados = '../dados/meta_dados_estacoes_2023.csv'
df_meta_dados = pd.read_csv(caminho_meta_dados)
df_meta_dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 733824 entries, 0 to 733823
Data columns (total 22 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   ano                       733824 non-null  int64  
 1   mes                       733824 non-null  int64  
 2   data                      733824 non-null  object 
 3   hora                      733824 non-null  object 
 4   id_estacao                733824 non-null  object 
 5   precipitacao_total        510665 non-null  float64
 6   pressao_atm_hora          588148 non-null  float64
 7   pressao_atm_max           587454 non-null  float64
 8   pressao_atm_min           587451 non-null  float64
 9   radiacao_global           339095 non-null  float64
 10  temperatura_bulbo_hora    585996 non-null  float64
 11  temperatura_orvalho_hora  551192 non-null  float64
 12  temperatura_max           585332 non-null  float64
 13  temperatura_min           585328 non-null  f

In [9]:
# Count the missing values in each column of the raw dataset.
df_meta_dados.isna().sum()

ano                              0
mes                              0
data                             0
hora                             0
id_estacao                       0
precipitacao_total          223159
pressao_atm_hora            145676
pressao_atm_max             146370
pressao_atm_min             146373
radiacao_global             394729
temperatura_bulbo_hora      147828
temperatura_orvalho_hora    182632
temperatura_max             148492
temperatura_min             148496
temperatura_orvalho_max     184453
temperatura_orvalho_min     184468
umidade_rel_max             176355
umidade_rel_min             176080
umidade_rel_hora            174502
vento_direcao               184662
vento_rajada_max            185800
vento_velocidade            179944
dtype: int64

In [10]:
# Preview the first rows of the reloaded raw dataset.
df_meta_dados.head()

Unnamed: 0,ano,mes,data,hora,id_estacao,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,...,temperatura_max,temperatura_min,temperatura_orvalho_max,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade
0,2023,4,2023-04-09,03:00:00,A340,1.8,997.9,998.7,997.8,,...,24.3,24.1,24.1,23.6,99.0,96.0,99.0,287.0,4.2,1.9
1,2023,4,2023-04-09,12:00:00,A367,0.0,987.0,987.3,986.6,415.8,...,23.2,22.3,23.0,22.2,99.0,99.0,99.0,244.0,5.0,1.2
2,2023,4,2023-04-09,00:00:00,A308,0.0,1007.1,1007.1,1006.4,0.0,...,25.1,24.7,24.8,24.6,99.0,99.0,99.0,133.0,3.8,1.9
3,2023,12,2023-12-21,15:00:00,A369,0.0,989.2,990.4,989.2,2249.1,...,31.7,28.4,31.7,30.0,100.0,99.0,99.0,,,
4,2023,12,2023-12-21,18:00:00,A453,0.0,981.8,982.4,981.8,200.0,...,24.1,23.8,23.9,23.6,99.0,98.0,99.0,149.0,6.3,3.2


In [11]:
# Name of the target column the regression model will predict.
target_column = 'temperatura_max'

# Run the preprocessing helper imported from regression_pipeline and unpack the
# seven values it returns.
(
    df,
    X_train_scaled,
    X_test_scaled,
    y_train,
    y_test,
    scaler,
    label_encoder,
) = preprocess_data(df_meta_dados, target_column)
df.head()

Unnamed: 0,mes,hora,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,temperatura_bulbo_hora,temperatura_orvalho_hora,temperatura_max,...,temperatura_orvalho_max,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade,dia,id_estacao_encoded
1,4,12.0,0.0,987.0,987.3,986.6,415.8,23.2,23.0,23.2,...,23.0,22.2,99.0,99.0,99.0,244.0,5.0,1.2,9,44
2,4,0.0,0.0,1007.1,1007.1,1006.4,0.0,24.7,24.6,25.1,...,24.8,24.6,99.0,99.0,99.0,133.0,3.8,1.9,9,3
4,12,18.0,0.0,981.8,982.4,981.8,200.0,24.0,23.8,24.1,...,23.9,23.6,99.0,98.0,99.0,149.0,6.3,3.2,21,77
8,12,9.0,0.0,983.3,983.3,982.7,81.7,23.1,22.9,23.1,...,23.0,22.9,99.0,99.0,99.0,108.0,5.1,2.3,22,77
9,4,18.0,0.0,1006.9,1007.5,1006.9,2387.7,34.7,6.1,35.1,...,8.6,6.0,21.0,17.0,17.0,143.0,5.7,1.9,11,15


In [12]:
# Count the missing values left in each column of the preprocessed frame.
df.isna().sum()

mes                         0
hora                        0
precipitacao_total          0
pressao_atm_hora            0
pressao_atm_max             0
pressao_atm_min             0
radiacao_global             0
temperatura_bulbo_hora      0
temperatura_orvalho_hora    0
temperatura_max             0
temperatura_min             0
temperatura_orvalho_max     0
temperatura_orvalho_min     0
umidade_rel_max             0
umidade_rel_min             0
umidade_rel_hora            0
vento_direcao               0
vento_rajada_max            0
vento_velocidade            0
dia                         0
id_estacao_encoded          0
dtype: int64

# XGBoostRegressor

In [13]:
# Train the model and evaluate its performance using the train and test splits
# produced by the preprocessing step. The recorded output shows a 3-fold search
# over 243 candidates followed by the MSE, MAE and R² scores.
best_model = train_and_evaluate_model(X_train_scaled, X_test_scaled, y_train, y_test)

Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08772804412588131
Mean Absolute Error: 0.21529406736107076
R² Score: 0.9959335986243029


**Mean Squared Error (MSE)**:
   - **O que é**: Mede a média dos quadrados dos erros entre os valores previstos e os valores reais.
   - **Valor**: 0.0877
   - **Interpretação**: Um valor baixo de MSE indica que os erros do modelo são pequenos na média e que grandes erros são raros, pois os erros são elevados ao quadrado, amplificando erros maiores.

**Mean Absolute Error (MAE)**:
   - **O que é**: Mede a média das diferenças absolutas entre os valores previstos e os valores reais.
   - **Valor**: 0.2153
   - **Interpretação**: Um valor baixo de MAE indica que, em média, os erros do modelo são pequenos. É uma métrica mais direta e menos sensível a grandes erros comparada à MSE.

**R² Score (Coeficiente de Determinação)**:
   - **O que é**: Mede a proporção da variabilidade dos dados que é explicada pelo modelo.
   - **Valor**: 0.9959
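   - **Interpretação**: Um R² Score próximo de 1 indica que o modelo explica quase toda a variabilidade dos dados, sugerindo um ajuste excelente. Vale notar que, aparentemente, a avaliação foi feita apenas sobre linhas completas (o DataFrame pré-processado não contém valores nulos); por isso, essas métricas podem não refletir a qualidade das previsões nas linhas em que outras variáveis também estão ausentes.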

# Imputando os valores nulos

In [14]:
# Fill the missing values of the target column in the original dataset using the
# trained model together with the scaler and label encoder from preprocessing.
df_imputed = impute_missing_values(
    df_meta_dados,
    target_column,
    best_model,
    scaler,
    label_encoder,
)
df_imputed.head()

Unnamed: 0,mes,hora,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,temperatura_bulbo_hora,temperatura_orvalho_hora,temperatura_max,...,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade,data,id_estacao,ano
0,4,3.0,1.8,997.9,998.7,997.8,,24.2,24.0,24.3,...,23.6,99.0,96.0,99.0,287.0,4.2,1.9,2023-04-09,A340,2023
1,4,12.0,0.0,987.0,987.3,986.6,415.8,23.2,23.0,23.2,...,22.2,99.0,99.0,99.0,244.0,5.0,1.2,2023-04-09,A367,2023
2,4,0.0,0.0,1007.1,1007.1,1006.4,0.0,24.7,24.6,25.1,...,24.6,99.0,99.0,99.0,133.0,3.8,1.9,2023-04-09,A308,2023
3,12,15.0,0.0,989.2,990.4,989.2,2249.1,31.5,31.4,31.7,...,30.0,100.0,99.0,99.0,,,,2023-12-21,A369,2023
4,12,18.0,0.0,981.8,982.4,981.8,200.0,24.0,23.8,24.1,...,23.6,99.0,98.0,99.0,149.0,6.3,3.2,2023-12-21,A453,2023


In [15]:
# Count the missing values per column after imputing the target column.
df_imputed.isna().sum()

mes                              0
hora                             0
precipitacao_total          223159
pressao_atm_hora            145676
pressao_atm_max             146370
pressao_atm_min             146373
radiacao_global             394729
temperatura_bulbo_hora      147828
temperatura_orvalho_hora    182632
temperatura_max                  0
temperatura_min             148496
temperatura_orvalho_max     184453
temperatura_orvalho_min     184468
umidade_rel_max             176355
umidade_rel_min             176080
umidade_rel_hora            174502
vento_direcao               184662
vento_rajada_max            185800
vento_velocidade            179944
data                             0
id_estacao                       0
ano                              0
dtype: int64

# Processo para todos os anos

In [14]:
# Target columns whose missing values are imputed; the recorded output shows
# run_pipeline processing them one after the other.
target_columns = [
    'temperatura_max',
    'temperatura_min'
]

# Years to process: 2000 through 2022 inclusive.
years = list(range(2000, 2023))

# Create the output directory before the loop. Without this, a missing folder only
# surfaces as an error at the first to_csv call, after a full download and model
# search have already been spent on that year.
output_dir = '../dados/estacoes'
os.makedirs(output_dir, exist_ok=True)

# NOTE: df_meta_dados and df_imputed are rebound on every iteration, so after the
# loop they hold the last processed year rather than the 2023 frames used above.
for year in years:
    # Fetch the station data for this year only.
    df_meta_dados = fetch_inmet_data([year])

    # Nothing was returned for this year: report it and move on.
    if df_meta_dados.empty:
        print(f"Nenhum dado encontrado para o ano de {year}. O pipeline não foi executado.")
        continue

    # Run the imputation pipeline for every target column.
    df_imputed = run_pipeline(df_meta_dados, target_columns)

    # Persist the imputed dataset for this year.
    file_path = f'{output_dir}/dataset_imputed_{year}.csv'
    df_imputed.to_csv(file_path, index=False)
    print(f"Dataset imputed para {year} salvo com sucesso em {file_path}")

Downloading: 0rows [00:00, ?rows/s]


Nenhum dado encontrado para o ano de 2000. O pipeline não foi executado.


Downloading: 0rows [00:00, ?rows/s]


Nenhum dado encontrado para o ano de 2001. O pipeline não foi executado.


Downloading: 0rows [00:00, ?rows/s]


Nenhum dado encontrado para o ano de 2002. O pipeline não foi executado.


Downloading: 100%|██████████| 46848/46848 [00:11<00:00, 4009.14rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.10427949107610598
Mean Absolute Error: 0.21880577713695923
R² Score: 0.9930003573212488
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.04481807761773611
Mean Absolute Error: 0.15265217478087204
R² Score: 0.9963645177265077
Remaining missing values in temperatura_min: 0
Dataset imputed para 2003 salvo com sucesso em ../dados/estacoes/dataset_imputed_2003.csv


Downloading: 100%|██████████| 54816/54816 [00:13<00:00, 3928.06rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09359245155106295
Mean Absolute Error: 0.20985348604462192
R² Score: 0.9934681864182601
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.04126670210634289
Mean Absolute Error: 0.14075581556089556
R² Score: 0.996584255027813
Remaining missing values in temperatura_min: 0
Dataset imputed para 2004 salvo com sucesso em ../dados/estacoes/dataset_imputed_2004.csv


Downloading: 100%|██████████| 70080/70080 [00:18<00:00, 3730.34rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07999155676305454
Mean Absolute Error: 0.19890743397777869
R² Score: 0.9962962500112705
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.06721605987749885
Mean Absolute Error: 0.18676321817481
R² Score: 0.9962962803915816
Remaining missing values in temperatura_min: 0
Dataset imputed para 2005 salvo com sucesso em ../dados/estacoes/dataset_imputed_2005.csv


Downloading: 100%|██████████| 70320/70320 [00:18<00:00, 3771.52rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09054714787813421
Mean Absolute Error: 0.21407723516689348
R² Score: 0.9950112929265666
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07786918604840061
Mean Absolute Error: 0.20904540774806968
R² Score: 0.9951275182717402
Remaining missing values in temperatura_min: 0
Dataset imputed para 2006 salvo com sucesso em ../dados/estacoes/dataset_imputed_2006.csv


Downloading: 100%|██████████| 192696/192696 [00:50<00:00, 3846.61rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08147103679615027
Mean Absolute Error: 0.20398420465604278
R² Score: 0.9963886272552847
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07850535218186516
Mean Absolute Error: 0.21059047637600842
R² Score: 0.9962452382791518
Remaining missing values in temperatura_min: 0
Dataset imputed para 2007 salvo com sucesso em ../dados/estacoes/dataset_imputed_2007.csv


Downloading: 100%|██████████| 498720/498720 [02:16<00:00, 3642.99rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08044554543825397
Mean Absolute Error: 0.20766484765932613
R² Score: 0.9962350552753793
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0847663518780839
Mean Absolute Error: 0.21595443769453612
R² Score: 0.9957693388486815
Remaining missing values in temperatura_min: 0
Dataset imputed para 2008 salvo com sucesso em ../dados/estacoes/dataset_imputed_2008.csv


Downloading: 100%|██████████| 610536/610536 [02:54<00:00, 3506.29rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07947730345358883
Mean Absolute Error: 0.20162399690265384
R² Score: 0.9958484247608584
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07731046845810098
Mean Absolute Error: 0.20457658641748253
R² Score: 0.9955076469263372
Remaining missing values in temperatura_min: 0
Dataset imputed para 2009 salvo com sucesso em ../dados/estacoes/dataset_imputed_2009.csv


Downloading: 100%|██████████| 624792/624792 [02:52<00:00, 3616.51rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0843359782604267
Mean Absolute Error: 0.20831852389733851
R² Score: 0.996024641968454
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08422623203618083
Mean Absolute Error: 0.2143983157162858
R² Score: 0.9956478215463234
Remaining missing values in temperatura_min: 0
Dataset imputed para 2010 salvo com sucesso em ../dados/estacoes/dataset_imputed_2010.csv


Downloading: 100%|██████████| 639480/639480 [02:56<00:00, 3629.68rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08148808988945352
Mean Absolute Error: 0.20853378226015104
R² Score: 0.9958120203205271
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08541608448881684
Mean Absolute Error: 0.21465796092354658
R² Score: 0.995188643091749
Remaining missing values in temperatura_min: 0
Dataset imputed para 2011 salvo com sucesso em ../dados/estacoes/dataset_imputed_2011.csv


Downloading: 100%|██████████| 641232/641232 [03:00<00:00, 3546.44rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08214029242084533
Mean Absolute Error: 0.208203209847914
R² Score: 0.9962537162193514
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08391685145390786
Mean Absolute Error: 0.21308620049365884
R² Score: 0.9958878470956363
Remaining missing values in temperatura_min: 0
Dataset imputed para 2012 salvo com sucesso em ../dados/estacoes/dataset_imputed_2012.csv


Downloading: 100%|██████████| 639480/639480 [02:59<00:00, 3566.52rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08003095811197238
Mean Absolute Error: 0.2043915813217209
R² Score: 0.996082241377345
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08149391520340875
Mean Absolute Error: 0.21140957402649013
R² Score: 0.9956982379906684
Remaining missing values in temperatura_min: 0
Dataset imputed para 2013 salvo com sucesso em ../dados/estacoes/dataset_imputed_2013.csv


Downloading: 100%|██████████| 639480/639480 [03:01<00:00, 3515.91rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08028863150193752
Mean Absolute Error: 0.20738400674692387
R² Score: 0.9959182474609621
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08555880767031378
Mean Absolute Error: 0.21901846158248817
R² Score: 0.9953270006177695
Remaining missing values in temperatura_min: 0
Dataset imputed para 2014 salvo com sucesso em ../dados/estacoes/dataset_imputed_2014.csv


Downloading: 100%|██████████| 646080/646080 [03:00<00:00, 3583.14rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08556291418730116
Mean Absolute Error: 0.21317126172792475
R² Score: 0.996166544489007
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08984003686760879
Mean Absolute Error: 0.22325723082576573
R² Score: 0.9957538698130043
Remaining missing values in temperatura_min: 0
Dataset imputed para 2015 salvo com sucesso em ../dados/estacoes/dataset_imputed_2015.csv


Downloading: 100%|██████████| 676128/676128 [03:15<00:00, 3451.95rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0821820890682548
Mean Absolute Error: 0.2093922834763673
R² Score: 0.9960256418897627
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08619577347819829
Mean Absolute Error: 0.21998669766985196
R² Score: 0.9956019500975881
Remaining missing values in temperatura_min: 0
Dataset imputed para 2016 salvo com sucesso em ../dados/estacoes/dataset_imputed_2016.csv


Downloading: 100%|██████████| 712656/712656 [03:13<00:00, 3673.75rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08338018355017325
Mean Absolute Error: 0.20980682602607703
R² Score: 0.9960950366514163
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09418487479217874
Mean Absolute Error: 0.2206025851614154
R² Score: 0.9952799604420175
Remaining missing values in temperatura_min: 0
Dataset imputed para 2017 salvo com sucesso em ../dados/estacoes/dataset_imputed_2017.csv


Downloading: 100%|██████████| 760968/760968 [06:08<00:00, 2063.15rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08535896986224215
Mean Absolute Error: 0.21100413865518766
R² Score: 0.9957349434672537
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09074832559437383
Mean Absolute Error: 0.22449975930975852
R² Score: 0.9951964741735337
Remaining missing values in temperatura_min: 0
Dataset imputed para 2018 salvo com sucesso em ../dados/estacoes/dataset_imputed_2018.csv


Downloading: 100%|██████████| 762120/762120 [03:29<00:00, 3644.52rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09052600549241696
Mean Absolute Error: 0.22022216847673637
R² Score: 0.9957552207881025
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09448949531003095
Mean Absolute Error: 0.22988303828700174
R² Score: 0.9952976772788868
Remaining missing values in temperatura_min: 0
Dataset imputed para 2019 salvo com sucesso em ../dados/estacoes/dataset_imputed_2019.csv


Downloading: 100%|██████████| 764208/764208 [03:12<00:00, 3962.95rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.091295617180823
Mean Absolute Error: 0.21926755178301707
R² Score: 0.9951400037078093
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09939132517448311
Mean Absolute Error: 0.23326812718537385
R² Score: 0.9943480647084225
Remaining missing values in temperatura_min: 0
Dataset imputed para 2020 salvo com sucesso em ../dados/estacoes/dataset_imputed_2020.csv


Downloading: 100%|██████████| 762120/762120 [02:46<00:00, 4581.01rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08724853847435791
Mean Absolute Error: 0.2166902551065964
R² Score: 0.9956557751835607
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.10142665688951939
Mean Absolute Error: 0.2392946548048403
R² Score: 0.9946461481150672
Remaining missing values in temperatura_min: 0
Dataset imputed para 2021 salvo com sucesso em ../dados/estacoes/dataset_imputed_2021.csv


Downloading: 100%|██████████| 753360/753360 [02:43<00:00, 4611.49rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08867357616307385
Mean Absolute Error: 0.21635690118744647
R² Score: 0.9954780425006431
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09870316159768011
Mean Absolute Error: 0.2343711406392511
R² Score: 0.9946824934873278
Remaining missing values in temperatura_min: 0
Dataset imputed para 2022 salvo com sucesso em ../dados/estacoes/dataset_imputed_2022.csv
