In [1]:
import sys
import os

# Make the project's sibling src directory importable from this notebook
sys.path.append(os.path.abspath(os.path.join(os.pardir, 'src')))

# Importa os módulos
from data_fetching import fetch_inmet_data
from regression_pipeline import preprocess_data, train_and_evaluate_model, impute_missing_values, run_pipeline

In [2]:
import pandas as pd
import geopandas as gpd

# Composição do dataset

In [3]:
# Load the 2023 station records from the local CSV and preview the first rows
df_meta_dados = pd.read_csv('../dados/meta_dados_estacoes_2023.csv')
df_meta_dados.head()

Unnamed: 0,ano,mes,data,hora,id_estacao,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,...,temperatura_max,temperatura_min,temperatura_orvalho_max,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade
0,2023,4,2023-04-09,03:00:00,A340,1.8,997.9,998.7,997.8,,...,24.3,24.1,24.1,23.6,99.0,96.0,99.0,287.0,4.2,1.9
1,2023,4,2023-04-09,12:00:00,A367,0.0,987.0,987.3,986.6,415.8,...,23.2,22.3,23.0,22.2,99.0,99.0,99.0,244.0,5.0,1.2
2,2023,4,2023-04-09,00:00:00,A308,0.0,1007.1,1007.1,1006.4,0.0,...,25.1,24.7,24.8,24.6,99.0,99.0,99.0,133.0,3.8,1.9
3,2023,12,2023-12-21,15:00:00,A369,0.0,989.2,990.4,989.2,2249.1,...,31.7,28.4,31.7,30.0,100.0,99.0,99.0,,,
4,2023,12,2023-12-21,18:00:00,A453,0.0,981.8,982.4,981.8,200.0,...,24.1,23.8,23.9,23.6,99.0,98.0,99.0,149.0,6.3,3.2


Vamos utilizar apenas a temperatura máxima e mínima do dia

In [4]:
# Drop the columns that are not needed for the daily max/min temperature view
unused_columns = [
    'radiacao_global',
    'temperatura_bulbo_hora',
    'temperatura_orvalho_hora',
    'temperatura_orvalho_max',
    'temperatura_orvalho_min',
    'umidade_rel_max',
    'umidade_rel_min',
    'umidade_rel_hora',
    'mes',
    'ano',
]
df_meta_dados_novo = df_meta_dados.drop(columns=unused_columns)

df_meta_dados_novo.head()

Unnamed: 0,data,hora,id_estacao,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,temperatura_max,temperatura_min,vento_direcao,vento_rajada_max,vento_velocidade
0,2023-04-09,03:00:00,A340,1.8,997.9,998.7,997.8,24.3,24.1,287.0,4.2,1.9
1,2023-04-09,12:00:00,A367,0.0,987.0,987.3,986.6,23.2,22.3,244.0,5.0,1.2
2,2023-04-09,00:00:00,A308,0.0,1007.1,1007.1,1006.4,25.1,24.7,133.0,3.8,1.9
3,2023-12-21,15:00:00,A369,0.0,989.2,990.4,989.2,31.7,28.4,,,
4,2023-12-21,18:00:00,A453,0.0,981.8,982.4,981.8,24.1,23.8,149.0,6.3,3.2


In [5]:
# Collapse the hourly rows into one row per station and day, keeping the
# highest temperatura_max and the lowest temperatura_min observed that day
daily_extremes = {'temperatura_max': 'max', 'temperatura_min': 'min'}
df_meta_dados_novo = (
    df_meta_dados_novo
    .groupby(['id_estacao', 'data'])
    .agg(daily_extremes)
    .reset_index()
)
df_meta_dados_novo.head()

Unnamed: 0,id_estacao,data,temperatura_max,temperatura_min
0,A305,2023-01-01,32.4,25.9
1,A305,2023-01-02,32.6,24.4
2,A305,2023-01-03,32.6,24.6
3,A305,2023-01-04,32.7,25.2
4,A305,2023-01-05,32.9,25.5


In [6]:
# Check whether every station covers the days of 2023:
# list the stations that have fewer than 364 daily rows
days_per_station = df_meta_dados_novo['id_estacao'].value_counts()
days_per_station[days_per_station < 364]

Series([], Name: id_estacao, dtype: int64)

Todas as estações possuem uma contagem de pelo menos 364 dias

In [7]:
# Summarize column dtypes and non-null counts of the daily frame
df_meta_dados_novo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30576 entries, 0 to 30575
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id_estacao       30576 non-null  object 
 1   data             30576 non-null  object 
 2   temperatura_max  26142 non-null  float64
 3   temperatura_min  26142 non-null  float64
dtypes: float64(2), object(2)
memory usage: 955.6+ KB


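Há uma quantidade considerável de valores nulos: 4.434 dos 30.576 registros diários (cerca de 14,5%) estão sem temperatura máxima, e o mesmo número está sem temperatura mínima. Vamos criar um modelo para tentar inferir esses valores.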

In [8]:
# Reload the raw 2023 CSV (the same file read earlier in the notebook) and
# inspect dtypes and non-null counts of every column before modelling
df_meta_dados = pd.read_csv('../dados/meta_dados_estacoes_2023.csv')
df_meta_dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 733824 entries, 0 to 733823
Data columns (total 22 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   ano                       733824 non-null  int64  
 1   mes                       733824 non-null  int64  
 2   data                      733824 non-null  object 
 3   hora                      733824 non-null  object 
 4   id_estacao                733824 non-null  object 
 5   precipitacao_total        510665 non-null  float64
 6   pressao_atm_hora          588148 non-null  float64
 7   pressao_atm_max           587454 non-null  float64
 8   pressao_atm_min           587451 non-null  float64
 9   radiacao_global           339095 non-null  float64
 10  temperatura_bulbo_hora    585996 non-null  float64
 11  temperatura_orvalho_hora  551192 non-null  float64
 12  temperatura_max           585332 non-null  float64
 13  temperatura_min           585328 non-null  f

In [9]:
# Count the missing values in each column of the raw frame
df_meta_dados.isna().sum()

ano                              0
mes                              0
data                             0
hora                             0
id_estacao                       0
precipitacao_total          223159
pressao_atm_hora            145676
pressao_atm_max             146370
pressao_atm_min             146373
radiacao_global             394729
temperatura_bulbo_hora      147828
temperatura_orvalho_hora    182632
temperatura_max             148492
temperatura_min             148496
temperatura_orvalho_max     184453
temperatura_orvalho_min     184468
umidade_rel_max             176355
umidade_rel_min             176080
umidade_rel_hora            174502
vento_direcao               184662
vento_rajada_max            185800
vento_velocidade            179944
dtype: int64

In [10]:
# Preview the first rows of the raw frame
df_meta_dados.head()

Unnamed: 0,ano,mes,data,hora,id_estacao,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,...,temperatura_max,temperatura_min,temperatura_orvalho_max,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade
0,2023,4,2023-04-09,03:00:00,A340,1.8,997.9,998.7,997.8,,...,24.3,24.1,24.1,23.6,99.0,96.0,99.0,287.0,4.2,1.9
1,2023,4,2023-04-09,12:00:00,A367,0.0,987.0,987.3,986.6,415.8,...,23.2,22.3,23.0,22.2,99.0,99.0,99.0,244.0,5.0,1.2
2,2023,4,2023-04-09,00:00:00,A308,0.0,1007.1,1007.1,1006.4,0.0,...,25.1,24.7,24.8,24.6,99.0,99.0,99.0,133.0,3.8,1.9
3,2023,12,2023-12-21,15:00:00,A369,0.0,989.2,990.4,989.2,2249.1,...,31.7,28.4,31.7,30.0,100.0,99.0,99.0,,,
4,2023,12,2023-12-21,18:00:00,A453,0.0,981.8,982.4,981.8,200.0,...,24.1,23.8,23.9,23.6,99.0,98.0,99.0,149.0,6.3,3.2


In [11]:
# Column whose values the regression model will learn to predict
target_column = 'temperatura_max'

# Preprocess the data and unpack the seven values returned by preprocess_data
(
    df,
    X_train_scaled,
    X_test_scaled,
    y_train,
    y_test,
    scaler,
    label_encoder,
) = preprocess_data(df_meta_dados, target_column)
df.head()

Unnamed: 0,mes,hora,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,temperatura_bulbo_hora,temperatura_orvalho_hora,temperatura_max,...,temperatura_orvalho_max,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade,dia,id_estacao_encoded
1,4,12.0,0.0,987.0,987.3,986.6,415.8,23.2,23.0,23.2,...,23.0,22.2,99.0,99.0,99.0,244.0,5.0,1.2,9,44
2,4,0.0,0.0,1007.1,1007.1,1006.4,0.0,24.7,24.6,25.1,...,24.8,24.6,99.0,99.0,99.0,133.0,3.8,1.9,9,3
4,12,18.0,0.0,981.8,982.4,981.8,200.0,24.0,23.8,24.1,...,23.9,23.6,99.0,98.0,99.0,149.0,6.3,3.2,21,77
8,12,9.0,0.0,983.3,983.3,982.7,81.7,23.1,22.9,23.1,...,23.0,22.9,99.0,99.0,99.0,108.0,5.1,2.3,22,77
10,4,8.0,0.0,951.9,951.9,951.6,5.4,21.5,21.3,21.6,...,21.3,21.2,99.0,98.0,99.0,188.0,4.9,2.3,11,6


In [12]:
# Count the missing values in each column of the preprocessed frame
df.isna().sum()

mes                         0
hora                        0
precipitacao_total          0
pressao_atm_hora            0
pressao_atm_max             0
pressao_atm_min             0
radiacao_global             0
temperatura_bulbo_hora      0
temperatura_orvalho_hora    0
temperatura_max             0
temperatura_min             0
temperatura_orvalho_max     0
temperatura_orvalho_min     0
umidade_rel_max             0
umidade_rel_min             0
umidade_rel_hora            0
vento_direcao               0
vento_rajada_max            0
vento_velocidade            0
dia                         0
id_estacao_encoded          0
dtype: int64

# XGBoostRegressor

In [13]:
# Train the model and evaluate its performance on the held-out test split
# NOTE(review): the frame returned by preprocess_data shows no missing values
# in any column, so these metrics appear to be measured only on fully observed
# rows — confirm they are representative of the rows that will be imputed.
best_model = train_and_evaluate_model(X_train_scaled, X_test_scaled, y_train, y_test)

Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08503053840820482
Mean Absolute Error: 0.21055169404267499
R² Score: 0.9940648803177032


**Mean Squared Error (MSE)**:
   - **O que é**: Mede a média dos quadrados dos erros entre os valores previstos e os valores reais.
   - **Valor**: 0.0850
   - **Interpretação**: Um valor baixo de MSE indica que os erros do modelo são pequenos na média e que grandes erros são raros, pois os erros são elevados ao quadrado, amplificando erros maiores.

**Mean Absolute Error (MAE)**:
   - **O que é**: Mede a média das diferenças absolutas entre os valores previstos e os valores reais.
   - **Valor**: 0.2106
   - **Interpretação**: Um valor baixo de MAE indica que, em média, os erros do modelo são pequenos. É uma métrica mais direta e menos sensível a grandes erros comparada à MSE.

**R² Score (Coeficiente de Determinação)**:
   - **O que é**: Mede a proporção da variabilidade dos dados que é explicada pelo modelo.
   - **Valor**: 0.9941
   - **Interpretação**: Um R² Score próximo de 1 indica que o modelo explica quase toda a variabilidade dos dados, sugerindo um ajuste excelente.

# Imputando os valores nulos

In [14]:
# Fill the missing target values of the original dataset using the trained model
df_imputed = impute_missing_values(
    df_meta_dados,
    target_column,
    best_model,
    scaler,
    label_encoder,
)
df_imputed.head()

Unnamed: 0,mes,hora,precipitacao_total,pressao_atm_hora,pressao_atm_max,pressao_atm_min,radiacao_global,temperatura_bulbo_hora,temperatura_orvalho_hora,temperatura_max,...,temperatura_orvalho_min,umidade_rel_max,umidade_rel_min,umidade_rel_hora,vento_direcao,vento_rajada_max,vento_velocidade,data,id_estacao,ano
0,4,3.0,1.8,997.9,998.7,997.8,,24.2,24.0,24.3,...,23.6,99.0,96.0,99.0,287.0,4.2,1.9,2023-04-09,A340,2023
1,4,12.0,0.0,987.0,987.3,986.6,415.8,23.2,23.0,23.2,...,22.2,99.0,99.0,99.0,244.0,5.0,1.2,2023-04-09,A367,2023
2,4,0.0,0.0,1007.1,1007.1,1006.4,0.0,24.7,24.6,25.1,...,24.6,99.0,99.0,99.0,133.0,3.8,1.9,2023-04-09,A308,2023
3,12,15.0,0.0,989.2,990.4,989.2,2249.1,31.5,31.4,31.7,...,30.0,100.0,99.0,99.0,,,,2023-12-21,A369,2023
4,12,18.0,0.0,981.8,982.4,981.8,200.0,24.0,23.8,24.1,...,23.6,99.0,98.0,99.0,149.0,6.3,3.2,2023-12-21,A453,2023


In [15]:
# Count the missing values left in each column after imputation
df_imputed.isna().sum()

mes                              0
hora                             0
precipitacao_total          223159
pressao_atm_hora            145676
pressao_atm_max             146370
pressao_atm_min             146373
radiacao_global             394729
temperatura_bulbo_hora      147828
temperatura_orvalho_hora    182632
temperatura_max                  0
temperatura_min             148496
temperatura_orvalho_max     184453
temperatura_orvalho_min     184468
umidade_rel_max             176355
umidade_rel_min             176080
umidade_rel_hora            174502
vento_direcao               184662
vento_rajada_max            185800
vento_velocidade            179944
data                             0
id_estacao                       0
ano                              0
dtype: int64

# Processo para todos os anos

In [3]:
# Target columns whose missing values will be imputed
target_columns = [
    'temperatura_max',
    'temperatura_min'
]

# Years to process: 2003 through 2022, inclusive
years = list(range(2003, 2023))

# DataFrame.to_csv does not create missing parent directories. Create the
# output folder up front so a missing directory cannot fail the run after the
# first year's (long) download and model search have already completed.
output_dir = '../dados/estacoes'
os.makedirs(output_dir, exist_ok=True)

for year in years:
    # Fetch the station data for this specific year
    df_meta_dados = fetch_inmet_data([year])

    if not df_meta_dados.empty:
        # Run the pipeline for every target column
        df_imputed = run_pipeline(df_meta_dados, target_columns)

        # Save the dataset with imputed values
        file_path = f'{output_dir}/dataset_imputed_{year}.csv'
        df_imputed.to_csv(file_path, index=False)
        print(f"Dataset imputed para {year} salvo com sucesso em {file_path}")
    else:
        print(f"Nenhum dado encontrado para o ano de {year}. O pipeline não foi executado.")

Downloading: 100%|██████████| 46848/46848 [00:13<00:00, 3511.75rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08358399665499099
Mean Absolute Error: 0.20330022890633406
R² Score: 0.9926715925418328
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.04258075054771132
Mean Absolute Error: 0.14730346662087968
R² Score: 0.9953231780969892
Remaining missing values in temperatura_min: 0
Dataset imputed para 2003 salvo com sucesso em ../dados/estacoes/dataset_imputed_2003.csv


Downloading: 100%|██████████| 54816/54816 [00:15<00:00, 3629.05rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.11290881096876938
Mean Absolute Error: 0.21159787442671682
R² Score: 0.9895107557745365
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.048937590446271856
Mean Absolute Error: 0.1524785445908369
R² Score: 0.9944984638179029
Remaining missing values in temperatura_min: 0
Dataset imputed para 2004 salvo com sucesso em ../dados/estacoes/dataset_imputed_2004.csv


Downloading: 100%|██████████| 70080/70080 [00:20<00:00, 3491.48rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09250485524485547
Mean Absolute Error: 0.21470191138923828
R² Score: 0.9943952139230078
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.06964019770949265
Mean Absolute Error: 0.19040374012448333
R² Score: 0.9948271309617347
Remaining missing values in temperatura_min: 0
Dataset imputed para 2005 salvo com sucesso em ../dados/estacoes/dataset_imputed_2005.csv


Downloading: 100%|██████████| 70320/70320 [00:19<00:00, 3691.99rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0913284362057879
Mean Absolute Error: 0.21402008475137188
R² Score: 0.9931415576010315
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07150537320097175
Mean Absolute Error: 0.20165698687754094
R² Score: 0.9937010414436345
Remaining missing values in temperatura_min: 0
Dataset imputed para 2006 salvo com sucesso em ../dados/estacoes/dataset_imputed_2006.csv


Downloading: 100%|██████████| 192696/192696 [00:53<00:00, 3622.19rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07976166200641659
Mean Absolute Error: 0.20169766871584818
R² Score: 0.9944603310693442
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07474386660099012
Mean Absolute Error: 0.20399691465577457
R² Score: 0.9941322191810733
Remaining missing values in temperatura_min: 0
Dataset imputed para 2007 salvo com sucesso em ../dados/estacoes/dataset_imputed_2007.csv


Downloading: 100%|██████████| 498720/498720 [02:27<00:00, 3390.30rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07605290524264757
Mean Absolute Error: 0.19848221651865716
R² Score: 0.9941949452834757
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0763064457715801
Mean Absolute Error: 0.20365145376355862
R² Score: 0.9932853422554568
Remaining missing values in temperatura_min: 0
Dataset imputed para 2008 salvo com sucesso em ../dados/estacoes/dataset_imputed_2008.csv


Downloading: 100%|██████████| 610536/610536 [03:23<00:00, 2995.73rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08451187391112738
Mean Absolute Error: 0.19804519441539584
R² Score: 0.9937036407887225
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.06989089455592237
Mean Absolute Error: 0.19392831534971075
R² Score: 0.9939154844303779
Remaining missing values in temperatura_min: 0
Dataset imputed para 2009 salvo com sucesso em ../dados/estacoes/dataset_imputed_2009.csv


Downloading: 100%|██████████| 624792/624792 [03:34<00:00, 2909.25rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08099364509697021
Mean Absolute Error: 0.2037512930765944
R² Score: 0.9943958295277343
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07572747301281438
Mean Absolute Error: 0.20169965912624832
R² Score: 0.993902140863175
Remaining missing values in temperatura_min: 0
Dataset imputed para 2010 salvo com sucesso em ../dados/estacoes/dataset_imputed_2010.csv


Downloading: 100%|██████████| 639480/639480 [03:29<00:00, 3045.24rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08060709870822116
Mean Absolute Error: 0.20426849813740336
R² Score: 0.9939174916955856
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07763697598627951
Mean Absolute Error: 0.20267368309508046
R² Score: 0.9932160541424131
Remaining missing values in temperatura_min: 0
Dataset imputed para 2011 salvo com sucesso em ../dados/estacoes/dataset_imputed_2011.csv


Downloading: 100%|██████████| 641232/641232 [03:07<00:00, 3424.52rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07926168691952803
Mean Absolute Error: 0.2044825750713439
R² Score: 0.9946715361960267
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07694490660581775
Mean Absolute Error: 0.2045088735281089
R² Score: 0.9940548997380817
Remaining missing values in temperatura_min: 0
Dataset imputed para 2012 salvo com sucesso em ../dados/estacoes/dataset_imputed_2012.csv


Downloading: 100%|██████████| 639480/639480 [02:59<00:00, 3552.75rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07575088018251087
Mean Absolute Error: 0.1992227656705448
R² Score: 0.9947306574559853
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0750916811229251
Mean Absolute Error: 0.20421160907430438
R² Score: 0.9941308415551903
Remaining missing values in temperatura_min: 0
Dataset imputed para 2013 salvo com sucesso em ../dados/estacoes/dataset_imputed_2013.csv


Downloading: 100%|██████████| 639480/639480 [02:58<00:00, 3581.45rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07614251676020935
Mean Absolute Error: 0.20211911372510538
R² Score: 0.9943772215911421
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07773276027636843
Mean Absolute Error: 0.20640153383881668
R² Score: 0.9936022754189668
Remaining missing values in temperatura_min: 0
Dataset imputed para 2014 salvo com sucesso em ../dados/estacoes/dataset_imputed_2014.csv


Downloading: 100%|██████████| 646080/646080 [03:02<00:00, 3533.42rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08152934257962845
Mean Absolute Error: 0.20736473303255443
R² Score: 0.994398069519607
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08307289765516172
Mean Absolute Error: 0.21509053525543537
R² Score: 0.9936229616735799
Remaining missing values in temperatura_min: 0
Dataset imputed para 2015 salvo com sucesso em ../dados/estacoes/dataset_imputed_2015.csv


Downloading: 100%|██████████| 676128/676128 [03:10<00:00, 3548.30rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.07883053891564841
Mean Absolute Error: 0.20388592397776695
R² Score: 0.9945274930657859
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08050779228382712
Mean Absolute Error: 0.21169130585860974
R² Score: 0.9937643001789886
Remaining missing values in temperatura_min: 0
Dataset imputed para 2016 salvo com sucesso em ../dados/estacoes/dataset_imputed_2016.csv


Downloading: 100%|██████████| 712656/712656 [03:24<00:00, 3489.11rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08155865459077508
Mean Absolute Error: 0.20519633274733734
R² Score: 0.9942343266166812
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.0808892026152325
Mean Absolute Error: 0.21211311089081863
R² Score: 0.9935887247502622
Remaining missing values in temperatura_min: 0
Dataset imputed para 2017 salvo com sucesso em ../dados/estacoes/dataset_imputed_2017.csv


Downloading: 100%|██████████| 760968/760968 [03:35<00:00, 3533.62rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08398158979469522
Mean Absolute Error: 0.20965202499214006
R² Score: 0.9937636613569877
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08260143509605383
Mean Absolute Error: 0.21342197795285658
R² Score: 0.9931026051943002
Remaining missing values in temperatura_min: 0
Dataset imputed para 2018 salvo com sucesso em ../dados/estacoes/dataset_imputed_2018.csv


Downloading: 100%|██████████| 762120/762120 [03:39<00:00, 3477.09rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08925511002141845
Mean Absolute Error: 0.2179841301275588
R² Score: 0.9938295690878826
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08700262437773104
Mean Absolute Error: 0.21889872688656664
R² Score: 0.9932233456613995
Remaining missing values in temperatura_min: 0
Dataset imputed para 2019 salvo com sucesso em ../dados/estacoes/dataset_imputed_2019.csv


Downloading: 100%|██████████| 764208/764208 [03:14<00:00, 3931.10rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08796861474709468
Mean Absolute Error: 0.21507504083264764
R² Score: 0.992954978579692
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09187561992833378
Mean Absolute Error: 0.2236101382945208
R² Score: 0.9916569609460133
Remaining missing values in temperatura_min: 0
Dataset imputed para 2020 salvo com sucesso em ../dados/estacoes/dataset_imputed_2020.csv


Downloading: 100%|██████████| 762120/762120 [03:11<00:00, 3977.87rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08520421625897559
Mean Absolute Error: 0.21158892025782283
R² Score: 0.9933818638500513
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08986736342780609
Mean Absolute Error: 0.22393162752751697
R² Score: 0.9921426623946626
Remaining missing values in temperatura_min: 0
Dataset imputed para 2021 salvo com sucesso em ../dados/estacoes/dataset_imputed_2021.csv


Downloading: 100%|██████████| 753360/753360 [02:53<00:00, 4334.11rows/s]


Processing target column: temperatura_max
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.08665322271959683
Mean Absolute Error: 0.21289480849118797
R² Score: 0.9932064328515939
Remaining missing values in temperatura_max: 0
Processing target column: temperatura_min
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Mean Squared Error: 0.09074137281125112
Mean Absolute Error: 0.22560868226461095
R² Score: 0.9919042510278886
Remaining missing values in temperatura_min: 0
Dataset imputed para 2022 salvo com sucesso em ../dados/estacoes/dataset_imputed_2022.csv
