# Importando bibliotecas

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

In [2]:
# Read the dataset from its CSV file into a DataFrame and print a
# per-column summary (non-null counts and dtypes).
df = pd.read_csv(filepath_or_buffer='../databases/dataset2001_2024.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201600 entries, 0 to 201599
Data columns (total 18 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   YEAR                 201600 non-null  int64  
 1   MO                   201600 non-null  int64  
 2   DY                   201600 non-null  int64  
 3   HR                   201600 non-null  int64  
 4   ALLSKY_SFC_UV_INDEX  201600 non-null  float64
 5   ALLSKY_SRF_ALB       201600 non-null  float64
 6   CLRSKY_KT            201600 non-null  float64
 7   CLOUD_AMT            201600 non-null  float64
 8   T2M                  201600 non-null  float64
 9   PS                   201600 non-null  float64
 10  PW                   201600 non-null  float64
 11  WD10M                201600 non-null  float64
 12  WD50M                201600 non-null  float64
 13  WS50M                201600 non-null  float64
 14  WS10M                201600 non-null  float64
 15  TOA_SW_DNI       

## Pré-processamento

In [3]:
# Turn every -999 entry into NaN so it is handled as a missing value.
df = df.replace(to_replace=-999.00, value=np.nan)
# Fill each missing entry with the mean of its own column; the means are
# computed after the replacement above, so -999 does not skew them.
df = df.fillna(value=df.mean())

In [4]:
# Min-max scale every column except YEAR and MO.
# NOTE(review): the previous comment claimed DY and HR were excluded, but the
# positional slice `df.iloc[:, 2:]` dropped the first two columns, which
# df.info() lists as YEAR and MO; DY and HR are scaled and kept as features.
# Selecting by name preserves that behavior and makes the dropped columns
# explicit. Confirm which exclusion is actually intended.
# NOTE(review): the scaler is fit on the whole frame; if a train/test split is
# performed later, consider fitting on the training portion only.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(df.drop(columns=['YEAR', 'MO']))

In [6]:
# Wrap the scaled array in a DataFrame labelled with the names of every
# df column from the third one onward, then display it.
normalized_df = pd.DataFrame(data=normalized_data, columns=df.columns[2:])
normalized_df

Unnamed: 0,DY,HR,ALLSKY_SFC_UV_INDEX,ALLSKY_SRF_ALB,CLRSKY_KT,CLOUD_AMT,T2M,PS,PW,WD10M,WD50M,WS50M,WS10M,TOA_SW_DNI,QV2M,QV10M
0,0.0,0.000000,0.000000,0.516858,0.754395,0.3270,0.476855,0.432143,0.473786,0.389148,0.387748,0.385520,0.352206,0.000000,0.621997,0.661822
1,0.0,0.043478,0.000000,0.516858,0.754395,0.2488,0.466770,0.435714,0.462136,0.384592,0.382887,0.381807,0.336386,0.000000,0.609183,0.648407
2,0.0,0.086957,0.000000,0.516858,0.754395,0.5142,0.458236,0.421429,0.452427,0.383647,0.381859,0.375619,0.322231,0.000000,0.599573,0.638345
3,0.0,0.130435,0.000000,0.516858,0.754395,0.6636,0.451254,0.403571,0.440777,0.382564,0.380581,0.363861,0.300583,0.000000,0.589429,0.631079
4,0.0,0.173913,0.000000,0.516858,0.754395,0.5116,0.444789,0.389286,0.431068,0.383203,0.381164,0.347153,0.277269,0.000000,0.583022,0.624371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201595,1.0,0.826087,0.157158,0.555556,0.807229,0.3179,0.711404,0.410714,0.473786,0.431878,0.430060,0.397896,0.469609,0.999163,0.498131,0.525433
201596,1.0,0.869565,0.043057,0.481481,0.698795,0.2045,0.656323,0.428571,0.469903,0.423654,0.422475,0.454827,0.507910,0.999128,0.511479,0.546115
201597,1.0,0.913043,0.002691,0.592593,0.457831,0.3140,0.598138,0.453571,0.469903,0.413097,0.412418,0.487005,0.515404,0.999234,0.530699,0.566238
201598,1.0,0.956522,0.000000,0.516858,0.754395,0.2106,0.552883,0.478571,0.471845,0.404512,0.404528,0.457921,0.465445,0.000000,0.550454,0.586920


Por exemplo, podemos definir o limite de temperatura (T2M) que caracteriza um evento extremo como 40 °C.

In [10]:
# Cutoff used to define an extreme event (40 °C per the accompanying text).
threshold_temp = 40
# Binary label: 1 where T2M is strictly above the cutoff, 0 otherwise.
df['extreme_event'] = df['T2M'].gt(threshold_temp).astype(int)