In [17]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, accuracy_score, recall_score
import matplotlib.pyplot as plt

In [2]:


# Carregando os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Imputando valores faltantes para a coluna 'Temp. Ins. (C)' em df3
df3['Temp. Ins. (C)'] = df3['Temp. Ins. (C)'].fillna(df3['Temp. Ins. (C)'].mean())

# Concatenando os DataFrames
df = pd.concat([df1, df2, df3])

# Transformando a coluna 'num_ocorrencias' em binária (0: não ocorreu, 1: ocorreu)
df['num_ocorrencias'] = df['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)

# Separando a variável alvo
y = df['num_ocorrencias']

# Definindo as features (X), excluindo a coluna alvo
X = df.drop(columns=['num_ocorrencias'])

# Normalizando os dados 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Separando em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Inicializando o SMOTE
smote = SMOTE(sampling_strategy='minority', random_state=42)

# Aplicando o SMOTE nos dados de treino
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Verificando a quantidade de amostras após o SMOTE
print(f'Antes do SMOTE: {len(y_train[y_train == 1])} quedas de energia.')
print(f'Depois do SMOTE: {len(y_resampled[y_resampled == 1])} quedas de energia.')

# Modelo LSTM
model = Sequential()
model.add(LSTM(100, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Compilando o modelo
model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])

# Treinando o modelo
model.fit(X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[1], 1), y_resampled, epochs=50, batch_size=32, validation_split=0.2)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023.h5')


Antes do SMOTE: 29428 quedas de energia.
Depois do SMOTE: 73593 quedas de energia.
Epoch 1/50


  super().__init__(**kwargs)


[1m3680/3680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 12ms/step - accuracy: 0.6282 - loss: 0.6540 - val_accuracy: 0.0236 - val_loss: 1.0136
Epoch 2/50
[1m3680/3680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 12ms/step - accuracy: 0.6334 - loss: 0.6479 - val_accuracy: 0.1318 - val_loss: 0.9577
Epoch 3/50
[1m3680/3680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 12ms/step - accuracy: 0.6348 - loss: 0.6457 - val_accuracy: 0.1731 - val_loss: 0.9168
Epoch 4/50
[1m3680/3680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 12ms/step - accuracy: 0.6321 - loss: 0.6468 - val_accuracy: 0.2658 - val_loss: 0.8642
Epoch 5/50
[1m3680/3680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 12ms/step - accuracy: 0.6328 - loss: 0.6461 - val_accuracy: 0.0345 - val_loss: 1.0599
Epoch 6/50
[1m3680/3680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 14ms/step - accuracy: 0.6314 - loss: 0.6457 - val_accuracy: 0.0707 - val_loss: 1.0610
Epoch 7/50
[1m

<keras.src.callbacks.history.History at 0x1ab92095070>

In [12]:
# Fazer previsões no conjunto de teste
y_pred = model.predict(X_test)


# Converter previsões e verdadeiras etiquetas para binário se necessário
threshold = 0.5
y_pred_binary = (y_pred > threshold).astype(int)
y_test_binary = (y_test > threshold).astype(int)

# Avaliação do modelo
accuracy = accuracy_score(y_test_binary, y_pred_binary)
recall = recall_score(y_test_binary, y_pred_binary)
roc_auc = roc_auc_score(y_test_binary, y_pred_binary)
conf_matrix = confusion_matrix(y_test_binary, y_pred_binary)

# Cálculo da especificidade
tn, fp, fn, tp = conf_matrix.ravel()
specificity = tn / (tn + fp)

# Exibir resultados
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')

[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
Acurácia: 0.7041
Sensibilidade (Recall): 0.1789
AUC: 0.5481
Especificidade: 0.9174


Smote_Previsao_3cvs_2019-2023_erro.h5

In [14]:
# Carregando os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Imputando valores faltantes para a coluna 'Temp. Ins. (C)' em df3
df3['Temp. Ins. (C)'] = df3['Temp. Ins. (C)'].fillna(df3['Temp. Ins. (C)'].mean())

# Concatenando os DataFrames
df = pd.concat([df1, df2, df3])

# Transformando a coluna 'num_ocorrencias' em binária (0: não ocorreu, 1: ocorreu)
df['num_ocorrencias'] = df['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)

# Separando a variável alvo
y = df['num_ocorrencias']

# Definindo as features (X), excluindo a coluna alvo
X = df.drop(columns=['num_ocorrencias'])

# Normalizando os dados 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Separando em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Inicializando o SMOTE
smote = SMOTE(sampling_strategy='minority', random_state=42)

# Aplicando o SMOTE nos dados de treino
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Verificando a quantidade de amostras após o SMOTE
print(f'Antes do SMOTE: {len(y_train[y_train == 1])} quedas de energia.')
print(f'Depois do SMOTE: {len(y_resampled[y_resampled == 1])} quedas de energia.')

# Modelo LSTM
model = Sequential()

model.add(LSTM(units=100, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=100, return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(units=1, activation='sigmoid'))

# Compilando o modelo
model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])

# Treinando o modelo
model.fit(X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[1], 1), y_resampled, epochs=120, batch_size=100, validation_split=0.2)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_erro.h5')


Antes do SMOTE: 29428 quedas de energia.
Depois do SMOTE: 73593 quedas de energia.
Epoch 1/120


  super().__init__(**kwargs)


[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 22ms/step - accuracy: 0.6279 - loss: 0.6567 - val_accuracy: 0.1315 - val_loss: 0.9170
Epoch 2/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.6310 - loss: 0.6495 - val_accuracy: 0.0567 - val_loss: 0.9741
Epoch 3/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - accuracy: 0.6316 - loss: 0.6479 - val_accuracy: 0.1533 - val_loss: 0.9403
Epoch 4/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 18ms/step - accuracy: 0.6320 - loss: 0.6470 - val_accuracy: 0.0460 - val_loss: 1.0048
Epoch 5/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 18ms/step - accuracy: 0.6325 - loss: 0.6465 - val_accuracy: 0.2294 - val_loss: 0.9166
Epoch 6/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.6370 - loss: 0.6440 - val_accuracy: 0.1768 - val_loss: 0.9031
Epoch 7/12



Smote_Previsao_3cvs_2019-2023_v1

In [32]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd

# Carregar e preparar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Concatenar os DataFrames
df = pd.concat([df1, df2, df3])

# Criar a coluna de ocorrências (0 ou 1)
df['num_ocorrencias'] = df['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y = df['num_ocorrencias']
X = df.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes (preenchendo com a média das colunas, por exemplo)
imputer = SimpleImputer(strategy='mean')  # Você pode alterar para 'median' ou outro método
X = imputer.fit_transform(X)

# Escalonar os dados
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Dividir em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Aplicar SMOTE para balanceamento
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Verificar a forma de X_resampled
print(f"Forma de X_resampled: {X_resampled.shape}")  # Deve ser (n amostras, n_features)

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=100, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])

# Treinar o modelo, ajustando o reshape para incluir o terceiro eixo (n_features=1)
model.fit(
    X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[1], 1),  # reshape para (amostras, features, 1)
    y_resampled,
    epochs=120,
    batch_size=100,
    validation_split=0.2
)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_v1.h5')

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test.reshape((X_test.shape[0], X_resampled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')


Forma de X_resampled: (147186, 4)
Epoch 1/120


  super().__init__(**kwargs)


[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 19ms/step - accuracy: 0.6286 - loss: 0.6570 - val_accuracy: 0.0868 - val_loss: 0.9375
Epoch 2/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.6316 - loss: 0.6487 - val_accuracy: 0.0719 - val_loss: 0.9655
Epoch 3/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 22ms/step - accuracy: 0.6343 - loss: 0.6468 - val_accuracy: 0.1139 - val_loss: 0.9961
Epoch 4/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 18ms/step - accuracy: 0.6331 - loss: 0.6465 - val_accuracy: 0.0711 - val_loss: 1.0658
Epoch 5/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 17ms/step - accuracy: 0.6323 - loss: 0.6465 - val_accuracy: 0.1274 - val_loss: 0.9456
Epoch 6/120
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 19ms/step - accuracy: 0.6341 - loss: 0.6459 - val_accuracy: 0.1028 - val_loss: 0.9808
Epoch 7/12



[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
Acurácia: 0.7098
Sensibilidade (Recall): 0.1196
AUC: 0.5346
Especificidade: 0.9496


Modelo concatenaçao df1 e df2

In [35]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd

# Carregar e preparar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Concatenar os DataFrames df1 e df2 para treinamento
df_train = pd.concat([df1, df2])

# Criar a coluna de ocorrências (0 ou 1)
df_train['num_ocorrencias'] = df_train['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df_train['num_ocorrencias']
X_train = df_train.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes (preenchendo com a média das colunas, por exemplo)
imputer = SimpleImputer(strategy='mean')  
X_train = imputer.fit_transform(X_train)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Aplicar SMOTE para balanceamento
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train)

# Verificar a forma de X_resampled
print(f"Forma de X_resampled: {X_resampled.shape}")  # Deve ser (n amostras, n_features)

# Preparar o conjunto de teste
df_test = df3.copy()
df_test['num_ocorrencias'] = df_test['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df_test['num_ocorrencias']
X_test = df_test.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes e escalar os dados de teste
X_test = imputer.transform(X_test)  # Usar o mesmo imputer
X_test_scaled = scaler.transform(X_test)  # Usar o mesmo scaler

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=100, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])

# Treinar o modelo, ajustando o reshape para incluir o terceiro eixo (n_features=1)
model.fit(
    X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[1], 1),  # reshape para (amostras, features, 1)
    y_resampled,
    epochs=30,  # Reduzir o número de epochs
    batch_size=100,
    validation_split=0.2
)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_v2.h5')

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_resampled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')


Forma de X_resampled: (133356, 4)


  super().__init__(**kwargs)


Epoch 1/30
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 28ms/step - accuracy: 0.6247 - loss: 0.6597 - val_accuracy: 0.0778 - val_loss: 1.0071
Epoch 2/30
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 26ms/step - accuracy: 0.6256 - loss: 0.6522 - val_accuracy: 0.0603 - val_loss: 0.9953
Epoch 3/30
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 21ms/step - accuracy: 0.6278 - loss: 0.6519 - val_accuracy: 0.0902 - val_loss: 1.0027
Epoch 4/30
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 23ms/step - accuracy: 0.6256 - loss: 0.6519 - val_accuracy: 0.0324 - val_loss: 0.9810
Epoch 5/30
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 26ms/step - accuracy: 0.6296 - loss: 0.6497 - val_accuracy: 0.0842 - val_loss: 0.9378
Epoch 6/30
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 26ms/step - accuracy: 0.6281 - loss: 0.6508 - val_accuracy: 0.1273 - val_loss: 0.9323
Epoc



[1m1331/1331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step
Acurácia: 0.6060
Sensibilidade (Recall): 0.1864
AUC: 0.5405
Especificidade: 0.8947


'Smote_Previsao_3cvs_2019-2023_v3.h5

In [37]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Carregar e preparar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Concatenar os DataFrames para treino (usando df1 e df2)
df_train = pd.concat([df1, df2])

# Criar a coluna de ocorrências (0 ou 1)
df_train['num_ocorrencias'] = df_train['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df_train['num_ocorrencias']
X_train = df_train.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes (preenchendo com a média das colunas)
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Aplicar SMOTE para balanceamento
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train)

# Preparar os dados de teste
df_test = df3.copy()
df_test['num_ocorrencias'] = df_test['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df_test['num_ocorrencias']
X_test = df_test.drop('num_ocorrencias', axis=1)

# Imputar e escalar os dados de teste
X_test = imputer.transform(X_test)
X_test_scaled = scaler.transform(X_test)

# Verificar a forma de X_resampled
print(f"Forma de X_resampled: {X_resampled.shape}")

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=150, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(units=150, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Configurar callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_auc', save_best_only=True, mode='max', verbose=1)

# Treinar o modelo
model.fit(
    X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[1], 1),
    y_resampled,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint]
)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_v3.h5')

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_resampled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')


Forma de X_resampled: (133356, 4)
Epoch 1/50


  super().__init__(**kwargs)


[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 24ms/step - accuracy: 0.6299 - loss: 0.6547 - val_accuracy: 0.0118 - val_loss: 1.0110
Epoch 2/50
[1m   8/1667[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25s[0m 15ms/step - accuracy: 0.6199 - loss: 0.6577

  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 27ms/step - accuracy: 0.6325 - loss: 0.6487 - val_accuracy: 0.1063 - val_loss: 0.9455
Epoch 3/50
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 25ms/step - accuracy: 0.6323 - loss: 0.6467 - val_accuracy: 0.1906 - val_loss: 0.9038
Epoch 4/50
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 24ms/step - accuracy: 0.6302 - loss: 0.6473 - val_accuracy: 0.1733 - val_loss: 0.9005
Epoch 5/50
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 27ms/step - accuracy: 0.6356 - loss: 0.6448 - val_accuracy: 0.1452 - val_loss: 0.9216
Epoch 6/50
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 23ms/step - accuracy: 0.6300 - loss: 0.6474 - val_accuracy: 0.1437 - val_loss: 0.9421
Epoch 7/50
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 21ms/step - accuracy: 0.6333 - loss: 0.6461 - val_accuracy: 0.1496 - val_loss: 0.9166
Epoch 8/50
[1m

Smote_Previsao_3cvs_2019-2023_v4.h5

In [39]:
# Importar bibliotecas necessárias
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Carregar e preparar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Concatenar os DataFrames para treino (usando df1 e df2)
df_train = pd.concat([df1, df2])

# Criar a coluna de ocorrências (0 ou 1)
df_train['num_ocorrencias'] = df_train['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df_train['num_ocorrencias']
X_train = df_train.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Aplicar SMOTE para balanceamento
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train)

# Preparar os dados de teste
df_test = df3.copy()
df_test['num_ocorrencias'] = df_test['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df_test['num_ocorrencias']
X_test = df_test.drop('num_ocorrencias', axis=1)

# Imputar e escalar os dados de teste
X_test = imputer.transform(X_test)
X_test_scaled = scaler.transform(X_test)

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=150, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.3))  # Aumentar taxa de dropout
model.add(LSTM(units=150, return_sequences=True))  # Adicionar outra camada LSTM
model.add(Dropout(0.3))
model.add(LSTM(units=150, return_sequences=False))  # Última camada LSTM
model.add(Dropout(0.3))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Configurar callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_auc', save_best_only=True, mode='max', verbose=1)

# Treinar o modelo
model.fit(
    X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[1], 1),
    y_resampled,
    epochs=100,  # Aumentar número de épocas
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint]
)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_v4.h5')

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_resampled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')


  super().__init__(**kwargs)


Epoch 1/100
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 36ms/step - accuracy: 0.6262 - loss: 0.6564 - val_accuracy: 0.0546 - val_loss: 1.0032
Epoch 2/100
[1m   7/1667[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m35s[0m 22ms/step - accuracy: 0.6264 - loss: 0.6599

  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 37ms/step - accuracy: 0.6310 - loss: 0.6475 - val_accuracy: 0.1308 - val_loss: 0.9109
Epoch 3/100
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 41ms/step - accuracy: 0.6316 - loss: 0.6479 - val_accuracy: 0.0612 - val_loss: 1.0183
Epoch 4/100
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 38ms/step - accuracy: 0.6344 - loss: 0.6461 - val_accuracy: 0.0851 - val_loss: 0.9673
Epoch 5/100
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 42ms/step - accuracy: 0.6327 - loss: 0.6471 - val_accuracy: 0.0841 - val_loss: 0.9386
Epoch 6/100
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 39ms/step - accuracy: 0.6310 - loss: 0.6474 - val_accuracy: 0.1349 - val_loss: 1.0292
Epoch 7/100
[1m1667/1667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 40ms/step - accuracy: 0.6317 - loss: 0.6457 - val_accuracy: 0.0992 - val_loss: 0.9776
Epoch 8/10



[1m1331/1331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 24ms/step
Acurácia: 0.5938
Sensibilidade (Recall): 0.3884
AUC: 0.5617
Especificidade: 0.7351


EROO: Smote_Previsao_3cvs_2019-2023_v5.h5

In [40]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler

# Carregar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')

# Criar a coluna de ocorrências (0 ou 1)
df1['num_ocorrencias'] = df1['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df1['num_ocorrencias']
X_train = df1.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Balancear os dados usando undersampling
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X_train_scaled, y_train)

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=100, input_shape=(X_resampled.shape[1], 1), return_sequences=True))  # Primeira camada LSTM
model.add(Dropout(0.4))  # Camada Dropout
model.add(LSTM(units=100, return_sequences=True))  # Segunda camada LSTM
model.add(Dropout(0.4))  # Camada Dropout
model.add(LSTM(units=100, return_sequences=False))  # Última camada LSTM
model.add(Dropout(0.4))  # Camada Dropout
model.add(Dense(units=50, activation='relu'))  # Camada densa adicional
model.add(Dense(units=1, activation='sigmoid'))  # Camada de saída

# Compilar o modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Ajustar o X_resampled para o formato correto
X_resampled_reshaped = X_resampled.reshape((X_resampled.shape[0], X_resampled.shape[1], 1))

# Treinar o modelo
model.fit(
    X_resampled_reshaped,
    y_resampled,
    epochs=150,  # Aumentando o número de épocas
    batch_size=64,
    validation_split=0.2
)

# Preparar os dados de teste
df2['num_ocorrencias'] = df2['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df2['num_ocorrencias']
X_test = df2.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes no conjunto de teste
X_test = imputer.transform(X_test)
X_test_scaled = scaler.transform(X_test)

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')


Epoch 1/150


  super().__init__(**kwargs)


[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 28ms/step - accuracy: 0.6249 - loss: 0.6627 - val_accuracy: 0.1470 - val_loss: 0.9360
Epoch 2/150
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.6248 - loss: 0.6497 - val_accuracy: 0.0428 - val_loss: 1.0841
Epoch 3/150
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.6330 - loss: 0.6450 - val_accuracy: 0.0536 - val_loss: 1.0515
Epoch 4/150
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - accuracy: 0.6307 - loss: 0.6464 - val_accuracy: 0.1445 - val_loss: 1.0172
Epoch 5/150
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - accuracy: 0.6327 - loss: 0.6443 - val_accuracy: 0.0979 - val_loss: 1.1059
Epoch 6/150
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 30ms/step - accuracy: 0.6410 - loss: 0.6429 - val_accuracy: 0.0422 - val_loss: 1.1706
Epoch 7/150
[1m297/297[0m

Smote_Previsao_3cvs_2019-2023_v5.h5

In [42]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd
from imblearn.combine import SMOTEENN  # Importando SMOTE + ENN
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Carregar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Criar a coluna de ocorrências (0 ou 1)
df1['num_ocorrencias'] = df1['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df1['num_ocorrencias']
X_train = df1.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Balancear os dados usando SMOTE + ENN
smote_enn = SMOTEENN(random_state=42)
X_resampled, y_resampled = smote_enn.fit_resample(X_train_scaled, y_train)

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=128, input_shape=(X_resampled.shape[1], 1), return_sequences=True))  # Aumentar unidades
model.add(Dropout(0.4))
model.add(BatchNormalization())
model.add(LSTM(units=128, return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())
model.add(LSTM(units=128, return_sequences=False))
model.add(Dropout(0.4))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Ajustar o X_resampled para o formato correto
X_resampled_reshaped = X_resampled.reshape((X_resampled.shape[0], X_resampled.shape[1], 1))

# Callbacks para Early Stopping e Model Checkpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_auc', mode='max')

# Treinar o modelo
model.fit(
    X_resampled_reshaped,
    y_resampled,
    epochs=200,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint]
)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_v5.h5')

# Preparar os dados de teste
df2['num_ocorrencias'] = df2['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df2['num_ocorrencias']
X_test = df2.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes no conjunto de teste
X_test = imputer.transform(X_test)
X_test_scaled = scaler.transform(X_test)

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')

# Preparar os dados de teste final (df3)
df3['num_ocorrencias'] = df3['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test_final = df3['num_ocorrencias']
X_test_final = df3.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes no conjunto de teste
X_test_final = imputer.transform(X_test_final)
X_test_final_scaled = scaler.transform(X_test_final)

# Ajustar o X_test_final para o formato correto
X_test_final_reshaped = X_test_final_scaled.reshape((X_test_final_scaled.shape[0], X_test_final_scaled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_final_prob = model.predict(X_test_final_reshaped)
y_pred_final = (y_pred_final_prob > 0.5).astype(int)

# Calcular as métricas para df3
accuracy_final = accuracy_score(y_test_final, y_pred_final)
recall_final = recall_score(y_test_final, y_pred_final)
roc_auc_final = roc_auc_score(y_test_final, y_pred_final)

# Calcular especificidade
tn_final, fp_final, fn_final, tp_final = confusion_matrix(y_test_final, y_pred_final).ravel()
specificity_final = tn_final / (tn_final + fp_final)

# Imprimir as métricas para df3
print(f'Acurácia no conjunto de teste (df3): {accuracy_final:.4f}')
print(f'Sensibilidade (Recall) no conjunto de teste (df3): {recall_final:.4f}')
print(f'AUC no conjunto de teste (df3): {roc_auc_final:.4f}')
print(f'Especificidade no conjunto de teste (df3): {specificity_final:.4f}')


  super().__init__(**kwargs)


Epoch 1/200
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 39ms/step - accuracy: 0.6302 - loss: 0.6468 - val_accuracy: 0.5056 - val_loss: 0.6859
Epoch 2/200
[1m  3/333[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8s[0m 26ms/step - accuracy: 0.6059 - loss: 0.6347 

  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 33ms/step - accuracy: 0.6340 - loss: 0.6396 - val_accuracy: 0.6398 - val_loss: 0.6125
Epoch 3/200
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 37ms/step - accuracy: 0.6383 - loss: 0.6371 - val_accuracy: 0.6958 - val_loss: 0.5432
Epoch 4/200
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 32ms/step - accuracy: 0.6377 - loss: 0.6362 - val_accuracy: 0.6889 - val_loss: 0.5619
Epoch 5/200
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 27ms/step - accuracy: 0.6423 - loss: 0.6325 - val_accuracy: 0.5028 - val_loss: 0.7181
Epoch 6/200
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 27ms/step - accuracy: 0.6314 - loss: 0.6394 - val_accuracy: 0.6202 - val_loss: 0.6101
Epoch 7/200
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 32ms/step - accuracy: 0.6401 - loss: 0.6321 - val_accuracy: 0.6287 - val_loss: 0.6191
Epoch 8/200
[1m333/333

In [45]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler

# Carregar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Concatenar df1 e df3 para o conjunto de treino
df_train = pd.concat([df1, df3])

# Criar a coluna de ocorrências (0 ou 1)
df_train['num_ocorrencias'] = df_train['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df_train['num_ocorrencias']
X_train = df_train.drop('num_ocorrencias', axis=1)

# Preparar o conjunto de teste
df2['num_ocorrencias'] = df2['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df2['num_ocorrencias']
X_test = df2.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes (preenchendo com a média das colunas, por exemplo)
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Aplicar SMOTE para aumentar a classe minoritária
smote = SMOTE(sampling_strategy='minority', random_state=42)  # Gerando amostras da classe minoritária
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train)

# Aplicar Undersampling na classe majoritária
undersampler = RandomUnderSampler(sampling_strategy='majority', random_state=42)  # Reduzindo a classe majoritária
X_resampled, y_resampled = undersampler.fit_resample(X_resampled, y_resampled)

# Verificar a forma de X_resampled
print(f"Forma de X_resampled: {X_resampled.shape}")

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=128, input_shape=(X_resampled.shape[1], 1), return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(LSTM(units=128, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Ajustar o X_resampled para o formato correto
X_resampled_reshaped = X_resampled.reshape((X_resampled.shape[0], X_resampled.shape[1], 1))

# Treinar o modelo
model.fit(
    X_resampled_reshaped,
    y_resampled,
    epochs=150,  
    batch_size=32,
    validation_split=0.2
)

# Preparar o conjunto de teste para previsão
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')


Forma de X_resampled: (111930, 4)
Epoch 1/150


  super().__init__(**kwargs)


[1m2799/2799[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.6242 - loss: 0.6554 - val_accuracy: 0.0618 - val_loss: 0.9370
Epoch 2/150
[1m2799/2799[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 15ms/step - accuracy: 0.6264 - loss: 0.6519 - val_accuracy: 0.1900 - val_loss: 0.9611
Epoch 3/150
[1m2799/2799[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 9ms/step - accuracy: 0.6294 - loss: 0.6514 - val_accuracy: 0.1790 - val_loss: 0.8517
Epoch 4/150
[1m2799/2799[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 10ms/step - accuracy: 0.6247 - loss: 0.6520 - val_accuracy: 0.1040 - val_loss: 0.9702
Epoch 5/150
[1m2799/2799[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 10ms/step - accuracy: 0.6280 - loss: 0.6505 - val_accuracy: 0.1093 - val_loss: 0.9090
Epoch 6/150
[1m2799/2799[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 10ms/step - accuracy: 0.6287 - loss: 0.6494 - val_accuracy: 0.1419 - val_loss: 0.9420
Epoch 7/150

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix
import pandas as pd
from imblearn.combine import SMOTEENN  # Importando SMOTE + ENN
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Carregar os dados
df1 = pd.read_csv('clima_queda_parelheiros_2019-2023.csv')
df2 = pd.read_csv('clima_queda_santana_2019-2023.csv')
df3 = pd.read_csv('clima_queda_virginha_2019-2023.csv')

# Criar a coluna de ocorrências (0 ou 1)
df1['num_ocorrencias'] = df1['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_train = df1['num_ocorrencias']
X_train = df1.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Escalonar os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Balancear os dados usando SMOTE + ENN
smote_enn = SMOTEENN(random_state=42)
X_resampled, y_resampled = smote_enn.fit_resample(X_train_scaled, y_train)

# Criar o modelo LSTM
model = Sequential()
model.add(LSTM(units=128, input_shape=(X_resampled.shape[1], 1), return_sequences=True))  # Aumentar unidades
model.add(Dropout(0.4))
model.add(BatchNormalization())
model.add(LSTM(units=128, return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())
model.add(LSTM(units=128, return_sequences=False))
model.add(Dropout(0.4))
model.add(Dense(units=1, activation='sigmoid'))

# Compilar o modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Ajustar o X_resampled para o formato correto
X_resampled_reshaped = X_resampled.reshape((X_resampled.shape[0], X_resampled.shape[1], 1))

# Callbacks para Early Stopping e Model Checkpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_auc', mode='max')

# Treinar o modelo
model.fit(
    X_resampled_reshaped,
    y_resampled,
    epochs=200,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint]
)

# Salvar o modelo
model.save('Smote_Previsao_3cvs_2019-2023_v5.h5')

# Preparar os dados de teste
df2['num_ocorrencias'] = df2['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test = df2['num_ocorrencias']
X_test = df2.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes no conjunto de teste
X_test = imputer.transform(X_test)
X_test_scaled = scaler.transform(X_test)

# Ajustar o X_test para o formato correto
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_prob = model.predict(X_test_reshaped)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calcular as métricas
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Calcular especificidade
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Imprimir as métricas
print(f'Acurácia: {accuracy:.4f}')
print(f'Sensibilidade (Recall): {recall:.4f}')
print(f'AUC: {roc_auc:.4f}')
print(f'Especificidade: {specificity:.4f}')

# Preparar os dados de teste final (df3)
df3['num_ocorrencias'] = df3['num_ocorrencias'].apply(lambda x: 1 if x != 0 else 0)
y_test_final = df3['num_ocorrencias']
X_test_final = df3.drop('num_ocorrencias', axis=1)

# Imputar valores ausentes no conjunto de teste
X_test_final = imputer.transform(X_test_final)
X_test_final_scaled = scaler.transform(X_test_final)

# Ajustar o X_test_final para o formato correto
X_test_final_reshaped = X_test_final_scaled.reshape((X_test_final_scaled.shape[0], X_test_final_scaled.shape[1], 1))

# Prever usando o modelo treinado
y_pred_final_prob = model.predict(X_test_final_reshaped)
y_pred_final = (y_pred_final_prob > 0.5).astype(int)

# Calcular as métricas para df3
accuracy_final = accuracy_score(y_test_final, y_pred_final)
recall_final = recall_score(y_test_final, y_pred_final)
roc_auc_final = roc_auc_score(y_test_final, y_pred_final)

# Calcular especificidade
tn_final, fp_final, fn_final, tp_final = confusion_matrix(y_test_final, y_pred_final).ravel()
specificity_final = tn_final / (tn_final + fp_final)

# Imprimir as métricas para df3
print(f'Acurácia no conjunto de teste (df3): {accuracy_final:.4f}')
print(f'Sensibilidade (Recall) no conjunto de teste (df3): {recall_final:.4f}')
print(f'AUC no conjunto de teste (df3): {roc_auc_final:.4f}')
print(f'Especificidade no conjunto de teste (df3): {specificity_final:.4f}')
