Importing Required Libraries

In [66]:
# Data Manipulation and Analysis
import pandas as pd
import numpy as np

# Machine Learning and Preprocessing
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import silhouette_score, mean_absolute_error, mean_squared_error, r2_score, precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split

# Deep Learning Framework
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Visualization Libraries
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt

In [59]:
# Load the dataset
file_path = "/content/Mera_CleanData.csv"
data = pd.read_csv(file_path)
data = data.drop('extreme_weather', axis=1)

In [60]:
data

Unnamed: 0,date,max_temp,min_temp,igmin,min_grass,rain,pressure_cbl,wind_speed,max_10minute_wind,dir_10minute_wind,...,day,temp_range,smd_combined,wind_category,extreme_rain,heatwave,high_wind,extreme_weather_combination,sun_category,season
0,1979-10-28,-0.27,-0.36,1,-1.18,0.9,997.9,-0.31,-0.26,0.89,...,28,7.5,-0.13,W,0,1,1,0-1-1,Extremely High,Autumn
1,1979-10-29,-0.48,-0.48,0,-0.29,0.7,998.8,-0.45,-0.88,0.42,...,29,7.0,-0.13,SW,0,1,1,0-1-1,Moderate,Autumn
2,1979-10-30,0.48,0.60,0,0.62,4.8,990.8,0.92,0.96,0.07,...,30,6.9,-2.30,SW,1,1,1,1-1-1,Moderate,Autumn
3,1979-10-31,-0.15,0.22,0,0.21,0.5,990.0,0.74,0.96,0.54,...,31,5.5,-0.82,W,0,1,1,0-1-1,Extremely High,Autumn
4,1979-11-01,-0.40,-0.14,0,-0.04,0.0,1001.6,0.51,0.20,0.42,...,1,5.9,0.30,SW,0,1,1,0-1-1,Extremely High,Autumn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16436,2024-10-27,0.26,-0.50,0,-0.50,3.1,1007.8,-0.27,-0.42,-0.04,...,27,10.7,2.32,S,1,1,1,1-1-1,Moderate,Autumn
16437,2024-10-28,0.28,1.15,0,0.64,1.1,1007.8,-0.36,-0.88,0.77,...,28,3.5,1.92,W,0,1,1,0-1-1,Moderate,Autumn
16438,2024-10-29,-0.38,0.76,0,0.85,0.0,1015.1,-0.70,-0.88,1.01,...,29,2.0,2.25,W,0,1,1,0-1-1,Moderate,Autumn
16439,2024-10-30,0.34,0.78,0,0.75,0.0,1018.7,-1.34,-1.49,0.89,...,30,5.4,2.65,W,0,1,1,0-1-1,Extremely High,Autumn


Cluster Analysis

In [64]:
# Select relevant features for clustering
features = ['extreme_rain', 'heatwave', 'high_wind']
data_clustering = data[features].dropna()

# Standardize features for clustering
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data_clustering)

# Apply K-Means Clustering
kmeans = KMeans(n_clusters=3, random_state=42)  # Adjust n_clusters based on your requirements
kmeans_labels = kmeans.fit_predict(scaled_features)
silhouette_kmeans = silhouette_score(scaled_features, kmeans_labels)

# Add cluster labels to the dataset
data_clustering['Cluster'] = kmeans_labels

# Visualization: Extreme Rain vs High Wind
fig1 = px.scatter(
    data_clustering,
    x='extreme_rain',
    y='high_wind',
    color='Cluster',
    title="K-Means Clustering: Extreme Rain vs High Wind",
    labels={'Cluster': 'Cluster'},
    template="plotly_dark"
)
fig1.update_layout(
    xaxis_title="Extreme Rain",
    yaxis_title="High Wind",
    legend_title="Cluster",
    height=600,
    width=800
)
fig1.show()

print("\n")

#Visualization: Heatwave vs High Wind
fig2 = px.scatter(
    data_clustering,
    x='heatwave',
    y='high_wind',
    color='Cluster',
    title="K-Means Clustering: Heatwave vs High Wind",
    labels={'Cluster': 'Cluster'},
    template="plotly_dark"
)
fig2.update_layout(
    xaxis_title="Heatwave",
    yaxis_title="High Wind",
    legend_title="Cluster",
    height=600,
    width=800
)
fig2.show()

print("\n")

# 3D Scatter Plot for Extreme Rain, Heatwave, and High Wind
fig3 = px.scatter_3d(
    data_clustering,
    x='extreme_rain',
    y='heatwave',
    z='high_wind',
    color='Cluster',
    title="K-Means Clustering: 3D Visualization",
    labels={'Cluster': 'Cluster'},
    template="plotly_dark"
)
fig3.update_layout(
    scene=dict(
        xaxis_title="Extreme Rain",
        yaxis_title="Heatwave",
        zaxis_title="High Wind"
    ),
    height=700,
    width=900
)
fig3.show()
print("\n")
# showing cluster averages for further insights
print(f"K-Means Silhouette Score: {silhouette_kmeans}")
cluster_means = data_clustering.groupby('Cluster').mean()
print("Cluster Means:\n", cluster_means)











K-Means Silhouette Score: 0.9902372861962644
Cluster Means:
          extreme_rain  heatwave  high_wind
Cluster                                   
0             0.00000       1.0   0.999501
1             1.00000       1.0   1.000000
2             0.21393       0.0   1.000000


LSTM Model

In [65]:
# Assign weights to features and calculate normalized Extreme_Weather
rain_weight = 0.333
heatwave_weight = 0.333
high_wind_weight = 0.333

data['Extreme_Weather'] = (
    data['extreme_rain'] * rain_weight +
    data['heatwave'] * heatwave_weight +
    data['high_wind'] * high_wind_weight
)
data['Extreme_Weather'] = data['Extreme_Weather'] / data['Extreme_Weather'].max()

# Select features and target
features = ['extreme_rain', 'heatwave', 'high_wind']
target = 'Extreme_Weather'
data = data[features + [target]].dropna()

# Normalize features
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])

# Create sequences for LSTM
def create_sequences(data, target_col, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i + seq_length][features].values)
        y.append(data.iloc[i + seq_length][target_col])
    return np.array(X), np.array(y)

seq_length = 90
X, y = create_sequences(data, target, seq_length)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build LSTM Model
model = Sequential([
    LSTM(128, activation='relu', return_sequences=True, input_shape=(seq_length, len(features))),
    Dropout(0.3),
    LSTM(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/100



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 38ms/step - loss: 0.0256 - mae: 0.1296 - val_loss: 0.0242 - val_mae: 0.1060
Epoch 2/100
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 28ms/step - loss: 0.0224 - mae: 0.1237 - val_loss: 0.0222 - val_mae: 0.1244
Epoch 3/100
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - loss: 0.0221 - mae: 0.1246 - val_loss: 0.0223 - val_mae: 0.1164
Epoch 4/100
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - loss: 0.0217 - mae: 0.1211 - val_loss: 0.0221 - val_mae: 0.1202
Epoch 5/100
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - loss: 0.0220 - mae: 0.1234 - val_loss: 0.0220 - val_mae: 0.1289
Epoch 6/100
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - loss: 0.0220 - mae: 0.1238 - val_loss: 0.0220 - val_mae: 0.1184
Epoch 7/100
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27m

In [70]:
# Evaluate the model
y_pred = model.predict(X_test).flatten()
y_pred_binary = (y_pred > 0.5).astype(int)
y_test_binary = (y_test > 0.5).astype(int)

# Metrics
accuracy = accuracy_score(y_test_binary, y_pred_binary)
precision = precision_score(y_test_binary, y_pred_binary, zero_division=1)
f1 = f1_score(y_test_binary, y_pred_binary, zero_division=1)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# Plot training history using Plotly
fig_loss = go.Figure()
fig_loss.add_trace(go.Scatter(y=history.history['loss'], mode='lines', name='Training Loss'))
fig_loss.add_trace(go.Scatter(y=history.history['val_loss'], mode='lines', name='Validation Loss'))
fig_loss.update_layout(title="LSTM Training and Validation Loss",
                       xaxis_title="Epoch",
                       yaxis_title="Loss",
                       template="plotly_dark")
fig_loss.show()

# Plot predictions vs actual using Plotly
fig_predictions = go.Figure()
fig_predictions.add_trace(go.Scatter(y=y_test, mode='lines', name='Actual'))
fig_predictions.add_trace(go.Scatter(y=y_pred, mode='lines', name='Predicted'))
fig_predictions.update_layout(title="Predicted vs Actual Extreme_Weather",
                              xaxis_title="Time Step",
                              yaxis_title="Extreme_Weather",
                              template="plotly_dark")
fig_predictions.show()

[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


In [82]:
print(f"""
LSTM Model Performance:
------------------------
Accuracy: {accuracy:.4f}
Precision: {precision:.4f}
F1-Score: {f1:.4f}
Mean Absolute Error (MAE): {mae:.4f}
Root Mean Squared Error (RMSE): {rmse:.4f}
R² Score: {r2:.4f}
""")


LSTM Model Performance:
------------------------
Accuracy: 0.9898
Precision: 0.9898
F1-Score: 0.9949
Mean Absolute Error (MAE): 0.1256
Root Mean Squared Error (RMSE): 0.1472
R² Score: 0.0591

