In [66]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import openpyxl

# Load the dataset
file_path = "Flattened_Seismic_Data_With_Ranges.xlsx"
df = pd.read_excel(file_path)

# Updated precursor parameters based on your research needs
all_columns = [
    "Earthquake greater than 4_old",
    "Fault Density_old",
    "Fault Distances_old",
    "Combined Strain Rate Grid_old",
   "seismic_energy_new",
    "Smoothed Time Since EQ_old",
    "b_value_lsq_new",
    "b_value_mlk_new",
    "probabilities_m6_lsq_new",
    "probabilities_m6_mlk_new",
    "magnitude_deficit_lsq_new",
    "magnitude_deficit_mlk_new",
    "seismic_rate_new",
   
    "max_magnitude_new",
    
    "recurrence_time_4.0_lsq_new",
    "recurrence_time_4.0_mlk_new",
    "recurrence_time_4.1_lsq_new",
    "recurrence_time_4.1_mlk_new",
    "recurrence_time_4.2_lsq_new",
    "recurrence_time_4.2_mlk_new",
    "recurrence_time_4.3_lsq_new",
    "recurrence_time_4.3_mlk_new",
    "recurrence_time_4.4_lsq_new",
    "recurrence_time_4.4_mlk_new",
    "recurrence_time_4.5_lsq_new",
    "recurrence_time_4.5_mlk_new",
    "recurrence_time_4.6_lsq_new",
    "recurrence_time_4.6_mlk_new",
    "recurrence_time_4.7_lsq_new",
    "recurrence_time_4.7_mlk_new",
    "recurrence_time_4.8_lsq_new",
    "recurrence_time_4.8_mlk_new",
    "recurrence_time_4.9_lsq_new",
    "recurrence_time_4.9_mlk_new",
    "recurrence_time_5.0_lsq_new",
    "recurrence_time_5.0_mlk_new",
    "recurrence_time_5.1_lsq_new",
    "recurrence_time_5.1_mlk_new",
    "recurrence_time_5.2_lsq_new",
    "recurrence_time_5.2_mlk_new",
    "recurrence_time_5.3_lsq_new",
    "recurrence_time_5.3_mlk_new",
    "recurrence_time_5.4_lsq_new",
    "recurrence_time_5.4_mlk_new",
    "recurrence_time_5.5_lsq_new",
    "recurrence_time_5.5_mlk_new",
    "recurrence_time_5.6_lsq_new",
    "recurrence_time_5.6_mlk_new",
    "recurrence_time_5.7_lsq_new",
    "recurrence_time_5.7_mlk_new",
    "recurrence_time_5.8_lsq_new",
    "recurrence_time_5.8_mlk_new",
    "recurrence_time_5.9_lsq_new",
    "recurrence_time_5.9_mlk_new",
    "recurrence_time_6.0_lsq_new",
    "recurrence_time_6.0_mlk_new"
]

# Updated target variable
target_column =  "mean_magnitude_new"

# Data cleaning and preprocessing
df_clean = df[all_columns + [target_column]]
df_clean = df_clean.apply(pd.to_numeric, errors='coerce')

# Separate features and target
X = df_clean[all_columns]
y = df_clean[target_column]

# Feature selection process
mi_scores = mutual_info_regression(X, y)
mi_results = pd.DataFrame({'Feature': X.columns, 'Mutual_Info_Score': mi_scores})
mi_results = mi_results.sort_values(by='Mutual_Info_Score', ascending=False)

# Threshold selection (using median instead of mean for skewed distributions)
mi_threshold = mi_results['Mutual_Info_Score'].median()
selected_features = mi_results[mi_results['Mutual_Info_Score'] >= mi_threshold]['Feature'].tolist()

# Enhanced redundancy check (considering energy-specific relationships)
redundancy_threshold = 2.0
redundant_features = set()

# Check pairwise MI between selected features
for i, feature1 in enumerate(selected_features):
    for feature2 in selected_features[i+1:]:
        mi_value = mutual_info_regression(X[[feature1]], X[feature2])[0]
        if mi_value >= redundancy_threshold:
            # Keep the feature with higher MI to target
            if mi_results.loc[mi_results['Feature'] == feature1, 'Mutual_Info_Score'].values[0] > \
               mi_results.loc[mi_results['Feature'] == feature2, 'Mutual_Info_Score'].values[0]:
                redundant_features.add(feature2)
            else:
                redundant_features.add(feature1)

final_features = [f for f in selected_features if f not in redundant_features]

# Save results with improved formatting
# Save results with improved formatting
excel_path = "Seismic_Energy_Precursor_Selection.xlsx"
with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
    mi_results.to_excel(writer, sheet_name="Mutual Information", index=False)
    
    summary_df = pd.DataFrame({
        'Selected Features': final_features,
        'MI Score': [mi_results.loc[mi_results['Feature'] == f, 'Mutual_Info_Score'].values[0] 
                     for f in final_features]  # Added closing bracket here
    })
    summary_df.to_excel(writer, sheet_name="Final Features", index=False)

print("Feature selection completed successfully!")
print(f"Top  energy-related precursors:\n{summary_df.head(15)}")

Feature selection completed successfully!
Top  energy-related precursors:
                Selected Features  MI Score
0               max_magnitude_new  1.078552
1       magnitude_deficit_lsq_new  0.330083
2   Earthquake greater than 4_old  0.281494
3       magnitude_deficit_mlk_new  0.249386
4      Smoothed Time Since EQ_old  0.206465
5        probabilities_m6_lsq_new  0.204380
6        probabilities_m6_mlk_new  0.178276
7                seismic_rate_new  0.172723
8   Combined Strain Rate Grid_old  0.129685
9             Fault Distances_old  0.113822
10                b_value_mlk_new  0.087389
11                b_value_lsq_new  0.083017
12             seismic_energy_new  0.078538
13    recurrence_time_4.8_mlk_new  0.078247


In [1]:
import pandas as pd
import numpy as np
import shap
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import openpyxl

# Load the dataset
file_path = "Flattened_Seismic_Data_With_Ranges.xlsx"
df = pd.read_excel(file_path)

# Updated precursor parameters based on your research needs
all_columns = [
    "Earthquake greater than 4_old",
    "Fault Density_old",
    "Fault Distances_old",
    "Combined Strain Rate Grid_old",
    "seismic_energy_new",
    "Smoothed Time Since EQ_old",
    "b_value_lsq_new",
    "b_value_mlk_new",
    "probabilities_m6_lsq_new",
    "probabilities_m6_mlk_new",
    "magnitude_deficit_lsq_new",
    "magnitude_deficit_mlk_new",
    "seismic_rate_new",
    "max_magnitude_new",
    "recurrence_time_4.0_lsq_new",
    "recurrence_time_4.0_mlk_new",
    "recurrence_time_4.1_lsq_new",
    "recurrence_time_4.1_mlk_new",
    "recurrence_time_4.2_lsq_new",
    "recurrence_time_4.2_mlk_new",
    "recurrence_time_4.3_lsq_new",
    "recurrence_time_4.3_mlk_new",
    "recurrence_time_4.4_lsq_new",
    "recurrence_time_4.4_mlk_new",
    "recurrence_time_4.5_lsq_new",
    "recurrence_time_4.5_mlk_new",
    "recurrence_time_4.6_lsq_new",
    "recurrence_time_4.6_mlk_new",
    "recurrence_time_4.7_lsq_new",
    "recurrence_time_4.7_mlk_new",
    "recurrence_time_4.8_lsq_new",
    "recurrence_time_4.8_mlk_new",
    "recurrence_time_4.9_lsq_new",
    "recurrence_time_4.9_mlk_new",
    "recurrence_time_5.0_lsq_new",
    "recurrence_time_5.0_mlk_new",
    "recurrence_time_5.1_lsq_new",
    "recurrence_time_5.1_mlk_new",
    "recurrence_time_5.2_lsq_new",
    "recurrence_time_5.2_mlk_new",
    "recurrence_time_5.3_lsq_new",
    "recurrence_time_5.3_mlk_new",
    "recurrence_time_5.4_lsq_new",
    "recurrence_time_5.4_mlk_new",
    "recurrence_time_5.5_lsq_new",
    "recurrence_time_5.5_mlk_new",
    "recurrence_time_5.6_lsq_new",
    "recurrence_time_5.6_mlk_new",
    "recurrence_time_5.7_lsq_new",
    "recurrence_time_5.7_mlk_new",
    "recurrence_time_5.8_lsq_new",
    "recurrence_time_5.8_mlk_new",
    "recurrence_time_5.9_lsq_new",
    "recurrence_time_5.9_mlk_new",
    "recurrence_time_6.0_lsq_new",
    "recurrence_time_6.0_mlk_new"
]

# Updated target variable
target_column = "mean_magnitude_new"

# Data cleaning and preprocessing
df_clean = df[all_columns + [target_column]]
df_clean = df_clean.apply(pd.to_numeric, errors='coerce')

# Separate features and target
X = df_clean[all_columns]
y = df_clean[target_column]

# Train a Random Forest model (or any model you prefer)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# SHAP value calculation
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# Create a DataFrame for SHAP values
shap_df = pd.DataFrame(shap_values, columns=X.columns)

shap_abs_mean = shap_df.abs().mean()
shap_sorted = shap_abs_mean.sort_values(ascending=False)
shap_threshold = shap_sorted.median()
selected_features = shap_sorted[shap_sorted >= shap_threshold].index.tolist()

redundancy_threshold = 2.0
redundant_features = set()

# Check pairwise SHAP values between selected features (using a simple correlation method)
for i, feature1 in enumerate(selected_features):
    for feature2 in selected_features[i+1:]:
        corr_value = np.corrcoef(shap_df[feature1], shap_df[feature2])[0, 1]
        if abs(corr_value) >= redundancy_threshold:
            # Keep the feature with higher SHAP value mean
            if shap_abs_mean[feature1] > shap_abs_mean[feature2]:
                redundant_features.add(feature2)
            else:
                redundant_features.add(feature1)

final_features = [f for f in selected_features if f not in redundant_features]

# Save results with improved formatting
excel_path = "Seismic_Energy_Precursor_Selection_SHAP.xlsx"
with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
    shap_sorted.to_excel(writer, sheet_name="SHAP Values", index=True)
    
    summary_df = pd.DataFrame({
        'Selected Features': final_features,
        'Mean SHAP Value': [shap_abs_mean[f] for f in final_features]
    })
    summary_df.to_excel(writer, sheet_name="Final Features", index=False)

print("Feature selection completed successfully using SHAP!")
print(f"Top energy-related precursors based on SHAP values:\n{summary_df.head(15)}")


Feature selection completed successfully using SHAP!
Top energy-related precursors based on SHAP values:
                Selected Features  Mean SHAP Value
0               max_magnitude_new         0.318153
1                seismic_rate_new         0.011462
2       magnitude_deficit_mlk_new         0.010352
3   Combined Strain Rate Grid_old         0.006967
4   Earthquake greater than 4_old         0.005330
5       magnitude_deficit_lsq_new         0.004534
6              seismic_energy_new         0.003917
7             Fault Distances_old         0.003228
8      Smoothed Time Since EQ_old         0.002419
9        probabilities_m6_lsq_new         0.001095
10                b_value_lsq_new         0.000975
11       probabilities_m6_mlk_new         0.000844
12                b_value_mlk_new         0.000560
13    recurrence_time_4.0_lsq_new         0.000557
14              Fault Density_old         0.000494


In [5]:
import pandas as pd
import numpy as np
import shap
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.inspection import permutation_importance
import openpyxl

# Load the dataset
file_path = "Flattened_Seismic_Data_With_Ranges.xlsx"
df = pd.read_excel(file_path)

# Updated precursor parameters
#3all_columns = [ ... ]  # Same list you already defined earlier
all_columns = [
    "Earthquake greater than 4_old",
    "mean_magnitude_new",
    "Fault Density_old",
    "Fault Distances_old",
    "Combined Strain Rate Grid_old",
    "Smoothed Time Since EQ_old",
    "b_value_lsq_new",
    "b_value_mlk_new",
    "probabilities_m6_lsq_new",
    "probabilities_m6_mlk_new",
    "magnitude_deficit_lsq_new",
    "magnitude_deficit_mlk_new",
    "seismic_rate_new",
    "max_magnitude_new",
    "recurrence_time_4.0_lsq_new",
    "recurrence_time_4.0_mlk_new",
    "recurrence_time_4.1_lsq_new",
    "recurrence_time_4.1_mlk_new",
    "recurrence_time_4.2_lsq_new",
    "recurrence_time_4.2_mlk_new",
    "recurrence_time_4.3_lsq_new",
    "recurrence_time_4.3_mlk_new",
    "recurrence_time_4.4_lsq_new",
    "recurrence_time_4.4_mlk_new",
    "recurrence_time_4.5_lsq_new",
    "recurrence_time_4.5_mlk_new",
    "recurrence_time_4.6_lsq_new",
    "recurrence_time_4.6_mlk_new",
    "recurrence_time_4.7_lsq_new",
    "recurrence_time_4.7_mlk_new",
    "recurrence_time_4.8_lsq_new",
    "recurrence_time_4.8_mlk_new",
    "recurrence_time_4.9_lsq_new",
    "recurrence_time_4.9_mlk_new",
    "recurrence_time_5.0_lsq_new",
    "recurrence_time_5.0_mlk_new",
    "recurrence_time_5.1_lsq_new",
    "recurrence_time_5.1_mlk_new",
    "recurrence_time_5.2_lsq_new",
    "recurrence_time_5.2_mlk_new",
    "recurrence_time_5.3_lsq_new",
    "recurrence_time_5.3_mlk_new",
    "recurrence_time_5.4_lsq_new",
    "recurrence_time_5.4_mlk_new",
    "recurrence_time_5.5_lsq_new",
    "recurrence_time_5.5_mlk_new",
    "recurrence_time_5.6_lsq_new",
    "recurrence_time_5.6_mlk_new",
    "recurrence_time_5.7_lsq_new",
    "recurrence_time_5.7_mlk_new",
    "recurrence_time_5.8_lsq_new",
    "recurrence_time_5.8_mlk_new",
    "recurrence_time_5.9_lsq_new",
    "recurrence_time_5.9_mlk_new",
    "recurrence_time_6.0_lsq_new",
    "recurrence_time_6.0_mlk_new"
]

# Target variable
target_column = "seismic_energy_new"

# Clean data
df_clean = df[all_columns + [target_column]]
df_clean = df_clean.apply(pd.to_numeric, errors='coerce')
df_clean = df_clean.dropna()

# Separate features and target
X = df_clean[all_columns]
y = df_clean[target_column]

# Train Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# ---------------------------
# PERMUTATION FEATURE IMPORTANCE
# ---------------------------
perm_importance = permutation_importance(model, X, y, n_repeats=10, random_state=42, scoring='r2')

# Create DataFrame from permutation results
perm_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance Mean': perm_importance.importances_mean,
    'Importance Std': perm_importance.importances_std
}).sort_values(by='Importance Mean', ascending=False)

# Thresholding (optional): You can set a threshold to filter features
median_importance = perm_df['Importance Mean'].median()
selected_perm_features = perm_df[perm_df['Importance Mean'] >= median_importance]

# Save results to Excel
excel_path = "Seismic_Energy_Precursor_Selection_Permutation.xlsx"
with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
    perm_df.to_excel(writer, sheet_name="Permutation Importance", index=False)
    selected_perm_features.to_excel(writer, sheet_name="Final Features", index=False)

print("Feature selection completed using Permutation Importance!")
print(f"Top features:\n{selected_perm_features.head(15)}")


Feature selection completed using Permutation Importance!
Top features:
                          Feature  Importance Mean  Importance Std
13              max_magnitude_new         1.394275        0.038703
4   Combined Strain Rate Grid_old         0.067129        0.005219
8        probabilities_m6_lsq_new         0.051765        0.004676
1              mean_magnitude_new         0.046215        0.003882
6                 b_value_lsq_new         0.032892        0.002391
5      Smoothed Time Since EQ_old         0.031975        0.003275
11      magnitude_deficit_mlk_new         0.027193        0.002616
0   Earthquake greater than 4_old         0.020376        0.003020
12               seismic_rate_new         0.019332        0.001635
10      magnitude_deficit_lsq_new         0.012467        0.000606
3             Fault Distances_old         0.007716        0.000725
2               Fault Density_old         0.003842        0.000617
9        probabilities_m6_mlk_new         0.001571       

In [1]:
##### LSTM code with attenuation mechanism layer

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load Dataset
# Load the dataset
file_path = "Flattened_Seismic_Data_With_Ranges.xlsx"
df = pd.read_excel(file_path)

# Updated precursor parameters based on your research needs
all_columns = [
    "Earthquake greater than 4_old",
    "Fault Density_old",
    "Fault Distances_old",
    "Combined Strain Rate Grid_old",
   "seismic_energy_new",
    "Smoothed Time Since EQ_old",
    "b_value_lsq_new",
    "b_value_mlk_new",
    "probabilities_m6_lsq_new",
    "probabilities_m6_mlk_new",
    "magnitude_deficit_lsq_new",
    "magnitude_deficit_mlk_new",
    "seismic_rate_new",
   
    "max_magnitude_new",
    
    "recurrence_time_4.0_lsq_new",
    "recurrence_time_4.0_mlk_new",
    "recurrence_time_4.1_lsq_new",
    "recurrence_time_4.1_mlk_new",
    "recurrence_time_4.2_lsq_new",
    "recurrence_time_4.2_mlk_new",
    "recurrence_time_4.3_lsq_new",
    "recurrence_time_4.3_mlk_new",
    "recurrence_time_4.4_lsq_new",
    "recurrence_time_4.4_mlk_new",
    "recurrence_time_4.5_lsq_new",
    "recurrence_time_4.5_mlk_new",
    "recurrence_time_4.6_lsq_new",
    "recurrence_time_4.6_mlk_new",
    "recurrence_time_4.7_lsq_new",
    "recurrence_time_4.7_mlk_new",
    "recurrence_time_4.8_lsq_new",
    "recurrence_time_4.8_mlk_new",
    "recurrence_time_4.9_lsq_new",
    "recurrence_time_4.9_mlk_new",
    "recurrence_time_5.0_lsq_new",
    "recurrence_time_5.0_mlk_new",
    "recurrence_time_5.1_lsq_new",
    "recurrence_time_5.1_mlk_new",
    "recurrence_time_5.2_lsq_new",
    "recurrence_time_5.2_mlk_new",
    "recurrence_time_5.3_lsq_new",
    "recurrence_time_5.3_mlk_new",
    "recurrence_time_5.4_lsq_new",
    "recurrence_time_5.4_mlk_new",
    "recurrence_time_5.5_lsq_new",
    "recurrence_time_5.5_mlk_new",
    "recurrence_time_5.6_lsq_new",
    "recurrence_time_5.6_mlk_new",
    "recurrence_time_5.7_lsq_new",
    "recurrence_time_5.7_mlk_new",
    "recurrence_time_5.8_lsq_new",
    "recurrence_time_5.8_mlk_new",
    "recurrence_time_5.9_lsq_new",
    "recurrence_time_5.9_mlk_new",
    "recurrence_time_6.0_lsq_new",
    "recurrence_time_6.0_mlk_new"
]

# Updated target variable
target_column =  "mean_magnitude_new"

# Remove NaN values
df_clean = df[feature_columns + [target_column]].dropna()

# Normalize data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(df_clean[feature_columns])
y_scaled = scaler_y.fit_transform(df_clean[[target_column]])

# Define sliding window function
### instead of using the sliding window feature go for absolute method of forecasting.....
#def create_sliding_windows(X, y, look_back=10):
#    X_windows, y_windows = [], []
#    for i in range(len(X) - look_back):
#        X_windows.append(X[i:i+look_back])
#        y_windows.append(y[i+look_back])
#    return np.array(X_windows), np.array(y_windows)

#look_back = 10  # Predicting based on past 10 time steps
X_lstm, y_lstm = create_sliding_windows(X_scaled, y_scaled, look_back)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2, random_state=42, shuffle=False)

# Build LSTM model
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(look_back, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dense(25, activation='relu'),
    Dense(1)  # Predicting a single magnitude value
])

