In [1]:
## 🔧 Load Trained Earthquake Magnitude Model
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import os

# Deserialize the persisted regressor from disk.
# NOTE: joblib.load unpickles arbitrary Python objects, so only point
# model_path at a file you trust.
model_path = 'best_asia_earthquake_model.pkl'  # Update path if needed
model = joblib.load(model_path)

# Confirm the load and report the concrete estimator class that came back.
print(
    "✅ Model loaded successfully!",
    "Model type: {}".format(type(model)),
    sep="\n",
)

✅ Model loaded successfully!
Model type: <class 'sklearn.ensemble._stacking.StackingRegressor'>


In [3]:
## 📥 Load Cleaned Dataset
# Read the cleaned dataset from disk.
df_clean = pd.read_csv('../data/cleaned/cleaned_asia_earthquake.csv')  # Adjust path if needed
print(f"Loaded data shape: {df_clean.shape}")

# Separate the regression target from the feature columns.
target = 'mag'
X = df_clean.drop(columns=[target])
y = df_clean[target]

# Actually verify that the columns match what the loaded model expects,
# instead of only printing them. `feature_names_in_` is only available when
# the estimator was fitted on a DataFrame with named columns, so the check is
# skipped when the attribute is missing.
expected_features = getattr(model, 'feature_names_in_', None)
if expected_features is not None and list(expected_features) != X.columns.tolist():
    raise ValueError(
        "Dataset columns do not match the features the model was fitted on.\n"
        f"  model expects: {list(expected_features)}\n"
        f"  dataset has  : {X.columns.tolist()}"
    )

print("Features used by model:")
print(X.columns.tolist())

Loaded data shape: (251023, 19)
Features used by model:
['latitude', 'longitude', 'depth', 'nst', 'gap', 'rms', 'depthError', 'magNst', 'year', 'month_sin', 'month_cos', 'hour_sin', 'hour_cos', 'region', 'mag_Other', 'mag_mb', 'mag_md', 'mag_ml']


In [4]:
## 🔢 Split into Train/Test Sets
from sklearn.model_selection import train_test_split

# Carve out a 20% hold-out set. The fixed random_state makes the split
# reproducible from run to run.
# NOTE(review): presumably this mirrors the split used when the model was
# trained — confirm against the training notebook.
_split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = _split_parts

print("Test set size: {}".format(len(X_test)))

Test set size: 50205


In [5]:
## 📈 Evaluate Model Performance
# Run the loaded model over the hold-out features.
y_pred = model.predict(X_test)

# Standard regression metrics on the hold-out set.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("\n📊 Model Performance on Test Set")
# Labels are pre-padded so the colons line up in the printed report.
for _label, _value in (("MAE ", mae), ("RMSE", rmse), ("R²  ", r2)):
    print(f"  {_label} : {_value:.3f}")

# Bonus: share of predictions whose absolute error is at most 0.3 magnitude units.
errors = (y_test - y_pred).abs()
in_03 = (errors <= 0.3).mean() * 100
print(f"\n🎯 Within ±0.3 magnitude: {in_03:.1f}% of test samples")


📊 Model Performance on Test Set
  MAE  : 0.211
  RMSE : 0.294
  R²   : 0.780

🎯 Within ±0.3 magnitude: 77.5% of test samples


In [8]:
## 🔮 Predict for a Custom Earthquake
# Predict the magnitude of a single, hand-specified event.
# Edit these values to simulate different earthquakes.
lat = 43.773   # Latitude
lon = 147.321  # Longitude
log = lon      # Backward-compatible alias: this cell used to call the longitude `log`
depth = 14.0
nst = 24
gap = 111.0
rms = 0.9
depthError = 4.7
magNst = 10
year = 1994

# One-hot encoded mag types — set exactly one of these to 1
mag_mb = 1  # Assume mb was used
mag_ml = 0
mag_md = 0
mag_Other = 0

# Cyclical time features (example: June, 12 PM).
# NOTE(review): assumes training encoded month as 2*pi*month/12 and hour as
# 2*pi*hour/24 — confirm against the feature-engineering code.
month = 6
hour = 12
month_sin = np.sin(2 * np.pi * month / 12)
month_cos = np.cos(2 * np.pi * month / 12)
hour_sin = np.sin(2 * np.pi * hour / 24)
hour_cos = np.cos(2 * np.pi * hour / 24)

# Region clustering (must use same KMeans logic as before).
# NOTE(review): the clusterer is re-fitted here rather than loaded, so the
# region labels only agree with the ones the model was trained on if the
# training pipeline used the same data and the same KMeans settings — confirm,
# or persist the fitted KMeans next to the model and load it instead.
from sklearn.cluster import KMeans

region_cols = ['latitude', 'longitude']
kmeans = KMeans(n_clusters=10, random_state=42, n_init=10)
kmeans.fit(df_clean[region_cols])  # Fit on full data
# Query with a DataFrame carrying the same column names the clusterer was
# fitted on; a bare nested list makes scikit-learn warn that X has no valid
# feature names.
region = kmeans.predict(pd.DataFrame([[lat, lon]], columns=region_cols))[0]

# Assemble the single-row feature frame.
input_data = pd.DataFrame([{
    'latitude': lat,
    'longitude': lon,
    'depth': depth,
    'nst': nst,
    'gap': gap,
    'rms': rms,
    'depthError': depthError,
    'magNst': magNst,
    'year': year,
    'month_sin': month_sin,
    'month_cos': month_cos,
    'hour_sin': hour_sin,
    'hour_cos': hour_cos,
    'region': region,
    'mag_mb': mag_mb,
    'mag_ml': mag_ml,
    'mag_md': mag_md,
    'mag_Other': mag_Other
}])

# Reorder to the training column order; raises KeyError if a feature is missing above.
input_data = input_data[X.columns]

# Predict!
predicted_mag = model.predict(input_data)[0]
print(f"\n📍 Earthquake at Lat={lat}, Lon={lon}")
print(f"💥 Predicted Magnitude: {predicted_mag:.2f}")




📍 Earthquake at Lat=43.773, Lon=147.321
💥 Predicted Magnitude: 4.57


In [7]:
## 🌍 10 Realistic Earthquake Samples: Actual vs Predicted
import joblib
import pandas as pd
import numpy as np

# Reuse the model already in the kernel; only fall back to disk when the name
# is genuinely undefined (e.g. this cell is run on a fresh kernel).
# Catching NameError specifically keeps real failures (bad path, corrupt
# pickle, KeyboardInterrupt) visible instead of being swallowed.
try:
    model
except NameError:
    # Same artifact the notebook loads at the top, so both code paths evaluate
    # the same model. joblib.load unpickles arbitrary objects — only load a
    # file you trust.
    model = joblib.load('best_asia_earthquake_model.pkl')  # Fallback to saved file
    print("✅ Loaded model from .pkl")
else:
    print("✅ Using live trained model")

# Hand-written example rows (described as simulated, real-looking earthquakes).
# Each row lists the feature values followed by the reference magnitude 'mag'.
# NOTE(review): 'region' and the sin/cos values are typed in by hand here, not
# computed — confirm they follow the same encoding the model was trained with.
_SAMPLE_COLUMNS = (
    'latitude', 'longitude', 'depth', 'nst', 'gap',
    'rms', 'depthError', 'magNst', 'year',
    'month_sin', 'month_cos', 'hour_sin', 'hour_cos',
    'region', 'mag_mb', 'mag_ml', 'mag_md', 'mag_Other', 'mag',
)

_SAMPLE_ROWS = (
    (35.68, 139.68, 30.0, 45, 80.0, 0.8, 5.0, 30, 2011, 0.5, 0.866, 0.707, -0.707, 3, 1, 0, 0, 0, 4.8),
    (27.8, 86.0, 15.0, 60, 65.0, 1.5, 3.0, 40, 2015, 0.866, 0.5, -0.707, 0.707, 7, 0, 1, 0, 0, 7.8),
    (34.0, 74.0, 10.0, 30, 150.0, 2.0, 15.0, 20, 2023, 0.259, 0.966, 0.0, 1.0, 6, 1, 0, 0, 0, 5.2),
    (38.0, 142.0, 25.0, 70, 40.0, 0.6, 4.0, 50, 2011, -0.5, 0.866, 0.966, -0.259, 2, 0, 0, 0, 1, 9.0),
    (30.5, 65.0, 200.0, 25, 100.0, 0.9, 10.0, 15, 2022, -0.866, 0.5, -0.966, 0.259, 8, 1, 0, 0, 0, 5.5),
    (40.0, 140.0, 50.0, 55, 70.0, 1.1, 6.0, 35, 2020, 0.866, 0.5, 0.259, 0.966, 1, 0, 0, 1, 0, 6.3),
    (24.5, 93.0, 35.0, 40, 90.0, 1.3, 7.0, 25, 2017, 0.5, 0.866, -0.259, -0.966, 5, 0, 1, 0, 0, 5.8),
    (36.0, 138.0, 10.0, 50, 60.0, 1.8, 4.0, 45, 2021, 0.866, 0.5, 0.707, -0.707, 4, 0, 1, 0, 0, 6.1),
    (31.0, 78.0, 18.0, 35, 120.0, 1.6, 12.0, 22, 2023, 0.259, 0.966, 0.0, 1.0, 6, 1, 0, 0, 0, 4.9),
    (37.5, 143.0, 100.0, 60, 50.0, 1.2, 8.0, 50, 2022, 0.866, 0.5, 0.5, -0.866, 2, 0, 0, 1, 0, 7.2),
)

# One dict per row, keyed by column name, in the column order given above.
samples = [dict(zip(_SAMPLE_COLUMNS, _row)) for _row in _SAMPLE_ROWS]

# Turn the hand-written sample rows into a frame and split off the reference magnitudes.
test_df = pd.DataFrame(samples)
y_true = test_df['mag'].values

# Select the feature columns in training order ('mag' is left out).
# Distinct names (X_samples / y_samples_pred) are used on purpose: reusing
# X_test / y_pred would clobber the hold-out split, and re-running the metrics
# cell afterwards would pair these 10 predictions with the full y_test.
X_samples = test_df[df_clean.columns.drop('mag')]

# Predict
y_samples_pred = model.predict(X_samples)

# Print side-by-side comparison
print("\n🔍 Actual vs Predicted Magnitude (10 Historical Quake Examples)")
print("=" * 65)
print(f"{'#':<2} {'Lat':<6} {'Lon':<7} {'Depth':<5} {'NST':<4} {'Actual':<7} {'Predicted':<10} {'Error':<6}")
print("-" * 65)
for i, (actual, predicted) in enumerate(zip(y_true, y_samples_pred)):
    row = test_df.iloc[i]  # one positional lookup per row instead of one per field
    error = abs(actual - predicted)
    print(f"{i + 1:<2} "
          f"{row['latitude']:<6.2f} "
          f"{row['longitude']:<7.2f} "
          f"{row['depth']:<5.1f} "
          f"{row['nst']:<4.0f} "
          f"{actual:<7.2f} "
          f"{predicted:<10.2f} "
          f"{error:<6.2f}")

✅ Using live trained model

🔍 Actual vs Predicted Magnitude (10 Historical Quake Examples)
#  Lat    Lon     Depth NST  Actual  Predicted  Error 
-----------------------------------------------------------------
1  35.68  139.68  30.0  45   4.80    4.89       0.09  
2  27.80  86.00   15.0  60   7.80    3.96       3.84  
3  34.00  74.00   10.0  30   5.20    4.28       0.92  
4  38.00  142.00  25.0  70   9.00    5.18       3.82  
5  30.50  65.00   200.0 25   5.50    4.04       1.46  
6  40.00  140.00  50.0  55   6.30    3.74       2.56  
7  24.50  93.00   35.0  40   5.80    3.74       2.06  
8  36.00  138.00  10.0  50   6.10    3.96       2.14  
9  31.00  78.00   18.0  35   4.90    4.28       0.62  
10 37.50  143.00  100.0 60   7.20    3.74       3.46  
