In [2]:

# Step 1: Import libraries

import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Step 2: Load dataset

# The dataset location is machine-specific; keep it in one constant so it is
# the only thing to change when the notebook runs somewhere else.
DATA_PATH = "C:\\Users\\HP\\Downloads\\earthquake_alert_balanced_dataset.csv"
df = pd.read_csv(DATA_PATH)


# Step 3: Clean column names

# Normalise headers to snake_case: trim, lower-case, spaces -> underscores.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')


# Step 4: Drop duplicates

# Reassign instead of mutating in place so the step reads as a pure transform.
df = df.drop_duplicates()


# Step 5: Convert numeric columns

numeric_cols = ['magnitude', 'depth', 'cdi', 'mmi', 'sig']
# Values that cannot be parsed become NaN and are imputed after the split.
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')


# Step 6: Encode target

# A row without a label cannot be used for supervised training or scoring.
df = df.dropna(subset=['alert'])

label_encoder = LabelEncoder()
df['alert_encoded'] = label_encoder.fit_transform(df['alert'])


# Step 7: Split BEFORE imputing and scaling

X = df[numeric_cols]
y = df['alert_encoded']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# Step 8: Impute missing values

# Fit the medians on the training split only; fitting on the full dataset
# would leak test-set statistics into training.
imputer = SimpleImputer(strategy='median')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train), columns=numeric_cols, index=X_train.index
)
X_test = pd.DataFrame(
    imputer.transform(X_test), columns=numeric_cols, index=X_test.index
)


# Step 9: Scale numeric features

# Same rule as imputation: fit on train, apply to test. The original index is
# kept so the feature frames stay aligned with y_train / y_test.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train), columns=numeric_cols, index=X_train.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test), columns=numeric_cols, index=X_test.index
)


# Step 10: Verify no NaN

print("NaNs in X_train:\n", X_train.isnull().sum())
print("NaNs in X_test:\n", X_test.isnull().sum())

# Fail loudly if anything slipped through rather than relying on reading the printout.
assert not X_train.isnull().values.any(), "X_train still contains NaN"
assert not X_test.isnull().values.any(), "X_test still contains NaN"





NaNs in X_train:
 magnitude    0
depth        0
cdi          0
mmi          0
sig          0
dtype: int64
NaNs in X_test:
 magnitude    0
depth        0
cdi          0
mmi          0
sig          0
dtype: int64


In [3]:
# Step 11: Logistic Regression -- fit on the training split, score on the test split

# fit() returns the estimator itself, so construction and training can be chained.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)

# Compute every metric first, then report them together.
log_accuracy = accuracy_score(y_test, y_pred_log)
log_confusion = confusion_matrix(y_test, y_pred_log)
log_report = classification_report(y_test, y_pred_log)

print("\nLogistic Regression Performance:")
print("Accuracy:", log_accuracy)
print("Confusion Matrix:\n", log_confusion)
print("Classification Report:\n", log_report)


# Step 12: Decision Tree -- fit on the training split, score on the test split

# Fixed random_state keeps the fitted tree reproducible across runs.
dtree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_tree = dtree.predict(X_test)

# Compute every metric first, then report them together.
tree_accuracy = accuracy_score(y_test, y_pred_tree)
tree_confusion = confusion_matrix(y_test, y_pred_tree)
tree_report = classification_report(y_test, y_pred_tree)

print("\nDecision Tree Performance:")
print("Accuracy:", tree_accuracy)
print("Confusion Matrix:\n", tree_confusion)
print("Classification Report:\n", tree_report)


Logistic Regression Performance:
Accuracy: 0.6706349206349206
Confusion Matrix:
 [[54  3  3 13]
 [ 0 29 12 23]
 [ 0  8 54  1]
 [10  4  6 32]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.74      0.79        73
           1       0.66      0.45      0.54        64
           2       0.72      0.86      0.78        63
           3       0.46      0.62      0.53        52

    accuracy                           0.67       252
   macro avg       0.67      0.67      0.66       252
weighted avg       0.69      0.67      0.67       252


Decision Tree Performance:
Accuracy: 0.8809523809523809
Confusion Matrix:
 [[59  5  2  7]
 [ 0 60  1  3]
 [ 0  4 58  1]
 [ 7  0  0 45]]
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.81      0.85        73
           1       0.87      0.94      0.90        64
           2       0.95      0.92      0.94        63
           3       0.80      0.87      0.83        52

    accuracy                           0.88       252
   macro avg       0.88      0.88      0.88       252
weighted avg       0.88      0.88      0.88       252

In [4]:

import numpy as np


# Hand-entered feature values for a single event to classify.
manual_input = {
    'magnitude': 5.5,
    'depth': 10,
    'cdi': 3.2,
    'mmi': 4.5,
    'sig': 150
}

# Select the columns in the exact order the scaler and models were fitted on.
# Indexing (rather than relying on dict order) raises a KeyError if a feature
# is missing instead of silently feeding values into the wrong columns.
feature_order = list(X_train.columns)
manual_df = pd.DataFrame([manual_input])[feature_order]

# scaler.transform returns a bare ndarray; wrap it back into a DataFrame so the
# models, which were fitted on named columns, do not emit scikit-learn's
# "X does not have valid feature names" warning at predict time.
manual_scaled = pd.DataFrame(scaler.transform(manual_df), columns=feature_order)

# Logistic Regression prediction, mapped back from the encoded class to its alert label
pred_log = log_reg.predict(manual_scaled)[0]
pred_label_log = label_encoder.inverse_transform([pred_log])[0]

# Decision Tree prediction
pred_tree = dtree.predict(manual_scaled)[0]
pred_label_tree = label_encoder.inverse_transform([pred_tree])[0]

print(f"Predicted alert (Logistic Regression): {pred_label_log}")
print(f"Predicted alert (Decision Tree): {pred_label_tree}")


Predicted alert (Logistic Regression): green
Predicted alert (Decision Tree): green




In [5]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest: 200 trees capped at depth 10, seeded for reproducibility.
# fit() returns the estimator itself, so construction and training are chained.
rf = RandomForestClassifier(n_estimators=200, random_state=42, max_depth=10).fit(
    X_train, y_train
)
y_pred_rf = rf.predict(X_test)

# Compute every metric first, then report them together.
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_confusion = confusion_matrix(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

print("\nRandom Forest Performance:")
print("Accuracy:", rf_accuracy)
print("Confusion Matrix:\n", rf_confusion)
print("Classification Report:\n", rf_report)



Random Forest Performance:
Accuracy: 0.8928571428571429
Confusion Matrix:
 [[61  3  1  8]
 [ 0 59  4  1]
 [ 0  3 59  1]
 [ 4  2  0 46]]
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.84      0.88        73
           1       0.88      0.92      0.90        64
           2       0.92      0.94      0.93        63
           3       0.82      0.88      0.85        52

    accuracy                           0.89       252
   macro avg       0.89      0.89      0.89       252
weighted avg       0.90      0.89      0.89       252



In [6]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient Boosting: 150 boosting stages at learning rate 0.05, seeded for reproducibility.
# fit() returns the estimator itself, so construction and training are chained.
gb = GradientBoostingClassifier(
    n_estimators=150, learning_rate=0.05, random_state=42
).fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)

# Compute every metric first, then report them together.
gb_accuracy = accuracy_score(y_test, y_pred_gb)
gb_confusion = confusion_matrix(y_test, y_pred_gb)
gb_report = classification_report(y_test, y_pred_gb)

print("\nGradient Boosting Performance:")
print("Accuracy:", gb_accuracy)
print("Confusion Matrix:\n", gb_confusion)
print("Classification Report:\n", gb_report)



Gradient Boosting Performance:
Accuracy: 0.8888888888888888
Confusion Matrix:
 [[62  2  2  7]
 [ 0 61  2  1]
 [ 0  4 57  2]
 [ 6  2  0 44]]
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.85      0.88        73
           1       0.88      0.95      0.92        64
           2       0.93      0.90      0.92        63
           3       0.81      0.85      0.83        52

    accuracy                           0.89       252
   macro avg       0.89      0.89      0.89       252
weighted avg       0.89      0.89      0.89       252



In [7]:
from sklearn.svm import SVC

# Support Vector Machine with an RBF kernel (C=2, gamma='scale').
# fit() returns the estimator itself, so construction and training are chained.
svm = SVC(kernel='rbf', C=2, gamma='scale').fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Compute every metric first, then report them together.
svm_accuracy = accuracy_score(y_test, y_pred_svm)
svm_confusion = confusion_matrix(y_test, y_pred_svm)
svm_report = classification_report(y_test, y_pred_svm)

print("\nSVM Performance:")
print("Accuracy:", svm_accuracy)
print("Confusion Matrix:\n", svm_confusion)
print("Classification Report:\n", svm_report)



SVM Performance:
Accuracy: 0.8015873015873016
Confusion Matrix:
 [[59  5  1  8]
 [ 0 44 19  1]
 [ 0  3 59  1]
 [ 5  6  1 40]]
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.81      0.86        73
           1       0.76      0.69      0.72        64
           2       0.74      0.94      0.83        63
           3       0.80      0.77      0.78        52

    accuracy                           0.80       252
   macro avg       0.80      0.80      0.80       252
weighted avg       0.81      0.80      0.80       252



In [8]:
from sklearn.neighbors import KNeighborsClassifier

# k-Nearest Neighbours using the 7 closest training points.
# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Compute every metric first, then report them together.
knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_confusion = confusion_matrix(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)

print("\nKNN Performance:")
print("Accuracy:", knn_accuracy)
print("Confusion Matrix:\n", knn_confusion)
print("Classification Report:\n", knn_report)



KNN Performance:
Accuracy: 0.8571428571428571
Confusion Matrix:
 [[58  3  2 10]
 [ 0 54  7  3]
 [ 0  3 60  0]
 [ 3  4  1 44]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.79      0.87        73
           1       0.84      0.84      0.84        64
           2       0.86      0.95      0.90        63
           3       0.77      0.85      0.81        52

    accuracy                           0.86       252
   macro avg       0.86      0.86      0.85       252
weighted avg       0.86      0.86      0.86       252

