In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer


In [2]:
import os

# Where the dataset lives. The original absolute path is kept as the default so
# existing runs behave the same, but it can now be overridden without editing
# the notebook by setting the EARTHQUAKE_DATA_PATH environment variable
# (an unset or empty variable falls back to the default).
DEFAULT_DATA_PATH = "C:\\Users\\HP\\Downloads\\earthquake_alert_balanced_dataset.csv"
DATA_PATH = os.environ.get("EARTHQUAKE_DATA_PATH") or DEFAULT_DATA_PATH

df = pd.read_csv(DATA_PATH)

In [3]:
# Show the first five rows as a quick sanity check of the loaded columns.
df.head(5)

Unnamed: 0,magnitude,depth,cdi,mmi,sig,alert
0,7.0,14,8,7,0,green
1,6.9,25,4,4,-33,green
2,7.0,579,3,3,-13,green
3,7.3,37,5,5,65,green
4,6.6,624,0,2,-98,green


In [4]:
# Mark every row that exactly repeats an earlier row, then report whether any exist.
duplicate_mask = df.duplicated()
duplicate_mask.any()


True

In [5]:
# Remove exact duplicate rows, keeping the first occurrence of each.
# This mutates `df` itself; the surviving rows keep their original index labels.
df.drop_duplicates(keep="first", inplace=True)


In [6]:
# Feature columns fed to the models (everything except the `alert` target).
numeric_cols = [
    'magnitude',
    'depth',
    'cdi',
    'mmi',
    'sig',
]


In [7]:
# Force every feature column to a numeric dtype, one column at a time.
# Values that cannot be parsed as numbers are replaced by NaN (errors='coerce').
for column in numeric_cols:
    df[column] = pd.to_numeric(df[column], errors='coerce')


In [8]:
# Fill missing feature values with each column's median.
# Note: the medians are learned from every row currently in `df`.
imputer = SimpleImputer(strategy='median')
imputer.fit(df[numeric_cols])
df[numeric_cols] = imputer.transform(df[numeric_cols])


In [9]:
# Map the string alert levels to integer class ids; the fitted encoder is kept
# so the ids can be translated back to labels later.
label_encoder = LabelEncoder().fit(df['alert'])
df['alert_encoded'] = label_encoder.transform(df['alert'])


In [10]:
# Feature matrix (numeric columns) and integer-encoded target.
X, y = df[numeric_cols], df['alert_encoded']


In [11]:
# Standardize each feature to zero mean and unit variance.
# The statistics are computed over all rows of X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [12]:
# Report the dimensions of the scaled feature matrix and of the target vector.
for caption, data in (("Preprocessed data shape:", X_scaled), ("Target shape:", y)):
    print(caption, data.shape)


Preprocessed data shape: (1256, 5)
Target shape: (1256,)


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC


In [14]:
# Split the *unscaled* features first, then learn the scaling statistics from
# the training fold only. Fitting the scaler on every row before splitting
# lets the held-out rows influence the training features (data leakage) and
# makes the test scores optimistic.
#
# The split is stratified on y so each alert class keeps the same proportion
# in both folds; random_state pins the partition for reproducibility.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Rebind `scaler` to the train-only fit so that it matches the features the
# models below are trained on.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [15]:
# Linear baseline: logistic regression with the iteration cap raised to 1000.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)

# Overall accuracy first, then the confusion matrix and the per-class report.
print(" Logistic Regression")
print("Accuracy:", accuracy_score(y_test, y_pred_log))
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred_log))


 Logistic Regression
Accuracy: 0.6547619047619048
[[51  1  1 12]
 [ 0 29 15 19]
 [ 0 11 49  0]
 [ 9 14  5 36]]
              precision    recall  f1-score   support

           0       0.85      0.78      0.82        65
           1       0.53      0.46      0.49        63
           2       0.70      0.82      0.75        60
           3       0.54      0.56      0.55        64

    accuracy                           0.65       252
   macro avg       0.65      0.66      0.65       252
weighted avg       0.65      0.65      0.65       252



In [16]:
# Single decision tree, limited to depth 10 and seeded for repeatable results.
dt_params = {"max_depth": 10, "random_state": 42}
dt = DecisionTreeClassifier(**dt_params).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Overall accuracy first, then the confusion matrix and the per-class report.
print("\nðŸ”¹ Decision Tree")
print("Accuracy:", accuracy_score(y_test, y_pred_dt))
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred_dt))



ðŸ”¹ Decision Tree
Accuracy: 0.8650793650793651
[[50  1  1 13]
 [ 2 55  3  3]
 [ 0  0 59  1]
 [ 5  3  2 54]]
              precision    recall  f1-score   support

           0       0.88      0.77      0.82        65
           1       0.93      0.87      0.90        63
           2       0.91      0.98      0.94        60
           3       0.76      0.84      0.80        64

    accuracy                           0.87       252
   macro avg       0.87      0.87      0.87       252
weighted avg       0.87      0.87      0.86       252



In [17]:
# Random forest: 200 trees, each limited to depth 10, seeded for repeatability.
rf_params = {"n_estimators": 200, "max_depth": 10, "random_state": 42}
rf = RandomForestClassifier(**rf_params).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Overall accuracy first, then the confusion matrix and the per-class report.
print("\n Random Forest")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred_rf))



 Random Forest
Accuracy: 0.9206349206349206
[[54  0  1 10]
 [ 0 61  2  0]
 [ 0  0 60  0]
 [ 2  3  2 57]]
              precision    recall  f1-score   support

           0       0.96      0.83      0.89        65
           1       0.95      0.97      0.96        63
           2       0.92      1.00      0.96        60
           3       0.85      0.89      0.87        64

    accuracy                           0.92       252
   macro avg       0.92      0.92      0.92       252
weighted avg       0.92      0.92      0.92       252



In [18]:
# Support vector classifier with an RBF kernel (C=1, gamma chosen by 'scale').
svm = SVC(kernel='rbf', C=1, gamma='scale').fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Overall accuracy first, then the confusion matrix and the per-class report.
print("\nðŸ”¹ Support Vector Machine (SVM)")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred_svm))



ðŸ”¹ Support Vector Machine (SVM)
Accuracy: 0.7817460317460317
[[50  3  1 11]
 [ 0 45 16  2]
 [ 0  7 53  0]
 [ 2 10  3 49]]
              precision    recall  f1-score   support

           0       0.96      0.77      0.85        65
           1       0.69      0.71      0.70        63
           2       0.73      0.88      0.80        60
           3       0.79      0.77      0.78        64

    accuracy                           0.78       252
   macro avg       0.79      0.78      0.78       252
weighted avg       0.79      0.78      0.78       252



In [19]:

# Gradient boosting: 150 shallow (depth-3) trees with a 0.1 learning rate,
# seeded for repeatable results.
gb_params = {
    "n_estimators": 150,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}
gb = GradientBoostingClassifier(**gb_params).fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)

# Overall accuracy first, then the confusion matrix and the per-class report.
print("\nðŸ”¹ Gradient Boosting")
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred_gb))




ðŸ”¹ Gradient Boosting
Accuracy: 0.9325396825396826
[[59  1  1  4]
 [ 0 61  1  1]
 [ 0  1 59  0]
 [ 4  2  2 56]]
              precision    recall  f1-score   support

           0       0.94      0.91      0.92        65
           1       0.94      0.97      0.95        63
           2       0.94      0.98      0.96        60
           3       0.92      0.88      0.90        64

    accuracy                           0.93       252
   macro avg       0.93      0.93      0.93       252
weighted avg       0.93      0.93      0.93       252



In [20]:
# Element-wise comparison of the gradient-boosting predictions with the true
# labels; the number of True entries is the count of correct predictions.
is_match = y_pred_gb == y_test
correct = is_match.sum()
total = len(y_test)

print(f"Correct Predictions: {correct}/{total}")


Correct Predictions: 235/252


In [21]:
import joblib

# The forest was trained on imputed, standardized features and integer-encoded
# labels. Persist the fitted preprocessing objects next to the model; without
# them the pickle cannot be applied to new raw data, nor its predicted class
# ids mapped back to alert labels.
joblib.dump(
    {"imputer": imputer, "scaler": scaler, "label_encoder": label_encoder},
    "preprocessing.pkl",
)

# NOTE(review): in the recorded run Gradient Boosting scored slightly higher
# than the Random Forest (about 0.93 vs 0.92 accuracy) -- confirm that the
# Random Forest is the model intended for export.
joblib.dump(rf, "random_forest_model.pkl")


['random_forest_model.pkl']