<a href="https://colab.research.google.com/github/Aayush077/AD_LAB/blob/main/Copy_of_earthquake_predictor_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Earthquake Prediction


##Importing Libraries

In [None]:
import pandas as pd
import numpy as np
from google.colab import data_table
import matplotlib.pyplot as plt
!pip install cartopy
import cartopy.crs as ccrs
import seaborn as sns

##Importing dataset

In [None]:
# Switch on Colab's DataFrame formatter for the frames displayed below.
data_table.enable_dataframe_formatter()

# Raw catalogue exactly as read from disk; later cells work on a copy of it.
df_raw = pd.read_csv('earthquake_1995-2023.csv')

# Preview the first rows, then the (rows, columns) shape.
display(df_raw.head(), df_raw.shape)

In [None]:
# List the raw column labels so the renaming and feature-selection cells
# further down can be checked against what the file actually contains.
print(df_raw.columns)


###Visualization of data

In [None]:
# `cartopy.feature` is a submodule; `import cartopy.crs` does not promise to
# load it, so it is imported explicitly here instead of being reached through
# `__import__("cartopy").feature`.
import cartopy.feature as cfeature

# World map of every recorded event on a plate carree projection.
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
ax.set_global()

# Base layers: coastlines, cartopy's stock background image, country borders.
ax.coastlines()
ax.stock_img()
ax.add_feature(cfeature.BORDERS)

# One small red dot per event.
# NOTE(review): assumes 'longitude'/'latitude' are plain degrees - confirm.
ax.scatter(
    df_raw['longitude'],
    df_raw['latitude'],
    s=2,
    color='red',
    transform=ccrs.PlateCarree(),
)

ax.set_title("All Affected Areas (Earthquake Locations)", fontsize=16)
plt.show()


In [None]:
# Correlation heatmap.
# Pairwise correlations are computed over the numeric columns only.
corr = df_raw.select_dtypes(include='number').corr()

# Annotated heatmap, two decimals per cell.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()


## Preprocessing & Splitting


### Inspect the dataset and create a safe working copy

In [None]:
# Structural audit of the raw frame: column names, dtypes, NaN count per column.
print('Columns:', df_raw.columns.tolist())
print('\nDtypes:\n', df_raw.dtypes)
print('\nMissing values per column:\n', df_raw.isnull().sum())

# Working copy: the cleaning cells below modify `df` and leave `df_raw` as loaded.
df = df_raw.copy()


In [None]:
# Standardise column names. Only 'mag' and 'nst' really change; every other
# entry maps a name to itself and just records the columns this notebook
# knows about.
col_map = {
    **{
        name: name
        for name in (
            'magnitude', 'date_time', 'time', 'latitude', 'longitude',
            'depth', 'magType', 'dmin', 'gap', 'rms', 'mmi', 'cdi', 'alert',
            'tsunami', 'country', 'continent', 'location', 'title', 'sig',
        )
    },
    'mag': 'magnitude',
    'nst': 'num_stations',
}

# rename() ignores mapping keys that are not columns of the frame, so only the
# columns that actually exist are affected.
df = df.rename(columns=col_map)

print("Renamed columns:")
print(df.columns.tolist())


In [None]:
# Turn the timestamp column into real datetimes, derive calendar features from
# it and make it the index. The guard makes the cell a no-op when the column is
# absent (for example once it has already become the index).
if 'date_time' in df.columns:
    parsed_ts = pd.to_datetime(df['date_time'], errors='coerce')

    # errors='coerce' turns unparseable values into NaT; report how many.
    print("\nInvalid datetime rows:", parsed_ts.isna().sum())

    df = df.assign(
        date_time=parsed_ts,
        year=parsed_ts.dt.year,
        month=parsed_ts.dt.month,
        day=parsed_ts.dt.day,
        hour=parsed_ts.dt.hour,
    ).set_index('date_time')


In [None]:
# Candidate predictors; names that are not columns of `df` are skipped below.
# NOTE(review): 'title' presumably embeds the magnitude text, and
# 'sig'/'mmi'/'cdi' look like quantities measured from the same event - confirm
# that they do not leak the target into the features.
feature_list = [
    'latitude', 'longitude', 'depth', 'num_stations', 'dmin', 'gap', 'rms',
    'mmi', 'cdi', 'tsunami', 'sig', 'year', 'month', 'hour',
    'magType', 'alert', 'continent', 'country', 'location', 'title',
]

keep_cols = [col for col in feature_list if col in df.columns]
print("Keeping columns:", keep_cols)

# Modelling frame = available predictors + target. Rows without a magnitude
# cannot be labelled, so they are dropped.
df_model = df[keep_cols + ['magnitude']].copy().dropna(subset=['magnitude'])

# Missing-value handling.
# Numeric predictors (target excluded) are filled with their column median.
# NOTE(review): the medians are computed on all rows, before the train/test split.
num_cols = df_model.select_dtypes(include=['number']).columns.drop('magnitude')
numeric_medians = df_model[num_cols].median()
df_model[num_cols] = df_model[num_cols].fillna(numeric_medians)

# Text predictors get an explicit 'missing' category.
cat_cols = df_model.select_dtypes(include=['object']).columns
df_model[cat_cols] = df_model[cat_cols].fillna('missing')

print("Missing values after cleaning:\n", df_model.isna().sum())


In [None]:
# Summary statistics of the target, then the number of events per
# integer-rounded magnitude (the classes used for classification below).
print(df_model['magnitude'].describe())
print("\nRounded magnitude counts:")
rounded_counts = df_model['magnitude'].round().value_counts().sort_index()
print(rounded_counts)

### CORE PREPROCESSING AND SPLITTING OF DATASET

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib

# Classification target: magnitude rounded to the nearest integer.
rounded_mag = df_model['magnitude'].round().astype(int)

# Re-label the distinct rounded magnitudes as 0, 1, 2, ... in ascending order
# (0-indexed labels for XGBoost).
unique_classes = sorted(rounded_mag.unique())
class_mapping = {mag_value: class_id for class_id, mag_value in enumerate(unique_classes)}
df_model['mag_class'] = rounded_mag.map(class_mapping)

# Features / target (CLASSIFICATION): the raw magnitude and the label derived
# from it are both removed from the feature matrix.
y = df_model['mag_class']
X = df_model.drop(columns=['magnitude', 'mag_class'])

# 80/20 hold-out split, stratified on the class label and seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print("Train/Test shapes:", X_train.shape, X_test.shape)

# Column groups by dtype: numeric columns are scaled, object columns one-hot encoded.
numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

print("Numeric:", numeric_features)
print("Categorical:", categorical_features)

# Per-group transformers. handle_unknown='ignore' lets categories that only
# occur in the test split pass through without raising an error.
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

# Preprocessing step shared by the model pipelines below:
# numeric block first, one-hot block second.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
])

##LOGISTIC REGRESSION

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Preprocessing followed by logistic regression (iteration cap raised to 1000).
lr_model = Pipeline([
    ('pre', preprocessor),
    ('model', LogisticRegression(max_iter=1000)),
])

# Fit on the training split, then label the held-out rows.
y_pred_lr = lr_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_lr = accuracy_score(y_test, y_pred_lr)

print("Logistic Regression Accuracy:", accuracy_lr)
print("\nClassification Report:\n", classification_report(y_test, y_pred_lr))


###Visualization(Actual Vs Predicted Logistic Reg)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (logistic regression).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_lr,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - Logistic Regression")
plt.show()


##KNN CLASSIFIER

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by a 5-nearest-neighbours classifier.
knn_model = Pipeline([
    ('pre', preprocessor),
    ('model', KNeighborsClassifier(n_neighbors=5)),
])

# Fit on the training split, then label the held-out rows.
y_pred_knn = knn_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_knn = accuracy_score(y_test, y_pred_knn)

print("KNN Accuracy:", accuracy_knn)
print("\nClassification Report:\n", classification_report(y_test, y_pred_knn))


### Visualization (KNN)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (KNN).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_knn,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - KNN Classifier")
plt.show()


##NAIVE BAYES CLASSIFIER

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by Gaussian naive Bayes.
nb_model = Pipeline([
    ('pre', preprocessor),
    ('model', GaussianNB()),
])

# Fit on the training split, then label the held-out rows.
y_pred_nb = nb_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_nb = accuracy_score(y_test, y_pred_nb)

print("Naive Bayes Accuracy:", accuracy_nb)
print("\nClassification Report:\n", classification_report(y_test, y_pred_nb))


### VISUALIZATION (NAIVE BAYES CLASSIFIER)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (naive Bayes).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_nb,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - Naive Bayes")
plt.show()


##ADABOOST CLASSIFIER

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by AdaBoost: 200 boosting rounds, learning rate 0.5,
# seeded so the run is repeatable.
ada_model = Pipeline([
    ('pre', preprocessor),
    ('model', AdaBoostClassifier(n_estimators=200, learning_rate=0.5, random_state=42)),
])

# Fit on the training split, then label the held-out rows.
y_pred_ada = ada_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_ada = accuracy_score(y_test, y_pred_ada)

print("AdaBoost Accuracy:", accuracy_ada)
print("\nClassification Report:\n", classification_report(y_test, y_pred_ada))


###VISUALIZATION ADABOOST

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (AdaBoost).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_ada,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - AdaBoost Classifier")
plt.show()


##GRADIENT BOOSTING CLASSIFIER

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by gradient boosting: 200 depth-3 trees,
# learning rate 0.1, seeded.
gb_model = Pipeline([
    ('pre', preprocessor),
    ('model', GradientBoostingClassifier(
        n_estimators=200,
        max_depth=3,
        learning_rate=0.1,
        random_state=42,
    )),
])

# Fit on the training split, then label the held-out rows.
y_pred_gb = gb_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_gb = accuracy_score(y_test, y_pred_gb)

print("Gradient Boosting Accuracy:", accuracy_gb)
print("\nClassification Report:\n", classification_report(y_test, y_pred_gb))


###VISUALIZATION

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (gradient boosting).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_gb,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - Gradient Boosting Classifier")
plt.show()


##EXTRA TREES

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by extra trees: 300 trees with no depth limit,
# seeded, n_jobs=-1.
et_model = Pipeline([
    ('pre', preprocessor),
    ('model', ExtraTreesClassifier(
        n_estimators=300,
        max_depth=None,
        n_jobs=-1,
        random_state=42,
    )),
])

# Fit on the training split, then label the held-out rows.
y_pred_et = et_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_et = accuracy_score(y_test, y_pred_et)

print("Extra Trees Accuracy:", accuracy_et)
print("\nClassification Report:\n", classification_report(y_test, y_pred_et))


###VISUALIZATION

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (extra trees).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_et,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - Extra Trees Classifier")
plt.show()


## SVC

In [None]:
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by an RBF-kernel support vector classifier.
# The `svr_` prefix is historical: this is a classifier (SVC), not a regressor.
svr_model = Pipeline([
    ('pre', preprocessor),
    ('model', SVC(kernel='rbf')),
])

# Fit on the training split, then label the held-out rows.
y_pred_svr = svr_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_svr = accuracy_score(y_test, y_pred_svr)

print("SVC Accuracy:", accuracy_svr)
print("\nClassification Report:\n", classification_report(y_test, y_pred_svr))


###Visualization(Actual Vs Predicted SVC)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (SVC).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_svr,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - SVC")
plt.show()


##DECISION TREE CLASSIFIER

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by a single seeded decision tree.
dt_model = Pipeline([
    ('pre', preprocessor),
    ('model', DecisionTreeClassifier(random_state=42)),
])

# Fit on the training split, then label the held-out rows.
y_pred_dt = dt_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy plus the per-class report.
accuracy_dt = accuracy_score(y_test, y_pred_dt)

print("Decision Tree Accuracy:", accuracy_dt)
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt))


###Visualization(Actual Vs Predicted Decision Tree Classifier)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (decision tree).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_dt,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - Decision Tree Classifier")
plt.show()


## RANDOMFOREST CLASSIFIER

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Preprocessing followed by a random forest: 200 trees capped at depth 20,
# seeded, n_jobs=-1.
rf_model = Pipeline(steps=[
    ("pre", preprocessor),
    ("model", RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        n_jobs=-1,
        random_state=42,
    )),
])

# Fit on the training split, then label the held-out rows.
y_pred_rf = rf_model.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy (printed to four decimals) plus the per-class report.
accuracy_rf = accuracy_score(y_test, y_pred_rf)

print(f"Random Forest Accuracy: {accuracy_rf:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf))


### Visualization(Actual vs Predicted RandomForestCla)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (random forest).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_rf,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - Random Forest Classifier")
plt.show()


###Top Feature Importances (RandomForest)

In [None]:
# Pull the fitted forest and the fitted one-hot encoder out of the pipeline.
rf = rf_model.named_steps["model"]
ohe = rf_model.named_steps["pre"].named_transformers_["cat"]

# Feature names in the order the preprocessor emits them:
# numeric columns first, then the expanded one-hot columns.
cat_feature_names = ohe.get_feature_names_out(categorical_features).tolist()
all_feature_names = numeric_features + cat_feature_names

# One row per transformed feature with its importance score.
importances = pd.DataFrame({
    "feature": all_feature_names,
    "importance": rf.feature_importances_,
})

# Twenty highest-scoring features.
top_features = importances.sort_values("importance", ascending=False).iloc[:20]

# Horizontal bars, most important feature at the top.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_features["feature"], top_features["importance"], color="purple")
ax.invert_yaxis()
ax.set_title("Top 20 Important Features (RandomForest)")
ax.set_xlabel("Importance Score")
plt.show()


## XG BOOST CLASSIFIER

In [None]:
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, f1_score

# Preprocessing followed by the baseline (untuned) XGBoost classifier:
# 200 trees of depth 6, learning rate 0.1, multi-class soft-probability
# objective, seeded.
xgb_model = Pipeline([
    ("pre", preprocessor),
    ("model", XGBClassifier(
        objective='multi:softprob',
        n_estimators=200,
        max_depth=6,
        learning_rate=0.1,
        random_state=42,
        n_jobs=-1,
        eval_metric='mlogloss'
    ))
])

# Train
xgb_model.fit(X_train, y_train)

# Predict
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate. The weighted F1 is computed here, next to the accuracy, because the
# before/after-tuning comparison cells read `f1_xgb` long before the results
# section assigns it; without this line a fresh Run All stops with a NameError.
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
f1_xgb = f1_score(y_test, y_pred_xgb, average='weighted')

print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_xgb))


### Visualization(Actual Vs Predicted XGBoostCla)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# True vs predicted class counts on the hold-out set (baseline XGBoost).
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred_xgb,
    xticks_rotation=45,
    cmap='Blues',
)
cm_display.ax_.set_title("Confusion Matrix - XGBoost Classifier")
plt.show()


### Top Feature Importances (XGBoost)

In [None]:
    # Pull the fitted booster and the fitted one-hot encoder out of the pipeline.
    xgb = xgb_model.named_steps["model"]
    ohe = xgb_model.named_steps["pre"].named_transformers_["cat"]

    # Feature names in the order the preprocessor emits them:
    # numeric columns first, then the expanded one-hot columns.
    cat_feature_names = ohe.get_feature_names_out(categorical_features).tolist()
    all_feature_names = numeric_features + cat_feature_names

    # One row per transformed feature with its importance score.
    importances = pd.DataFrame({
        "feature": all_feature_names,
        "importance": xgb.feature_importances_,
    })

    # Twenty highest-scoring features.
    top_features = importances.sort_values("importance", ascending=False).iloc[:20]

    # Horizontal bars, most important feature at the top.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(top_features["feature"], top_features["importance"], color="green")
    ax.invert_yaxis()
    ax.set_title("Top 20 Important Features (XGBoost)")
    ax.set_xlabel("Importance Score")
    plt.show()


##XGBOOST HYPERTUNING CLASSIFIER

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, f1_score

# Base estimator; the search overrides the hyper-parameters listed in param_dist.
xgb_clf = XGBClassifier(
    n_jobs=-1,
    random_state=42,
    eval_metric='mlogloss',
    objective='multi:softprob',
)

# Preprocessing + classifier, tuned as a single estimator.
xgb_pipeline = Pipeline([
    ("pre", preprocessor),
    ("model", xgb_clf),
])

# Candidate values. The "model__" prefix routes each parameter to the
# pipeline step named "model".
param_dist = dict(
    model__n_estimators=[200, 300, 400],
    model__max_depth=[3, 5, 7, 9],
    model__learning_rate=[0.01, 0.05, 0.1],
    model__subsample=[0.7, 0.8, 0.9, 1.0],
    model__colsample_bytree=[0.7, 0.8, 0.9, 1.0],
)

# 20 random parameter draws, each scored with 3-fold CV on weighted F1.
# NOTE(review): both the search and the classifier request n_jobs=-1; confirm
# this does not oversubscribe the CPU on the target machine.
xgb_search = RandomizedSearchCV(
    xgb_pipeline,
    param_dist,
    n_iter=20,
    scoring='f1_weighted',
    cv=3,
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

# Run the search on the training split.
xgb_search.fit(X_train, y_train)


In [None]:
# Winning hyper-parameter combination from the randomized search.
print("Best Parameters Found:", xgb_search.best_params_, sep="\n")

In [None]:
# Label the held-out rows through the fitted search object.
y_pred_xgb_tuned = xgb_search.predict(X_test)

# Hold-out accuracy and weighted F1 of the tuned model.
accuracy_xgb_tuned = accuracy_score(y_true=y_test, y_pred=y_pred_xgb_tuned)
f1_xgb_tuned = f1_score(y_true=y_test, y_pred=y_pred_xgb_tuned, average='weighted')

print("Tuned XGBoost Accuracy:", accuracy_xgb_tuned)
print("Tuned XGBoost F1-score:", f1_xgb_tuned)

In [None]:
# Side-by-side hold-out accuracy of the baseline and the tuned XGBoost model.
print("Before Tuning Accuracy:", accuracy_xgb)
print("After Tuning Accuracy :", accuracy_xgb_tuned)

# Same comparison for the weighted F1-score. `f1_xgb` is the baseline model's
# score and must already exist when this cell runs.
print("Before Tuning F1:", f1_xgb)
print("After Tuning F1 :", f1_xgb_tuned)


###VISUALIZATION

In [None]:
# XGBoost performance comparison: one row per model version,
# baseline first and tuned second.
comparison = pd.DataFrame(
    [
        ("Before Tuning", accuracy_xgb, f1_xgb),
        ("After Tuning", accuracy_xgb_tuned, f1_xgb_tuned),
    ],
    columns=["Version", "Accuracy", "F1-score"],
)

display(comparison)


In [None]:
# Accuracy before vs after tuning, on a fixed 0-1 axis so the bars are comparable.
fig, ax = plt.subplots(figsize=(6, 4))

ax.bar(
    comparison["Version"],
    comparison["Accuracy"],
    color=["steelblue", "seagreen"],
)

ax.set_title("XGBoost Accuracy: Before vs After Hyperparameter Tuning")
ax.set_ylabel("Accuracy")
ax.set_ylim(0, 1)
ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()


In [None]:
# Weighted F1 before vs after tuning, on a fixed 0-1 axis so the bars are comparable.
fig, ax = plt.subplots(figsize=(6, 4))

ax.bar(
    comparison["Version"],
    comparison["F1-score"],
    color=["darkorange", "green"],
)

ax.set_title("XGBoost F1-score: Before vs After Hyperparameter Tuning")
ax.set_ylabel("F1-score")
ax.set_ylim(0, 1)
ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()


##XGBOOST AFTER MERGE

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
import joblib
import numpy as np # Ensure numpy is imported

# Rebuild the integer-magnitude class from 'magnitude'. It is kept for context
# only and is not the target of the 3-class problem set up below.
rounded_mag_ctx = df_model['magnitude'].round().astype(int)

# Re-label the distinct rounded magnitudes as 0, 1, 2, ... in ascending order
# (0-indexed labels for XGBoost).
unique_classes_mag = sorted(rounded_mag_ctx.unique())
class_mapping_mag = {mag_value: class_id for class_id, mag_value in enumerate(unique_classes_mag)}
df_model['mag_class'] = rounded_mag_ctx.map(class_mapping_mag)

def merge_severity(mag):
    """Map a magnitude to one of three merged severity bands.

    Returns "Moderate" for mag < 7.0, "Strong" for 7.0 <= mag < 8.0 and
    "Major" for everything else (mag >= 8.0, and any value for which both
    comparisons are false).
    """
    # Bands are checked in ascending order; the first upper bound that the
    # magnitude falls below decides the label.
    for upper_bound, band in ((7.0, "Moderate"), (8.0, "Strong")):
        if mag < upper_bound:
            return band
    return "Major"

# Label every event with its merged severity band.
df_model["severity_3class"] = df_model["magnitude"].apply(merge_severity)

# Check distribution
print("Severity 3-class distribution:\n", df_model["severity_3class"].value_counts())

# Encode string labels to numerical labels for XGBoost
le = LabelEncoder()
df_model["severity_3class_encoded"] = le.fit_transform(df_model["severity_3class"])
print("Encoded severity classes:", le.classes_)
print("Mapping: ", list(zip(le.classes_, le.transform(le.classes_))))

# Train-test split for the 3-class classification problem.
# Every column derived from the magnitude must be kept out of X: the raw
# magnitude, both severity columns AND 'mag_class'. 'mag_class' is the rounded
# magnitude, so leaving it in would hand the model the answer (target leakage).
X = df_model.drop(
    columns=["magnitude", "mag_class", "severity_3class", "severity_3class_encoded"]
)
y = df_model["severity_3class_encoded"]  # Use the encoded numerical labels

# 80/20 hold-out split, stratified on the severity label and seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("\nTrain/Test shapes:", X_train.shape, X_test.shape)
print("y_train unique values:", y_train.unique())
print("y_test unique values:", y_test.unique())

# Identify numeric and categorical features from the new X
numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

print("Numeric features for preprocessor:", numeric_features)
print("Categorical features for preprocessor:", categorical_features)

# Per-group transformers. handle_unknown='ignore' lets categories that only
# occur in the test split pass through without raising an error.
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(
    handle_unknown='ignore',
    sparse_output=False
)

# Fresh preprocessing step for the 3-class pipeline:
# numeric block first, one-hot block second.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features)
    ]
)


In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Preprocessing followed by XGBoost for the 3-class severity target:
# 300 trees of depth 6, learning rate 0.1, seeded.
xgb_3class = Pipeline([
    ("pre", preprocessor),
    ("model", XGBClassifier(
        n_estimators=300,
        max_depth=6,
        learning_rate=0.1,
        objective="multi:softprob",
        eval_metric="mlogloss",
        n_jobs=-1,
        random_state=42,
    )),
])

# Fit on the training split, then label the held-out rows.
y_pred_3class = xgb_3class.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy and weighted F1.
accuracy_3class = accuracy_score(y_true=y_test, y_pred=y_pred_3class)
f1_3class = f1_score(y_true=y_test, y_pred=y_pred_3class, average="weighted")

print("3-Class XGBoost Accuracy:", accuracy_3class)
print("3-Class XGBoost F1-score:", f1_3class)

print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_3class))

### VISUALIZATION

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# The targets are LabelEncoder integers, which are unreadable on the axes.
# Pin the class order to the encoder's (0..K-1) and label the ticks with the
# original severity names. Passing `labels` explicitly keeps the matrix K x K
# even if a class is missing from the hold-out set, so the tick names always
# line up.
encoded_labels = np.arange(len(le.classes_))

ConfusionMatrixDisplay.from_predictions(
    y_test,
    y_pred_3class,
    labels=encoded_labels,
    display_labels=le.classes_,
    cmap="Blues",
    xticks_rotation=45
)

plt.title("Confusion Matrix – 3-Class Earthquake Severity")
plt.show()


##RESULTS

In [None]:
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# By this point `y_test` has been overwritten by the 3-class severity split,
# while every y_pred_* below was produced for the original mag_class hold-out
# set. Scoring them against `y_test` would compare unrelated labels.
# Rebuild the original hold-out labels instead: with the same labels,
# test_size, stratification and random_state, train_test_split selects the
# same rows as the first split did.
_y_train_mag, y_test_mag = train_test_split(
    df_model['mag_class'],
    test_size=0.2,
    random_state=42,
    stratify=df_model['mag_class'],
)
assert len(y_test_mag) == len(y_pred_lr), "hold-out labels and predictions differ in length"

# Weighted F1 of every mag_class model on that hold-out set.
f1_lr  = f1_score(y_test_mag, y_pred_lr,  average='weighted')
f1_svr = f1_score(y_test_mag, y_pred_svr, average='weighted')
f1_dt  = f1_score(y_test_mag, y_pred_dt,  average='weighted')
f1_rf  = f1_score(y_test_mag, y_pred_rf,  average='weighted')
f1_xgb = f1_score(y_test_mag, y_pred_xgb, average='weighted')
f1_knn = f1_score(y_test_mag, y_pred_knn, average='weighted')
f1_nb  = f1_score(y_test_mag, y_pred_nb,  average='weighted')

f1_ada = f1_score(y_test_mag, y_pred_ada, average='weighted')
f1_gb  = f1_score(y_test_mag, y_pred_gb,  average='weighted')
f1_et  = f1_score(y_test_mag, y_pred_et,  average='weighted')





In [None]:
# Final classification results summary: one record per model, in display order.
results = [
    {"Model": model_name, "Accuracy": model_accuracy, "F1-score": model_f1}
    for model_name, model_accuracy, model_f1 in (
        ("Logistic Regression", accuracy_lr, f1_lr),
        ("SVC", accuracy_svr, f1_svr),
        ("Decision Tree", accuracy_dt, f1_dt),
        ("Random Forest", accuracy_rf, f1_rf),
        ("XGBoost", accuracy_xgb, f1_xgb),
        ("KNN", accuracy_knn, f1_knn),
        ("Naive Bayes", accuracy_nb, f1_nb),
        ("AdaBoost", accuracy_ada, f1_ada),
        ("Gradient Boosting", accuracy_gb, f1_gb),
        ("Extra Trees", accuracy_et, f1_et),
    )
]

res_df = pd.DataFrame(results)
display(res_df)


In [None]:
from sklearn.model_selection import train_test_split

# Count misclassifications per class (Random Forest).
# `y_test` now holds the 3-class severity labels, but `y_pred_rf` was produced
# for the original mag_class hold-out set. Rebuild those labels: with the same
# labels, test_size, stratification and random_state, train_test_split selects
# the same rows as the first split did.
_y_train_mag, y_test_mag = train_test_split(
    df_model['mag_class'],
    test_size=0.2,
    random_state=42,
    stratify=df_model['mag_class'],
)
assert len(y_test_mag) == len(y_pred_rf), "hold-out labels and predictions differ in length"

# Positional comparison on plain arrays, so the date index plays no role.
actual_classes = y_test_mag.to_numpy()
errors = actual_classes != y_pred_rf
error_df = pd.DataFrame({
    "Actual Class": actual_classes,
    "Predicted Class": y_pred_rf,
    "Error": errors
})

# Number of wrong predictions for each true class.
error_counts = error_df[error_df["Error"]].groupby("Actual Class").size()

fig, ax = plt.subplots(figsize=(8, 5))
error_counts.plot(kind='bar', ax=ax)
ax.set_title("Misclassification Count per Class (Random Forest)")
ax.set_xlabel("Actual Class")
ax.set_ylabel("Number of Errors")
ax.grid(True)
plt.show()


In [None]:
from sklearn.model_selection import train_test_split

# Count misclassifications per class (baseline XGBoost).
# `y_test` now holds the 3-class severity labels, but `y_pred_xgb` was produced
# for the original mag_class hold-out set. Rebuild those labels: with the same
# labels, test_size, stratification and random_state, train_test_split selects
# the same rows as the first split did.
_y_train_mag, y_test_mag = train_test_split(
    df_model['mag_class'],
    test_size=0.2,
    random_state=42,
    stratify=df_model['mag_class'],
)
assert len(y_test_mag) == len(y_pred_xgb), "hold-out labels and predictions differ in length"

# Positional comparison on plain arrays, so the date index plays no role.
actual_classes = y_test_mag.to_numpy()
errors = actual_classes != y_pred_xgb

error_df = pd.DataFrame({
    "Actual Class": actual_classes,
    "Predicted Class": y_pred_xgb,
    "Error": errors
})

# Number of wrong predictions for each true class.
error_counts = error_df[error_df["Error"]].groupby("Actual Class").size()

fig, ax = plt.subplots(figsize=(8, 5))
error_counts.plot(kind='bar', ax=ax)
ax.set_title("Misclassification Count per Class (XGBoost)")
ax.set_xlabel("Actual Class")
ax.set_ylabel("Number of Errors")
ax.grid(True)
plt.show()


In [None]:
# Accuracy of every model side by side, on a fixed 0-1 axis.
fig, ax = plt.subplots(figsize=(11, 5))

ax.bar(res_df["Model"], res_df["Accuracy"], color="steelblue")

ax.set_title("Accuracy Comparison of Classification Models")
ax.set_xlabel("Model")
ax.set_ylabel("Accuracy")
ax.set_ylim(0, 1)

# Slanted, right-aligned model names so the labels do not overlap.
plt.xticks(rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()


In [None]:
# Weighted F1 of every model side by side, on a fixed 0-1 axis.
fig, ax = plt.subplots(figsize=(11, 5))

ax.bar(res_df["Model"], res_df["F1-score"], color="darkorange")

ax.set_title("F1-score Comparison of Classification Models")
ax.set_xlabel("Model")
ax.set_ylabel("F1-score")
ax.set_ylim(0, 1)

# Slanted, right-aligned model names so the labels do not overlap.
plt.xticks(rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()


In [None]:
# Write the modelling frame to disk. It is saved as it stands at this point,
# i.e. including the label columns added by earlier cells (mag_class and the
# severity columns); index=True also writes the frame's index as a column.
df_model.to_csv("earthquake_cleaned.csv", index=True)
print("Cleaned dataset saved as earthquake_cleaned.csv")
