Step 1: Import Libraries and Load Data

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.experimental import enable_iterative_imputer # Explicitly enable experimental feature
from sklearn.impute import IterativeImputer # Import IterativeImputer

df = pd.read_csv('preprocessed_earthquake_data.csv')

Step 2: Prepare Features and Target

In [19]:
features = df.drop(columns=['Magnitude Type', 'Type', 'Source', 'Status'])
target = df['Magnitude Type']
le = LabelEncoder()
y = le.fit_transform(target)
X = features.values

Step 3: Implement Cross-Validation

In [20]:
svc_model = SVC(random_state=42)

In [21]:
print("Performing 5-fold cross-validation...")
cv_scores = cross_val_score(svc_model, X, y, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation accuracy: {cv_scores.mean():.4f}")

Performing 5-fold cross-validation...
Cross-validation scores: [0.99658266 0.99807774 0.99850491 0.99850491 0.9299295 ]
Mean cross-validation accuracy: 0.9843


Step 4: Hyperparameter Tuning with GridSearchCV

In [24]:
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}
grid_search = GridSearchCV(svc_model, param_grid, cv=5, scoring='accuracy', verbose=1)
print("\nStarting GridSearchCV for hyperparameter tuning...")
grid_search.fit(X, y)
print("\nBest parameters found by GridSearchCV:")
print(grid_search.best_params_)


Starting GridSearchCV for hyperparameter tuning...
Fitting 5 folds for each of 12 candidates, totalling 60 fits

Best parameters found by GridSearchCV:
{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}


Step 5: Evaluate Best Model on Full Dataset

In [23]:
print("Best cross-validated accuracy score:")
print(f"{grid_search.best_score_:.4f}")
best_model = grid_search.best_estimator_
print("\nBest model's details:")
print(best_model)

Best cross-validated accuracy score:
0.9886

Best model's details:
SVC(C=1, gamma='auto', random_state=42)
