In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier

In [2]:
df = pd.read_csv("Data_for_UCI_named.csv")
df

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,unstable


In [3]:
# Split into features (X) and target variable (y)
X = df.drop('stabf', axis=1)
y = df['stabf']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

### SVM Classifier

In [4]:
# Define the kernels you want to try
kernels = ['linear', 'poly', 'rbf']

# Train and evaluate SVM with each kernel
for kernel in kernels:
    # Create SVM classifier with the current kernel
    model = SVC(kernel=kernel)

    # Fit the model on the training data
    model.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Evaluate the model
    print(f'{kernel} kernel Results:')
    print(classification_report(y_test, y_pred))
    print("confusion_matrix:")
    print(confusion_matrix(y_test, y_pred))
    print()

linear kernel Results:
              precision    recall  f1-score   support

      stable       0.92      0.92      0.92       870
    unstable       0.96      0.96      0.96      1630

    accuracy                           0.94      2500
   macro avg       0.94      0.94      0.94      2500
weighted avg       0.94      0.94      0.94      2500

confusion_matrix:
[[ 798   72]
 [  66 1564]]

poly kernel Results:
              precision    recall  f1-score   support

      stable       0.84      0.90      0.87       870
    unstable       0.94      0.91      0.92      1630

    accuracy                           0.90      2500
   macro avg       0.89      0.90      0.90      2500
weighted avg       0.91      0.90      0.90      2500

confusion_matrix:
[[ 782   88]
 [ 152 1478]]

rbf kernel Results:
              precision    recall  f1-score   support

      stable       0.85      0.82      0.83       870
    unstable       0.91      0.92      0.91      1630

    accuracy              

### KNN Classifier

In [5]:
# Define K values to explore
param_grid = {'n_neighbors': range(1, 31)}

# Grid Search for optimal K
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Best K and performance
best_k = grid_search.best_params_['n_neighbors']
best_knn = KNeighborsClassifier(n_neighbors=best_k)
best_knn.fit(X_train, y_train)
y_pred_knn = best_knn.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print("Best K:", best_k)
print("KNN Accuracy:", accuracy_knn)
print(classification_report(y_test, y_pred_knn))

Best K: 15
KNN Accuracy: 0.7928
              precision    recall  f1-score   support

      stable       0.72      0.66      0.69       870
    unstable       0.83      0.86      0.84      1630

    accuracy                           0.79      2500
   macro avg       0.77      0.76      0.77      2500
weighted avg       0.79      0.79      0.79      2500



### Decision Tree Classifier

In [6]:
# Define depth values to explore
param_grid = {'max_depth': range(1, 11)}

# Grid Search for optimal depth
dt = DecisionTreeClassifier()
grid_search = GridSearchCV(dt, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Best depth and performance
best_depth = grid_search.best_params_['max_depth']
best_dt = DecisionTreeClassifier(max_depth=best_depth)
best_dt.fit(X_train, y_train)
y_pred_dt = best_dt.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print("Best Depth:", best_depth)
print("Decision Tree Accuracy:", accuracy_dt)
print(classification_report(y_test, y_pred_dt))

Best Depth: 1
Decision Tree Accuracy: 0.9996
              precision    recall  f1-score   support

      stable       1.00      1.00      1.00       870
    unstable       1.00      1.00      1.00      1630

    accuracy                           1.00      2500
   macro avg       1.00      1.00      1.00      2500
weighted avg       1.00      1.00      1.00      2500



Hyperparameter tuning is crucial in ML model development to find the optimal parameter values that maximize a model's performance. In this example:

SVM Kernels: The choice of kernel (linear, RBF, polynomial) affects how the model separates data. Different kernels may be better suited for various data distributions.

KNN: The n_neighbors parameter controls the number of neighbors considered for classification. A smaller k might lead to more flexible but potentially noisier models, while a larger k might make the model more robust but less sensitive to local patterns.

Decision Tree: The max_depth parameter limits the tree's depth, preventing overfitting. A deeper tree can capture complex patterns but might be more prone to overfitting on noisy data.