In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
import numpy as np

In [3]:
# Load the dataset
df = pd.read_csv('Data_for_UCI_named.csv')

# Print the dimension of the dataset
print('Dataset shape:', df.shape)

Dataset shape: (10000, 14)


In [4]:
# Prepare features and target
X = df.drop(['stabf'], axis=1)
y = df['stabf']

# Encode target labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

kernels = ['linear', 'rbf', 'poly']
results = {}

for kernel in kernels:
    svm = SVC(kernel=kernel, random_state=42)
    svm.fit(X_train_scaled, y_train)
    y_pred = svm.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=le.classes_)
    cm = confusion_matrix(y_test, y_pred)
    results[kernel] = {
        'accuracy': acc,
        'classification_report': report,
        'confusion_matrix': cm,
        'hyperparameters': svm.get_params()
    }
    print(f'\nKernel: {kernel}')
    print('Accuracy:', acc)
    print('Classification Report:\n', report)
    print('Confusion Matrix:\n', cm)
    print('Used Hyperparameters:', svm.get_params())


Kernel: linear
Accuracy: 0.9955
Classification Report:
               precision    recall  f1-score   support

      stable       0.99      0.99      0.99       724
    unstable       1.00      1.00      1.00      1276

    accuracy                           1.00      2000
   macro avg       0.99      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

Confusion Matrix:
 [[ 720    4]
 [   5 1271]]
Used Hyperparameters: {'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': 42, 'shrinking': True, 'tol': 0.001, 'verbose': False}

Kernel: rbf
Accuracy: 0.99
Classification Report:
               precision    recall  f1-score   support

      stable       0.99      0.98      0.99       724
    unstable       0.99      0.99      0.99      1276

    accuracy                           0.99      20

In [6]:
# SVM: Try different C values for each kernel
C_values = [0.01, 0.1, 1, 10, 100]
svm_results = {}

for kernel in kernels:
    best_acc = 0
    best_C = None
    for C in C_values:
        svm = SVC(kernel=kernel, C=C, random_state=42)
        svm.fit(X_train_scaled, y_train)
        y_pred = svm.predict(X_test_scaled)
        acc = accuracy_score(y_test, y_pred)
        if acc > best_acc:
            best_acc = acc
            best_C = C
        print(f'Kernel: {kernel}, C: {C}, Accuracy: {acc:.4f}')
    print(f'Best C for kernel {kernel}: {best_C} with accuracy {best_acc:.4f}')
    svm_results[kernel] = {'best_C': best_C, 'best_accuracy': best_acc}

Kernel: linear, C: 0.01, Accuracy: 0.9910
Kernel: linear, C: 0.1, Accuracy: 0.9950
Kernel: linear, C: 1, Accuracy: 0.9955
Kernel: linear, C: 10, Accuracy: 0.9965
Kernel: linear, C: 100, Accuracy: 0.9980
Best C for kernel linear: 100 with accuracy 0.9980
Kernel: rbf, C: 0.01, Accuracy: 0.9500
Kernel: rbf, C: 0.1, Accuracy: 0.9870
Kernel: rbf, C: 1, Accuracy: 0.9900
Kernel: rbf, C: 10, Accuracy: 0.9850
Kernel: rbf, C: 100, Accuracy: 0.9835
Best C for kernel rbf: 1 with accuracy 0.9900
Kernel: poly, C: 0.01, Accuracy: 0.8795
Kernel: poly, C: 0.1, Accuracy: 0.9690
Kernel: poly, C: 1, Accuracy: 0.9765
Kernel: poly, C: 10, Accuracy: 0.9800
Kernel: poly, C: 100, Accuracy: 0.9810
Best C for kernel poly: 100 with accuracy 0.9810


In [8]:
# Decision Tree: Tune max_depth
depths = range(1, 21)
best_dt_acc = 0
best_depth = None

for depth in depths:
    dt = DecisionTreeClassifier(max_depth=depth, random_state=42)
    dt.fit(X_train, y_train)
    y_pred = dt.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    if acc > best_dt_acc:
        best_dt_acc = acc
        best_depth = depth
    print(f'Decision Tree depth: {depth}, Accuracy: {acc:.4f}')
print(f'Best depth for Decision Tree: {best_depth} with accuracy {best_dt_acc:.4f}')

Decision Tree depth: 1, Accuracy: 1.0000
Decision Tree depth: 2, Accuracy: 1.0000
Decision Tree depth: 3, Accuracy: 1.0000
Decision Tree depth: 4, Accuracy: 1.0000
Decision Tree depth: 5, Accuracy: 1.0000
Decision Tree depth: 6, Accuracy: 1.0000
Decision Tree depth: 7, Accuracy: 1.0000
Decision Tree depth: 8, Accuracy: 1.0000
Decision Tree depth: 9, Accuracy: 1.0000
Decision Tree depth: 10, Accuracy: 1.0000
Decision Tree depth: 11, Accuracy: 1.0000
Decision Tree depth: 12, Accuracy: 1.0000
Decision Tree depth: 13, Accuracy: 1.0000
Decision Tree depth: 14, Accuracy: 1.0000
Decision Tree depth: 15, Accuracy: 1.0000
Decision Tree depth: 16, Accuracy: 1.0000
Decision Tree depth: 17, Accuracy: 1.0000
Decision Tree depth: 18, Accuracy: 1.0000
Decision Tree depth: 19, Accuracy: 1.0000
Decision Tree depth: 20, Accuracy: 1.0000
Best depth for Decision Tree: 1 with accuracy 1.0000


# Understanding Hyperparameter Tuning in ML Model Development
Hyperparameter tuning is a crucial step in machine learning model development. Hyperparameters, such as the regularization parameter `C` in SVMs or the `max_depth` in Decision Trees, control the learning process and model complexity. Proper tuning helps to balance underfitting and overfitting, leading to better generalization on unseen data. Grid search or manual search over a range of values, as demonstrated above, allows us to find the optimal settings for our models. The choice of hyperparameters can significantly impact model performance, and their selection should be guided by validation results and domain knowledge.