In [5]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import Normalizer, MaxAbsScaler, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

### Normal prediction

In [13]:
# load dataset
X = pd.read_csv('parkinsons.data')
# or:
# url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data"
# data = pd.read_csv(url)
y = X['status']
X.drop(columns=['name', 'status'], inplace=True)

# train test plit
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

# fit and predict
clf = SVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# evaluation
print(accuracy_score(y_test, y_pred))
print(f1_score(y_test, y_pred))

0.7627118644067796
0.86


### Find best params (C, gamma) for RBF kernel 

In [17]:
C_values = np.arange(0.05, 2.05, 0.05)
gamma_values = np.arange(0.001, 0.101, 0.001)

best_score = 0
best_f1 = 0
best_params = {"C": None, "gamma": None}

for c in C_values:
    for gamma in gamma_values:
        clf = SVC(C=c, gamma=gamma, kernel='rbf')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='macro')

        if f1 > best_f1:
            best_score = accuracy
            best_params["C"] = c
            best_params["gamma"] = gamma
            best_f1 = f1

print(f"Best Accuracy: {best_score}")
print(f"Best F1 Score: {best_f1}")
print(f"Best Parameters: C = {best_params['C']}, gamma = {best_params['gamma']}")

Best Accuracy: 0.9152542372881356
Best F1 Score: 0.8431685273790537
Best Parameters: C = 1.6500000000000001, gamma = 0.005


### Find suitable Scaler

In [18]:
scalers = {
    'Normalizer': Normalizer(),
    'MaxAbsScaler': MaxAbsScaler(),
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler()
}

best_score = 0
best_f1 = 0
best_scaler = None

for name, scaler in scalers.items():
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model = SVC(kernel='rbf')
    model.fit(X_train_scaled, y_train)
    
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"{name} - Accuracy: {accuracy}, F1 Score: {f1}")
    
    if f1 > best_f1:
        best_score = accuracy
        best_f1 = f1
        best_scaler = name

print(f"Best Preprocessing Method: {best_scaler}")
print(f"Best Accuracy: {best_score}")
print(f"Best F1 Score: {best_f1}")

Normalizer - Accuracy: 0.7966101694915254, F1 Score: 0.8867924528301887
MaxAbsScaler - Accuracy: 0.8813559322033898, F1 Score: 0.9306930693069307
MinMaxScaler - Accuracy: 0.9152542372881356, F1 Score: 0.9494949494949495
StandardScaler - Accuracy: 0.9152542372881356, F1 Score: 0.9494949494949495
Best Preprocessing Method: MinMaxScaler
Best Accuracy: 0.9152542372881356
Best F1 Score: 0.9494949494949495


### Suitable Scaler on best params

In [19]:
scalers = {
    'Normalizer': Normalizer(),
    'MaxAbsScaler': MaxAbsScaler(),
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler()
}

best_score = 0
best_f1 = 0
best_scaler = None

for name, scaler in scalers.items():
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model = SVC(C=best_params['C'], gamma=best_params['gamma'], kernel='rbf')
    model.fit(X_train_scaled, y_train)
    
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"{name} - Accuracy: {accuracy}, F1 Score: {f1}")
    
    if accuracy > best_score:
        best_score = accuracy
        best_f1 = f1
        best_scaler = name

print(f"Best Preprocessing Method: {best_scaler}")
print(f"Best Accuracy: {best_score}")
print(f"Best F1 Score: {best_f1}")

Normalizer - Accuracy: 0.7966101694915254, F1 Score: 0.8867924528301887
MaxAbsScaler - Accuracy: 0.7966101694915254, F1 Score: 0.8867924528301887
MinMaxScaler - Accuracy: 0.7966101694915254, F1 Score: 0.8867924528301887
StandardScaler - Accuracy: 0.847457627118644, F1 Score: 0.9090909090909091
Best Preprocessing Method: StandardScaler
Best Accuracy: 0.847457627118644
Best F1 Score: 0.9090909090909091
