In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data_path = '/Users/xiaoguang_guo@mines.edu/Documents/voice_attack_data/script/features_extraction/IO.csv'  # Replace with your actual data path
data = pd.read_csv(data_path)

# Handle infinite values and missing data
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.dropna(inplace=True)

# Separate the features and the target variable
X = data.drop('label', axis=1)  # Assume 'label' is the column name for the target variable
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define PCA transformation
pca = PCA(n_components=25)  # Using 25 components
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Define the kernels to test
kernels = ['linear', 'rbf', 'poly']

# Iterate over each kernel type
for kernel in kernels:
    svm_model = SVC(kernel=kernel, random_state=42)
    if kernel == 'poly':
        svm_model.degree = 3  # Default degree for poly kernel

    svm_model.fit(X_train_pca, y_train)
    y_pred = svm_model.predict(X_test_pca)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Print the result for the current kernel
    print(f"Kernel: {kernel}")
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(report)
    print("\n")


Kernel: linear
Accuracy: 0.5631
Classification Report:
              precision    recall  f1-score   support

           1       0.43      0.76      0.55        34
           2       0.26      0.30      0.28        43
           3       0.55      0.70      0.62        40
           4       0.47      0.50      0.49        38
           5       0.25      0.35      0.29        40
           6       0.44      0.36      0.39        42
           7       0.70      0.80      0.74        49
           8       0.54      0.58      0.56        43
           9       0.55      0.54      0.55        39
          10       0.28      0.22      0.25        41
          11       0.46      0.81      0.59        31
          12       0.55      0.37      0.44        46
          13       0.31      0.32      0.31        25
          14       0.54      0.58      0.56        33
          15       0.68      0.59      0.63        39
          16       0.54      0.33      0.41        46
          17       0.29   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [2]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data_path = '/Users/xiaoguang_guo@mines.edu/Documents/voice_attack_data/script/features_extraction/IO.csv'  # Replace with your actual data path
data = pd.read_csv(data_path)

# Handle infinite values and missing data
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.dropna(inplace=True)

# Separate the features and the target variable
X = data.drop('label', axis=1)  # Assume 'label' is the column name for the target variable
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define PCA transformation
pca = PCA(n_components=25)  # Using 25 components
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Define different C values to test
C_values = [0.01, 0.1, 1, 10, 100]

# Iterate over each C value
for C in C_values:
    svm_model = SVC(kernel='linear', C=C, random_state=42)
    svm_model.fit(X_train_pca, y_train)
    y_pred = svm_model.predict(X_test_pca)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Print the result for the current C value
    print(f"C value: {C}")
    print(f"Accuracy: {accuracy:.4f}")
    # print("Classification Report:")
    # print(report)
    print("\n")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


C value: 0.01
Accuracy: 0.4275


C value: 0.1
Accuracy: 0.5320


C value: 1
Accuracy: 0.5628


C value: 10
Accuracy: 0.5606


C value: 100
Accuracy: 0.5596


