In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data_path = '/Users/xiaoguang_guo@mines.edu/Documents/voice_attack_data/script/features_extraction/google/no_trim/IO.csv'  # Update this path to your actual data file
data = pd.read_csv(data_path)

# Handle infinite values and missing data
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.dropna(inplace=True)

# Separate the features and the target variable
X = data.drop('label', axis=1)  # Assume 'label' is the column name for the target variable
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define PCA transformation
pca = PCA(n_components=25)  # Using 25 components
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Define the kernels to test
kernels = ['linear', 'rbf', 'poly']

# Iterate over each kernel type
for kernel in kernels:
    svm_model = SVC(kernel=kernel, random_state=42)
    if kernel == 'poly':
        svm_model.degree = 3  # Default degree for poly kernel

    svm_model.fit(X_train_pca, y_train)
    y_pred = svm_model.predict(X_test_pca)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Print the result for the current kernel
    print(f"Kernel: {kernel}")
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(report)
    print("\n")


Kernel: linear
Accuracy: 0.4095
Classification Report:
              precision    recall  f1-score   support

           1       0.21      0.32      0.25        22
           2       0.31      0.28      0.30        39
           3       0.18      0.27      0.21        33
           4       0.11      0.11      0.11        27
           5       0.43      0.45      0.44        29
           6       0.30      0.29      0.30        34
           7       0.18      0.17      0.17        36
           8       0.16      0.29      0.21        24
           9       0.44      0.38      0.41        42
          10       0.12      0.12      0.12        26
          11       0.24      0.26      0.25        31
          13       0.10      0.11      0.11        27
          14       0.17      0.32      0.22        25
          15       0.15      0.11      0.13        35
          16       0.24      0.21      0.22        34
          17       0.34      0.40      0.37        35
          18       0.24   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Kernel: rbf
Accuracy: 0.3521
Classification Report:
              precision    recall  f1-score   support

           1       0.15      0.36      0.21        22
           2       0.21      0.26      0.23        39
           3       0.28      0.24      0.26        33
           4       0.05      0.04      0.04        27
           5       0.27      0.21      0.24        29
           6       0.39      0.32      0.35        34
           7       1.00      0.03      0.05        36
           8       0.07      0.08      0.07        24
           9       0.39      0.17      0.23        42
          10       0.12      0.19      0.14        26
          11       0.38      0.10      0.15        31
          13       0.07      0.11      0.08        27
          14       0.11      0.16      0.13        25
          15       0.26      0.17      0.21        35
          16       0.26      0.21      0.23        34
          17       0.21      0.26      0.23        35
          18       0.22      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [2]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data_path = '/Users/xiaoguang_guo@mines.edu/Documents/voice_attack_data/script/features_extraction/google/no_trim/IO.csv'  # Update this path to your actual data file
data = pd.read_csv(data_path)

# Handle infinite values and missing data
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.dropna(inplace=True)

# Separate the features and the target variable
X = data.drop('label', axis=1)  # Assume 'label' is the column name for the target variable
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define PCA transformation
pca = PCA(n_components=25)  # Using 25 components
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Define different C values to test
# C_values = [0.01, 0.1, 1, 10, 100]
C_values = [1]

# Iterate over each C value
for C in C_values:
    svm_model = SVC(kernel='linear', C=C, random_state=42)
    svm_model.fit(X_train_pca, y_train)
    y_pred = svm_model.predict(X_test_pca)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Print the result for the current C value
    print(f"C value: {C}")
    print(f"Accuracy: {accuracy:.4f}")
    # print("Classification Report:")
    # print(report)
    print("\n")


C value: 1
Accuracy: 0.4100


