In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

data_path = '/Users/xiaoguang_guo@mines.edu/Documents/voice_attack_data/script/features_extraction/no_trim/IO.csv'  # Update this path to your actual data file
data = pd.read_csv(data_path)

data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.dropna(inplace=True)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(data['label'])
X = data.drop('label', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

pipeline = ImbPipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('smote', SMOTE(random_state=42)),
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=25)),  # Adding PCA with 25 components
    ('classifier', LogisticRegression(max_iter=1000, random_state=42, multi_class='multinomial', solver='lbfgs'))
])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.47381996388960534
Classification Report:
              precision    recall  f1-score   support

           0       0.39      0.56      0.46        34
           1       0.14      0.05      0.07        43
           2       0.39      0.35      0.37        40
           3       0.47      0.53      0.49        38
           4       0.28      0.20      0.23        40
           5       0.29      0.26      0.28        42
           6       0.56      0.73      0.64        49
           7       0.44      0.47      0.45        43
           8       0.41      0.36      0.38        39
           9       0.18      0.12      0.14        41
          10       0.33      0.65      0.44        31
          11       0.47      0.30      0.37        46
          12       0.24      0.20      0.22        25
          13       0.42      0.52      0.47        33
          14       0.73      0.49      0.58        39
          15       0.42      0.33      0.37        46
          16       0.37     