# classify

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

def scale_and_pca(data, prefix=None, n_components=20, random_state=42):
    """Standardize ``data`` and project it onto its principal components.

    Both the scaler and the PCA are fitted on ``data`` itself and then
    discarded; only the transformed values are returned.

    Args:
        data: 2-D numeric table accepted by ``StandardScaler.fit_transform``.
        prefix: Label used in the output column names
            (``<prefix>_PC1``, ``<prefix>_PC2``, ...). When ``None`` the
            columns are named plain ``PC1``, ``PC2``, ...
        n_components: Passed unchanged to ``PCA``. It does not have to be
            an int: the output columns are named after the number of
            components the fitted PCA actually produced.
        random_state: Passed unchanged to ``PCA``.

    Returns:
        A new ``pandas.DataFrame`` holding the component scores, one column
        per component, with a default integer index (the index of ``data``
        is not carried over).
    """
    scaled = StandardScaler().fit_transform(data)
    reducer = PCA(n_components=n_components, random_state=random_state)
    scores = reducer.fit_transform(scaled)

    # Name the columns from the width of the result rather than from the
    # n_components argument, which is only usable in range() when it is an int.
    stem = 'PC' if prefix is None else f'{prefix}_PC'
    column_names = [f'{stem}{k}' for k in range(1, scores.shape[1] + 1)]
    return pd.DataFrame(scores, columns=column_names)


# Load the three feature tables; each carries a 姓名 (name) key column.
E = pd.read_csv(r'result\machine_learning\merged_emotion_features.csv')
I = pd.read_csv(r'result\machine_learning\merged_au_intensities.csv')
C = pd.read_csv(r'result\machine_learning\merged_au_correlations.csv')

# Re-sort the rows of I and C so they follow the name order found in E.
# NOTE(review): a name present in E but absent from I or C becomes an
# all-NaN row here, which the zero-fill below then hides - confirm that the
# three files cover the same names.
name_order = E['姓名'].tolist()
I = I.set_index('姓名').reindex(name_order).reset_index()
C = C.set_index('姓名').reindex(name_order).reset_index()

# Set the label / score columns of E aside before they are stripped below.
group, ABC, CABS = E['group'], E['ABC'], E['克氏']

# Columns that identify or label a row and must not be fed in as features.
# Not every table has all of them, hence errors='ignore'.
non_feature_cols = ['姓名', 'group', 'ABC', '克氏']

# Per table: drop the non-feature columns, replace missing values with 0,
# then standardize and keep 25 principal components.
E = scale_and_pca(
    E.drop(columns=non_feature_cols, errors='ignore').fillna(0),
    'E',
    n_components=25,
)
I = scale_and_pca(
    I.drop(columns=non_feature_cols, errors='ignore').fillna(0),
    'I',
    n_components=25,
)
C = scale_and_pca(
    C.drop(columns=non_feature_cols, errors='ignore').fillna(0),
    'IC',
    n_components=25,
)


# Merge features: column-wise combinations of the E, I and C tables.
# Assumes the three tables share the same row order and index, so that
# concatenating along axis=1 lines rows up without introducing NaNs.
EI = pd.concat([E, I], axis=1)
EC = pd.concat([E, C], axis=1)
IC = pd.concat([C, I], axis=1)
EIC = pd.concat([E, I, C], axis=1)

# Report label -> feature table evaluated under that label.
# 'IC' points at the combined C+I table built above, consistent with every
# other label naming its own combination; mapping it to C alone would
# report single-table results under the 'IC' label.
feature_sets = {
    'E': E,
    'EI': EI,
    'EC': EC,
    'IC': IC,
    'EIC': EIC,
}

# Optional export of the feature tables and labels (disabled).
# E.to_csv(r'result\classify_and_regression\E.csv', index=0)
# EI.to_csv(r'result\classify_and_regression\EI.csv', index=0)
# EC.to_csv(r'result\classify_and_regression\EC.csv', index=0)
# IC.to_csv(r'result\classify_and_regression\IC.csv', index=0)
# EIC.to_csv(r'result\classify_and_regression\EIC.csv', index=0)
# group.to_csv(r'result\classify_and_regression\group.csv', index=0)

# Print header
print("=" * 100)
print(f"{'Feature Type':<15} {'Precision':<10} {'Recall':<10} {'Accuracy':<10} {'F1-score':<10} {'AUC-ROC':<10}")
print("-" * 100)



# Evaluate an RBF-kernel SVM on every feature set with 5-fold stratified
# cross-validation and print one result row per feature set.

# One fixed row shuffle shared by all feature sets. Seeding and drawing
# once gives the same permutation that re-seeding on every iteration would,
# because the seed and the length never change inside the loop.
np.random.seed(1)
shuffle_order = np.random.permutation(len(group))

# Positional selection, so the shuffle does not depend on group's index labels.
y = group.iloc[shuffle_order]

# Explicit, unshuffled stratified splitter: both cross_val_predict calls
# below are guaranteed to score every row with the same fold assignment.
cv = StratifiedKFold(n_splits=5)

for feature_type, X in feature_sets.items():
    X_shuffled = X.to_numpy()[shuffle_order]

    # The scaler lives inside the pipeline so it is refit on the training
    # part of each fold; fitting it on all rows up front would let held-out
    # rows influence the scaling statistics.
    # NOTE(review): the PCA projections stored in feature_sets were fitted
    # on all rows before this loop, so that step is still not fold-aware.
    model = make_pipeline(
        StandardScaler(),
        SVC(kernel='rbf', random_state=1, probability=True),
    )

    # Out-of-fold class predictions and class probabilities for every row.
    y_pred = cross_val_predict(model, X_shuffled, y, cv=cv)
    y_prob = cross_val_predict(model, X_shuffled, y, cv=cv, method='predict_proba')

    # Calculate metrics; AUC uses the probability of the second class column.
    metrics = {
        'Precision': precision_score(y, y_pred),
        'Recall': recall_score(y, y_pred),
        'Accuracy': accuracy_score(y, y_pred),
        'F1-score': f1_score(y, y_pred),
        'AUC-ROC': roc_auc_score(y, y_prob[:, 1]),
    }

    # Print result row using the same column widths and single-space
    # separators as the header row, so values sit under their headings.
    print(f"{feature_type:<15} {metrics['Precision']:<10.3f} "
          f"{metrics['Recall']:<10.3f} {metrics['Accuracy']:<10.3f} "
          f"{metrics['F1-score']:<10.3f} {metrics['AUC-ROC']:<10.3f}")

print("=" * 100)

Feature Type    Precision  Recall     Accuracy   F1-score   AUC-ROC   
----------------------------------------------------------------------------------------------------
 E               0.773      0.758       0.750      0.765      0.795     
 EI              0.866      0.848       0.848      0.857      0.933     
 EC              0.841      0.909       0.859      0.874      0.915     
 IC              0.836      0.929       0.864      0.880      0.923     
 EIC             0.904      0.949       0.918      0.926      0.962     
