# In [1]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Source workbook; only its first worksheet (index 0) is read.
FILE_PATH = 'Output/Subjects_Separate_Sheets.xlsx'
data = pd.read_excel(FILE_PATH, sheet_name=0)

# Columns fed to the classifiers as input features.
selected_features = [
    'MedianAbsDev',
    'HFD',
    'HFD_cardio',
    'AVpw',
    'meanA1',
    'meanArea',
    'meanIPAR',
    'stdIPAR',
    'meanT1',
    'meanT2',
]

# Feature matrix (X) and label vector (y), copied so that later changes to
# either one do not touch `data`.
X = data.loc[:, selected_features].copy()
y = data.loc[:, 'Class'].copy()

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
# NOTE(review): the split is not stratified -- check whether the 'Class'
# distribution is balanced enough for that to be acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Candidate classifiers, keyed by the name used when reporting results.
# Fixed seeds keep repeated runs comparable.
# NOTE(review): the linear-kernel SVM is trained on the raw feature values;
# SVMs are sensitive to feature scale, so consider standardising the inputs
# (confirm the feature ranges first).
models = {
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(kernel='linear', random_state=42),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
}

# Metrics for every model, keyed by model name. Without this, each loop
# iteration overwrites `accuracy` / `cv_scores` and only the last model's
# numbers remain available afterwards.
results = {}

# Train and evaluate each model on the same train/test split.
for model_name, model in models.items():
    # Fit on the training portion only.
    model.fit(X_train, y_train)

    # Hold-out evaluation on the test portion.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # 5-fold cross-validation on the training data. cross_val_score fits
    # clones of the estimator, so the model fitted above is left as it is.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5)

    results[model_name] = {
        'accuracy': accuracy,
        'classification_report': classification_report(y_test, y_pred),
        'cv_scores': cv_scores,
        'cv_mean': cv_scores.mean(),
        'cv_std': cv_scores.std(),
    }

    # One-line summary per model; the full per-class report stays in `results`.
    print(
        f"{model_name}: test accuracy = {accuracy:.3f}, "
        f"5-fold CV score = {cv_scores.mean():.3f} +/- {cv_scores.std():.3f}"
    )