In [None]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier


# Hyperparameter grid searched for the Decision Tree.
# GridSearchCV evaluates every combination: 4 depths x 3 split sizes
# x 3 leaf sizes x 2 criteria = 72 candidate settings.
dt_param_grid = {
    'max_depth': [5, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],  # comma was missing here, making the dict a SyntaxError
    'criterion': ['gini', 'entropy'],
}

# Folder holding the precomputed statistical-feature CSV files
stat_feature_dir = 'E:/Abroad period research/new idea implementation codes/Second part of the paper/26 features results/statistical_features'

# Read the training split into a DataFrame
train_csv_path = os.path.join(stat_feature_dir, "train_stat_features.csv")
train_df = pd.read_csv(train_csv_path)

# Pull out the target vector, then keep every remaining column as the feature matrix
train_labels = train_df['label'].values
train_stat_features = train_df.drop(columns=['label']).values

# Base estimator; the fixed seed keeps tree construction reproducible across runs
dt_clf = DecisionTreeClassifier(random_state=0)

# Exhaustive search over dt_param_grid, scored by accuracy.
# Only 3 folds are used to keep the search time down; n_jobs=-1 runs the fits in parallel.
dt_grid_search = GridSearchCV(
    estimator=dt_clf,
    param_grid=dt_param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)

# Run the search on the training data
dt_grid_search.fit(train_stat_features, train_labels)

# Report the winning hyperparameters and their mean cross-validated accuracy
best_dt_params, best_dt_score = dt_grid_search.best_params_, dt_grid_search.best_score_
print(f"Best Decision Tree Parameters: {best_dt_params}")
print(f"Best Decision Tree Cross-Validation Accuracy: {best_dt_score:.4f}")


Best Decision Tree Parameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best Decision Tree Cross-Validation Accuracy: 0.9000
