In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from itertools import combinations
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Exhaustive search over small feature subsets: train a random forest on every
# combination of 1..max_features_in_combination candidate features and rank
# the combinations by accuracy on a held-out validation split.

# Load the preprocessed training data and separate the target column
# ('class') from the predictor columns.
processed_train_df = pd.read_csv('Processed_TrainSet.csv')
y = processed_train_df['class']
X = processed_train_df.drop(columns=['class'])

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate features to search over.
# NOTE(review): presumably selected by an earlier importance ranking - confirm.
top_10_features = ["SFm", "RTm", "Km", "RTd", "SCd", "Asm", "EDm", "Em", "RSm", "FMm"]

# Fail fast: without this check a misspelled or missing column would only
# surface as a KeyError part-way through the search, after models have
# already been trained for the preceding combinations.
missing_features = [name for name in top_10_features if name not in X.columns]
if missing_features:
    raise KeyError(f"Candidate features not found in the data: {missing_features}")

# A single estimator is reused; each fit() call discards the previous fit.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Maps each feature combination (a tuple of column names) to its validation
# accuracy. Insertion order follows the enumeration order below, i.e. smaller
# combinations are recorded first.
accuracy_results = {}

# Largest subset size to evaluate. With 10 candidates and a limit of 4 this
# trains 10 + 45 + 120 + 210 = 385 models.
max_features_in_combination = 4

for i in range(1, max_features_in_combination + 1):
    for feature_subset in combinations(top_10_features, i):
        # Build the column selector once and use it for both fit and predict.
        subset_columns = list(feature_subset)
        rf_model.fit(X_train[subset_columns], y_train)
        predictions = rf_model.predict(X_val[subset_columns])
        accuracy = accuracy_score(y_val, predictions)
        # combinations() already yields tuples, so the subset is usable as a
        # dict key without conversion.
        accuracy_results[feature_subset] = accuracy

accuracy_df = pd.DataFrame(accuracy_results.items(), columns=["Feature Combination", "Accuracy"])
# Many combinations can end up with exactly the same accuracy. A stable sort
# keeps tied rows in enumeration order, so among equally accurate combinations
# the ones with fewer features are listed first instead of in arbitrary order.
accuracy_df = accuracy_df.sort_values(by="Accuracy", ascending=False, kind="stable")
print(accuracy_df.head(10))


      Feature Combination  Accuracy
217   (SFm, Km, Asm, FMm)  0.833333
22             (RTm, Asm)  0.833333
51             (EDm, FMm)  0.833333
307  (RTm, Asm, EDm, FMm)  0.833333
277    (RTm, Km, Em, RSm)  0.833333
122         (Km, RTd, Em)  0.750000
109       (RTm, Asm, EDm)  0.750000
204   (SFm, Km, RTd, Asm)  0.750000
110        (RTm, Asm, Em)  0.750000
207   (SFm, Km, RTd, RSm)  0.750000
