In [1]:
import pandas as pd
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Path of the processed dataset, supplied through PROCESSED_DATA_PATH.
csv_path = os.getenv('PROCESSED_DATA_PATH')
if csv_path is None:
    # Fail early with a message that names the missing variable instead of
    # handing None to pd.read_csv.
    raise ValueError(
        "Environment variable PROCESSED_DATA_PATH is not set; "
        "define it in the environment or in a .env file."
    )
df = pd.read_csv(csv_path)

In [2]:
# Candidate model inputs: every column except the last one, minus 'action'.
# NOTE(review): presumably the last column is the target ('label') - confirm.
all_features = [name for name in df.columns[:-1] if name != 'action']

# The first 90 columns are treated as the raw features.
# NOTE(review): looks like 30 samples x 3 axes (x_*, y_*, z_*) - confirm.
raw_features = [*df.columns[:90]]

# Everything in all_features that is not one of the raw columns.
new_features = [name for name in all_features if name not in raw_features]

In [23]:
# Experiment configuration read by the cross-validation cell.
FEATURE_SELECTION = True    # when True, RFE runs inside every fold
FEATURES_TO_SELECT = 30     # number of features RFE keeps
FEATURES = all_features     # candidate feature columns fed to the models

In [24]:
import numpy as np
import warnings
from sklearn.exceptions import ConvergenceWarning

from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, KFold
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# Silence sklearn's ConvergenceWarning from here on.
warnings.simplefilter('ignore', ConvergenceWarning)

# Feature matrix and target taken from the full dataset.
X = df[FEATURES]
y = df['label']

# Linear SVM used by RFE to rank features, and a shuffled 5-fold splitter.
selection_svm = SVC(kernel='linear', max_iter=1000000, random_state=8)
kfold = KFold(n_splits=5, shuffle=True, random_state=8)

# Test accuracy of each model, one entry per fold.
rf_accs, svm_accs, combined_accs = [], [], []

for train_index, test_index in kfold.split(X):

    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    if FEATURE_SELECTION:

        # RFE is fitted on the training part of the fold only; the same
        # fitted selector then reduces both the training and the test part.
        selector = RFE(
            selection_svm,
            n_features_to_select=FEATURES_TO_SELECT,
            step=1,
        ).fit(X_train, y_train)
        selected_features = [name for name, kept in zip(FEATURES, selector.support_) if kept]

        print("Selected features:", selected_features)

        X_train, X_test = selector.transform(X_train), selector.transform(X_test)

    # Linear SVM scored on the held-out part of the fold.
    svm = SVC(kernel='linear', random_state=8).fit(X_train, y_train)
    svm_accuracy = svm.score(X_test, y_test)

    # Random forest trained and scored on the same split.
    rf = RandomForestClassifier(random_state=8).fit(X_train, y_train)
    rf_accuracy = rf.score(X_test, y_test)

    # "combined" is the plain average of the two accuracies for this fold.
    mean_accuracy = (svm_accuracy + rf_accuracy) / 2

    svm_accs.append(svm_accuracy)
    rf_accs.append(rf_accuracy)
    combined_accs.append(mean_accuracy)

print('\nRf', np.mean(rf_accs), 'svm', np.mean(svm_accs), 'combined', np.mean(combined_accs))

Selected features: ['y_1', 'x_15', 'x_17', 'z_20', 'x_21', 'z_25', 'z_27', 'y_28', 'x_29', 'z_29', 'x_mean', 'x_variance', 'x_std', 'x_max_value', 'x_kurtosis', 'y_mean', 'y_variance', 'y_std', 'y_range', 'y_min_value', 'y_skewness', 'y_interquartile_range', 'z_median', 'z_max_value', 'z_rms', 'z_interquartile_range', 'z_mean_absolute_deviation', 'cross_corr_x_y', 'cross_corr_y_z', 'cross_corr_z_x']
Selected features: ['y_9', 'z_12', 'z_16', 'y_19', 'z_20', 'z_22', 'y_23', 'z_23', 'z_24', 'x_25', 'z_25', 'x_29', 'x_max_value', 'x_rms', 'x_skewness', 'x_kurtosis', 'y_variance', 'y_median', 'y_rms', 'y_skewness', 'y_interquartile_range', 'z_variance', 'z_std', 'z_median', 'z_range', 'z_max_value', 'z_rms', 'z_interquartile_range', 'cross_corr_x_y', 'cross_corr_z_x']
Selected features: ['x_11', 'x_15', 'x_17', 'z_20', 'x_21', 'z_23', 'z_25', 'z_27', 'y_28', 'z_29', 'x_variance', 'x_median', 'x_range', 'x_max_value', 'x_signal_magnitude_area', 'x_index_min_value', 'x_kurtosis', 'y_mean', '

In [26]:
import pandas as pd

# Build a summary table: one row per cross-validation fold plus a final row
# holding the mean over folds, one column per model.

# Row labels follow the number of folds actually recorded, so the table stays
# valid if the splitter's n_splits is changed.
fold_names = [f'Fold_{i}' for i in range(1, len(rf_accs) + 1)]
fold_names.append('Mean folds')

# Each column is the per-fold accuracies followed by their mean.
data = {
        'rf': rf_accs + [np.mean(rf_accs)],
        'svm': svm_accs + [np.mean(svm_accs)],
        'combined': combined_accs + [np.mean(combined_accs)]
        }

results_df = pd.DataFrame(data, index=fold_names)

results_df

Unnamed: 0,rf,svm,combined
Fold_1,0.999227,0.994586,0.996906
Fold_2,0.996133,0.989172,0.992653
Fold_3,0.999226,0.993808,0.996517
Fold_4,0.997678,0.99226,0.994969
Fold_5,0.99613,0.993034,0.994582
Mean folds,0.997679,0.992572,0.995125
