In [1]:
import pandas as pd

data = pd.read_csv('merged_data.csv').drop(columns=['name'], axis=1).dropna()
X = data.drop(columns=['activity'], axis=1)
y = data['activity']


## RelieF

In [3]:
import sklearn_relief as relief
import numpy as np

# Load some data and put it in a numpy.array matrix
my_input_matrix = np.array(X)

# Load the label vector
my_label_vector = np.array(y)

r = relief.Relief(
    n_features=5 # Choose the best 3 features
) # Will run by default on all processors concurrently

my_transformed_matrix = r.fit_transform(
    my_input_matrix,
    my_label_vector
)

# my_transformed_matrix will now contain the 3 highest-ranked feature
# vectors from my_input_matrix.

# If you are interested in the computed weights, print them with
# print(r.w_) # Each i-th weight will be the weight of the i-th feature
empty = {}
i = 0
for col in X.columns:
    empty[col] = r.w_[i]
    i += 1

empty

{'acc_x_mpf1': 1.8253675726928618,
 'acc_y_mpf1': 1.7437167641185711,
 'acc_z_mpf1': 1.2685883695658873,
 'acc_x_iqr': 2.299401197604791,
 'acc_y_iqr': 2.560878243512974,
 'acc_z_iqr': 1.5748502994011975,
 'acc_x_wilson_amp': 59.46,
 'acc_y_wilson_amp': 51.48,
 'acc_z_wilson_amp': 61.52,
 'acc_x_crossco': 200902.5077725391,
 'acc_y_crossco': 613720.4750072868,
 'acc_z_crossco': 278202.52855451277,
 'acc_x_three_quarters': 2.5389221556886223,
 'acc_y_three_quarters': 2.758483033932136,
 'acc_z_three_quarters': 1.7764471057884235,
 'acc_x_one_quarter': 0.43512974051896214,
 'acc_y_one_quarter': 0.46506986027944114,
 'acc_z_one_quarter': 0.4570858283433134,
 'acc_x_corecoef': 0.40722257293250425,
 'acc_y_corecoef': 0.5765557028248463,
 'acc_z_corecoef': 0.4603167380456739,
 'acc_mpf': 7.251566771442257,
 'acc_x_slope_change': 5.33,
 'acc_y_slope_change': 5.16,
 'acc_z_slope_change': 6.3,
 'acc_x_rms': 5.594616869651346,
 'acc_y_rms': 3.1137423258590866,
 'acc_z_rms': 6.521787151324162,
 '

## One-R

In [4]:
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from the CSV file
data = pd.read_csv('merged_data.csv')

# Drop the 'name' column
data.drop('name', axis=1, inplace=True)

# Split the data into features (X) and labels (y)
X = data.drop('activity', axis=1)
y = data['activity']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Perform feature selection using the One-R method
selector = SelectKBest(score_func=f_classif, k=40)
selector.fit(X_train, y_train)
selected_features = X_train.columns[selector.get_support()]
print(selected_features)

Index(['acc_x_wilson_amp', 'acc_y_wilson_amp', 'acc_z_wilson_amp',
       'acc_x_one_quarter', 'acc_y_stdev', 'acc_x_mad', 'acc_y_mad',
       'acc_z_mad', 'acc_x_wf', 'acc_y_wf', 'acc_z_wf', 'acc_x_p2p',
       'acc_y_p2p', 'acc_z_p2p', 'acc_x_median_frequency', 'acc_x_kurtosis_f',
       'acc_x_skewness_f', 'acc_x_autoregburg_1', 'acc_x_autoregburg_2',
       'acc_x_autoregburg_3', 'acc_x_autoregburg_4', 'acc_y_autoregburg_1',
       'acc_y_autoregburg_2', 'acc_y_autoregburg_3', 'acc_z_autoregburg_1',
       'acc_z_autoregburg_2', 'acc_z_autoregburg_3', 'acc_z_enwacto_2',
       'acc_z_enwacto_3', 'acc_z_enwacto_4', 'acc_z_enwacto_5',
       'acc_z_enwacto_6', 'acc_y_enwacto_3', 'acc_y_enwacto_4',
       'acc_y_enwacto_5', 'acc_x_enwacto_2', 'acc_x_enwacto_3',
       'acc_x_enwacto_4', 'acc_x_enwacto_5', 'acc_x_enwacto_6'],
      dtype='object')


## Symmetrical Uncertainity

In [5]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, mutual_info_classif

# Load the merged data
data = pd.read_csv('merged_data.csv')

# Separate the features and labels
X = data.drop(['name', 'activity'], axis=1) # Drop the 'name' column and use all other columns as features
y = data['activity'] # Use the 'activity' column as the label

# Use the SelectKBest function with mutual information as the scoring metric
kbest = SelectKBest(score_func=mutual_info_classif, k=40) # Select the top 10 features
kbest.fit(X, y)

# Print the selected features
features = X.columns[kbest.get_support()]
print(features)


Index(['acc_x_wilson_amp', 'acc_y_wilson_amp', 'acc_z_wilson_amp',
       'acc_x_crossco', 'acc_y_crossco', 'acc_z_crossco', 'acc_mpf',
       'acc_x_rms', 'acc_y_rms', 'acc_z_rms', 'acc_x_stdev', 'acc_y_stdev',
       'acc_z_stdev', 'acc_x_mean', 'acc_y_mean', 'acc_z_mean', 'acc_x_mad',
       'acc_y_mad', 'acc_z_mad', 'acc_x_wf', 'acc_y_wf', 'acc_z_wf',
       'acc_x_mav', 'acc_y_mav', 'acc_z_mav', 'acc_x_p2p', 'acc_y_p2p',
       'acc_z_p2p', 'acc_x_kurtosis_f', 'acc_y_kurtosis_f', 'acc_z_kurtosis_f',
       'acc_x_skewness_f', 'acc_y_skewness_f', 'acc_z_skewness_f',
       'acc_x_top3', 'acc_z_enwacto_1', 'acc_z_enwacto_2', 'acc_y_enwacto_1',
       'acc_x_enwacto_1', 'acc_x_enwacto_2'],
      dtype='object')


## MRMR

In [60]:
import pymrmr
from sklearn.preprocessing import LabelEncoder
data = pd.read_csv('merged_data.csv').drop(columns=['name'], axis=1).dropna()


le = LabelEncoder()
le.fit(data['activity'])
data['activity'] = le.transform(data['activity'])

activity_col = data.pop('activity')
data.insert(0, 'activity', activity_col)

pymrmr.mRMR(data, 'MID', 50)

MemoryError: bad array new length