<a href="https://colab.research.google.com/github/saleemhamo/ecg-data-feature-engineering/blob/main/Feature_Selection_Embedding_Methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prepare Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from scipy.stats import pearsonr
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.linear_model import Lasso, ElasticNet, Ridge
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
emb_feature_names = pd.read_csv('/content/drive/My Drive/Glasgow ML/Case Studies/CaseStudy3/feature_names.csv').columns.tolist()
emb_labels = pd.read_csv('/content/drive/My Drive/Glasgow ML/Case Studies/CaseStudy3/labels.csv', header=None, names=['labels'])
emb_data = pd.read_csv('/content/drive/My Drive/Glasgow ML/Case Studies/CaseStudy3/data.csv', header=None, names=emb_feature_names)



## Embedding Methods
Three methods of completeing Embedding feature engineering.
Lasso, Ridge, Elastic Net

In [None]:
# Determining the Lasso coefficients for this data
np_data, np_labels = emb_data.to_numpy(), emb_labels.to_numpy()
lasso = Lasso(alpha=1)
lasso.fit(np_data, np_labels)
print(lasso.coef_)

In [None]:
# Determining the ElasticNet coefficients for this data
elastic = ElasticNet(alpha=0.1)
elastic.fit(np_data, np_labels)
print(elastic.coef_)

In [None]:
# Determining the ridge coefficients for this data
ridge_reg = Ridge(alpha=1.0)
ridge_reg.fit(np_data, np_labels)
print(ridge_reg.coef_)
abs_coef = abs(ridge_reg.coef_[0])
plt.boxplot(abs_coef)
plt.show()

In [None]:
# creates a new data set containting the data for features that are in the top
# 25% of the values outputted from the ridge regression model

coef = ridge_reg.coef_.tolist()[0]
ridge_features = emb_feature_names
top_25_threshold = np.percentile(abs_coef, 75)
new_coefs = []
new_feature_names = []
for cof in coef:
  if abs(cof) >= top_25_threshold :
    new_coefs.append(cof)
    new_feature_names.append(ridge_features[coef.index(cof)])


print(f"Size: {len(new_coefs)} with items:" , new_coefs)
print(f"Size: {len(new_feature_names)} with items:" , new_feature_names)

q75_df = emb_data[new_feature_names].copy()
q75_df.columns = new_feature_names

q75_df.head()



Size: 108 with items: [-0.10845467826547067, 0.1477302030949224, -0.14581275299742738, -0.11995779623426733, -0.08236430841614353, -0.10277574800748498, -0.10271737702434355, 0.0999682666621376, 0.13573546380099716, -0.08575810096240696, -0.23020265902125447, 0.09558904485808284, 0.15216337697965793, 0.13448487121822011, 0.1348229019701014, 0.10166822476641474, 0.13997357572544278, 0.11017850238797597, 0.08683510302081278, 0.1368662585387325, 0.10100481376594361, -0.10136671354198448, -0.08239047393086207, 0.08753490878545139, 0.09507095999514772, 0.11175894374875968, 0.09509710886864177, 0.10113640353436881, 0.08495874382312396, 0.10221185679677616, 0.08586229105384027, 0.09399848616496129, 0.2395703979681824, -0.08314508255320163, 0.22891813256345084, -0.11447956765389829, -0.08384876275029708, -0.12403249429970649, -0.08649020354250694, -0.09259221287151244, 0.1374585361144497, 0.15589268490366143, 0.10782837643315504, -0.10417796696890994, 0.10136927373135621, 0.08468686862010083, 

Unnamed: 0,alpha_ec_2,alpha_ec_3,alpha_ec_5,alpha_ec_6,alpha_ec_7,alpha_ec_11,alpha_ec_12,alpha_ec_21,alpha_ec_22,alpha_ec_23,...,ratio_theta_15,ratio_theta_20,ratio_theta_26,ratio_theta_28,ratio_theta_30,ratio_theta_33,ratio_theta_35,ratio_theta_38,ratio_theta_40,ratio_theta_47
0,0.12858,0.11324,0.1069,0.10756,0.11064,0.095234,0.0889,0.087974,0.090929,0.12972,...,0.60008,0.47338,1.3837,0.48869,0.89434,0.77878,0.51583,0.57161,0.58709,1.0144
1,0.26239,0.38009,0.285,0.24791,0.25033,0.33123,0.25847,0.32817,0.33792,0.27856,...,0.81354,0.50451,0.77174,1.1351,0.45386,0.48172,0.70838,0.35142,0.43164,0.74867
2,0.38999,0.25772,0.44199,0.44004,0.39947,0.40516,0.41402,0.40209,0.47489,0.39364,...,0.56915,0.62039,0.66235,0.78822,0.50153,0.3728,0.51142,0.61924,1.0931,0.61417
3,0.77042,0.60161,0.72725,0.76245,0.78904,0.60776,0.69743,0.53659,0.4927,0.35979,...,1.7977,1.6808,0.87729,1.231,0.35169,0.46294,0.75828,0.23456,0.43196,0.85448
4,0.14714,0.15728,0.14154,0.13228,0.1276,0.17149,0.17162,0.25354,0.24037,0.2092,...,2.1179,1.2574,2.5849,1.1196,1.6258,2.2146,2.1237,1.8216,1.2442,2.2946


In [None]:
def drop_worst_feature(names, coefs, df):
  abs_coefs = [abs(cof) for cof in coefs]
  worst_feature = new_feature_names[abs_coefs.index(min(abs_coefs))]
  worst_feature_index = abs_coefs.index(min(abs_coefs))
  new_df = df.drop([worst_feature],axis=1)
  names.pop(worst_feature_index)
  coefs.pop(worst_feature_index)
  print(f"{worst_feature} was removed")

  return new_df, coefs, names



In [None]:
def print_classification_report(y_pred, y_true, labels):
  print(classification_report(y_true, y_pred, labels))


In [None]:
# SVM
# output the base score with all features
# loop through new features eliminating them until there is only 25 left
# or there is decrease in accuracy


# Creates the groups for the leave one out cross validation
groups = np.repeat(np.arange(len(emb_data)//10), 10)
logo = LeaveOneGroupOut()

# CV score with all features
base_svm = SVC(kernel='linear', C=1)
base_svm.fit(emb_data, emb_labels)
base_cv_scores = cross_val_score(base_svm, emb_data, np.ravel(emb_labels), cv=logo.split(emb_data, emb_labels, groups), error_score='raise')
print(f"The mean leave-one-out score for all features is: {base_cv_scores.mean()}")

# CV scores with top 25% of features
q75_svm = SVC(kernel='linear', C=1)
q75_svm.fit(q75_df, emb_labels)
q_75_cv_scores = cross_val_score(q75_svm, q75_df, np.ravel(emb_labels), cv=logo.split(q75_df, emb_labels, groups), error_score='raise')
print(f"The mean leave-one-out score for the top 75th quantile features is: {q_75_cv_scores.mean()}")



test_df = q75_df.copy()

fet_names = new_feature_names
test_coefs = new_coefs

old_mean = base_cv_scores.mean()
new_mean = q_75_cv_scores.mean()

iteration = 0

while True:
  if iteration == 0:
      df, updated_coeficients, updated_feature_names = drop_worst_feature(fet_names, test_coefs, test_df)
      clf = SVC(kernel= 'linear', C=1)
      clf.fit(df, emb_labels)
      new_mean = cross_val_score(clf, df, np.ravel(emb_labels), cv=logo.split(df, emb_labels, groups), error_score='raise').mean()
      iteration +=1
      print('first iteration done')
      print(f"There are {len(updated_feature_names)} left and the CV score is {new_mean}")

  else:
    saved_features = updated_feature_names
    new_df, new_updated_coeficients, new_updated_feature_names = drop_worst_feature(updated_feature_names, updated_coeficients, df)
    df, updated_coeficients, updated_feature_names = new_df, new_updated_coeficients, new_updated_feature_names
    clf = SVC(kernel='linear', C=1)
    clf.fit(df, emb_labels )
    new_mean = cross_val_score(clf, df, np.ravel(emb_labels), cv=logo.split(df, emb_labels, groups), error_score='raise').mean()
    if old_mean > new_mean:
      break
    else:
        old_mean = new_mean
    print(f"There are {len(new_feature_names)} left and the CV score is {new_mean}")

print("\n\n")
print(f"The highest cross validation score is {round(old_mean,4)} with the following {len(saved_features)} features being used:")
print(saved_features)

x_train, x_test, y_train, y_test = train_test_split(df[saved_features], emb_labels, test_size=0.1)

best_clf = SVC(kernel='linear', C=1)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print('\n\n',classification_report(y_test, y_pred))


In [None]:
# K Nearest Neihbour


# Creates the groups for the leave one out cross validation
groups = np.repeat(np.arange(len(emb_data)//10), 10)
logo = LeaveOneGroupOut()

# CV score with all features
base_knn = KNeighborsClassifier(n_neighbors=5)
base_knn.fit(emb_data, emb_labels)
base_cv_scores = cross_val_score(base_knn, emb_data, np.ravel(emb_labels), cv=logo.split(emb_data, emb_labels, groups), error_score='raise')
print(f"The mean leave-one-out score for all features is: {base_cv_scores.mean()}")

# CV scores with top 25% of features
q75_knn = KNeighborsClassifier(n_neighbors=5)
q75_knn.fit(q75_df, emb_labels)
q_75_cv_scores = cross_val_score(q75_knn, q75_df, np.ravel(emb_labels), cv=logo.split(q75_df, emb_labels, groups), error_score='raise')
print(f"The mean leave-one-out score for the top 75th quantile features is: {q_75_cv_scores.mean()}")



test_df = q75_df.copy()

fet_names = new_feature_names
test_coefs = new_coefs

old_mean = base_cv_scores.mean()
new_mean = q_75_cv_scores.mean()

iteration = 0

while True:
  if iteration == 0:
      df, updated_coeficients, updated_feature_names = drop_worst_feature(fet_names, test_coefs, test_df)
      clf = KNeighborsClassifier(n_neighbors=5)
      clf.fit(df, emb_labels)
      new_mean = cross_val_score(clf, df, np.ravel(emb_labels), cv=logo.split(df, emb_labels, groups), error_score='raise').mean()
      iteration +=1
      print('first iteration done')
  else:
    saved_features = updated_feature_names
    new_df, new_updated_coeficients, new_updated_feature_names = drop_worst_feature(updated_feature_names, updated_coeficients, df)
    df, updated_coeficients, updated_feature_names = new_df, new_updated_coeficients, new_updated_feature_names
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(df, emb_labels )
    new_mean = cross_val_score(clf, df, np.ravel(emb_labels), cv=logo.split(df, emb_labels, groups), error_score='raise').mean()
    if old_mean > new_mean:
      break
    else:
        old_mean = new_mean
    print(f"There are {len(new_feature_names)} left and the CV score is {new_mean}")

print("\n\n")
print(f"The highest cross validation score is {round(old_mean,4)} with the following {len(saved_features)} features being used:")
print(saved_features)


x_train, x_test, y_train, y_test = train_test_split(df[saved_features], emb_labels, test_size=0.1)

best_clf =KNeighborsClassifier(n_neighbors=5)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print('\n\n',classification_report(y_test, y_pred))


  return self._fit(X, y)


The mean leave-one-out score for all features is: 0.6888888888888889
The mean leave-one-out score for the top 75th quantile features is: 0.7444444444444445
ratio_alpha_47 was removed


  return self._fit(X, y)
  return self._fit(X, y)


first iteration done
ratio_beta_45 was removed
There are 106 left and the CV score is 0.7444444444444445
alpha_ec_7 was removed


  return self._fit(X, y)
  return self._fit(X, y)


There are 105 left and the CV score is 0.7444444444444445
beta_ec_26 was removed
There are 104 left and the CV score is 0.7444444444444445
beta_eo_31 was removed


  return self._fit(X, y)
  return self._fit(X, y)


There are 103 left and the CV score is 0.7444444444444445
alpha_eo_7 was removed
There are 102 left and the CV score is 0.7444444444444445
theta_eo_2 was removed


  return self._fit(X, y)
  return self._fit(X, y)


There are 101 left and the CV score is 0.7444444444444445
ratio_theta_4 was removed
There are 100 left and the CV score is 0.7444444444444445
alpha_eo_16 was removed


  return self._fit(X, y)
  return self._fit(X, y)





The highest cross validation score is 0.7444 with the following 99 features being used:
['alpha_ec_2', 'alpha_ec_3', 'alpha_ec_5', 'alpha_ec_6', 'alpha_ec_11', 'alpha_ec_12', 'alpha_ec_21', 'alpha_ec_22', 'alpha_ec_23', 'alpha_ec_31', 'alpha_ec_41', 'alpha_ec_42', 'alpha_ec_44', 'alpha_ec_45', 'alpha_ec_46', 'beta_ec_5', 'beta_ec_6', 'beta_ec_7', 'beta_ec_11', 'beta_ec_12', 'beta_ec_22', 'theta_ec_0', 'theta_ec_1', 'theta_ec_2', 'theta_ec_17', 'theta_ec_18', 'theta_ec_25', 'theta_ec_26', 'theta_ec_27', 'alpha_eo_2', 'alpha_eo_3', 'alpha_eo_9', 'alpha_eo_10', 'alpha_eo_18', 'alpha_eo_24', 'alpha_eo_31', 'alpha_eo_33', 'alpha_eo_34', 'alpha_eo_41', 'beta_eo_3', 'beta_eo_5', 'beta_eo_6', 'beta_eo_15', 'beta_eo_25', 'beta_eo_26', 'beta_eo_34', 'beta_eo_41', 'theta_eo_18', 'theta_eo_19', 'theta_eo_25', 'theta_eo_26', 'theta_eo_27', 'theta_eo_28', 'ratio_alpha_0', 'ratio_alpha_6', 'ratio_alpha_8', 'ratio_alpha_9', 'ratio_alpha_10', 'ratio_alpha_18', 'ratio_alpha_23', 'ratio_alpha_24', 'ra

  return self._fit(X, y)


In [None]:
# Random Forest Classifier
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)

groups = np.repeat(np.arange(len(emb_data)//10), 10)
logo = LeaveOneGroupOut()

# CV score with all features
base_rf = RandomForestClassifier(n_estimators=100, random_state=42)
base_rf.fit(emb_data, emb_labels)
base_cv_scores = cross_val_score(base_rf, emb_data, np.ravel(emb_labels), cv=logo.split(emb_data, emb_labels, groups), error_score='raise')
print(f"The mean leave-one-out score for all features is: {base_cv_scores.mean()}")

# CV scores with top 25% of features
q75_rf = RandomForestClassifier(n_estimators=100, random_state=42)
q75_rf.fit(q75_df, emb_labels)
q_75_cv_scores = cross_val_score(q75_rf, q75_df, np.ravel(emb_labels), cv=logo.split(q75_df, emb_labels, groups), error_score='raise')
print(f"The mean leave-one-out score for the top 75th quantile features is: {q_75_cv_scores.mean()}")



test_df = q75_df.copy()

fet_names = new_feature_names
test_coefs = new_coefs

old_mean = base_cv_scores.mean()
new_mean = q_75_cv_scores.mean()

iteration = 0

while True:
  if iteration == 0:
      df, updated_coeficients, updated_feature_names = drop_worst_feature(fet_names, test_coefs, test_df)
      clf = RandomForestClassifier(n_estimators=100, random_state=42)
      clf.fit(df, emb_labels)
      new_mean = cross_val_score(clf, df, np.ravel(emb_labels), cv=logo.split(df, emb_labels, groups), error_score='raise').mean()
      iteration +=1
      print('first iteration done')
  else:
    saved_features = updated_feature_names
    new_df, new_updated_coeficients, new_updated_feature_names = drop_worst_feature(updated_feature_names, updated_coeficients, df)
    df, updated_coeficients, updated_feature_names = new_df, new_updated_coeficients, new_updated_feature_names
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(df, emb_labels )
    new_mean = cross_val_score(clf, df, np.ravel(emb_labels), cv=logo.split(df, emb_labels, groups), error_score='raise').mean()
    if old_mean - new_mean > (old_mean * 0.049):
      break
    else:
        old_mean = new_mean

    print(f"There are {len(new_feature_names)} left and the CV score is {new_mean}")

print("\n\n")
print(f"The highest cross validation score is {round(old_mean,4)} with the following {len(saved_features)} features being used:")
print(saved_features)

x_train, x_test, y_train, y_test = train_test_split(df[saved_features], emb_labels, test_size=0.1)

best_clf =RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print('\n\n',classification_report(y_test, y_pred))

  base_rf.fit(emb_data, emb_labels)


The mean leave-one-out score for all features is: 0.8222222222222223


  q75_rf.fit(q75_df, emb_labels)


The mean leave-one-out score for the top 75th quantile features is: 0.8388888888888889
ratio_alpha_47 was removed


  clf.fit(df, emb_labels)


first iteration done
ratio_beta_45 was removed


  clf.fit(df, emb_labels )


There are 106 left and the CV score is 0.8388888888888889
alpha_ec_7 was removed


  clf.fit(df, emb_labels )


There are 105 left and the CV score is 0.8
beta_ec_26 was removed


  clf.fit(df, emb_labels )


There are 104 left and the CV score is 0.8277777777777778
beta_eo_31 was removed


  clf.fit(df, emb_labels )


There are 103 left and the CV score is 0.8333333333333334
alpha_eo_7 was removed


  clf.fit(df, emb_labels )


There are 102 left and the CV score is 0.838888888888889
theta_eo_2 was removed


  clf.fit(df, emb_labels )


There are 101 left and the CV score is 0.8333333333333335
ratio_theta_4 was removed


  clf.fit(df, emb_labels )


There are 100 left and the CV score is 0.8166666666666668
alpha_eo_16 was removed


  clf.fit(df, emb_labels )


There are 99 left and the CV score is 0.838888888888889
ratio_beta_9 was removed


  clf.fit(df, emb_labels )


There are 98 left and the CV score is 0.827777777777778
beta_eo_6 was removed


  clf.fit(df, emb_labels )


There are 97 left and the CV score is 0.7944444444444445
theta_ec_25 was removed


  clf.fit(df, emb_labels )


There are 96 left and the CV score is 0.8500000000000001
theta_eo_26 was removed


  clf.fit(df, emb_labels )


There are 95 left and the CV score is 0.8222222222222224
alpha_ec_23 was removed


  clf.fit(df, emb_labels )


There are 94 left and the CV score is 0.8500000000000001
theta_ec_27 was removed


  clf.fit(df, emb_labels )





The highest cross validation score is 0.85 with the following 93 features being used:
['alpha_ec_2', 'alpha_ec_3', 'alpha_ec_5', 'alpha_ec_6', 'alpha_ec_11', 'alpha_ec_12', 'alpha_ec_21', 'alpha_ec_22', 'alpha_ec_31', 'alpha_ec_41', 'alpha_ec_42', 'alpha_ec_44', 'alpha_ec_45', 'alpha_ec_46', 'beta_ec_5', 'beta_ec_6', 'beta_ec_7', 'beta_ec_11', 'beta_ec_12', 'beta_ec_22', 'theta_ec_0', 'theta_ec_1', 'theta_ec_2', 'theta_ec_17', 'theta_ec_18', 'theta_ec_26', 'alpha_eo_2', 'alpha_eo_3', 'alpha_eo_9', 'alpha_eo_10', 'alpha_eo_18', 'alpha_eo_24', 'alpha_eo_31', 'alpha_eo_33', 'alpha_eo_34', 'alpha_eo_41', 'beta_eo_3', 'beta_eo_5', 'beta_eo_15', 'beta_eo_25', 'beta_eo_26', 'beta_eo_34', 'beta_eo_41', 'theta_eo_18', 'theta_eo_19', 'theta_eo_25', 'theta_eo_27', 'theta_eo_28', 'ratio_alpha_0', 'ratio_alpha_6', 'ratio_alpha_8', 'ratio_alpha_9', 'ratio_alpha_10', 'ratio_alpha_18', 'ratio_alpha_23', 'ratio_alpha_24', 'ratio_alpha_30', 'ratio_alpha_36', 'ratio_beta_3', 'ratio_beta_5', 'ratio_bet

  clf.fit(x_train, y_train)




               precision    recall  f1-score   support

         0.0       0.90      0.82      0.86        11
         1.0       0.75      0.86      0.80         7

    accuracy                           0.83        18
   macro avg       0.82      0.84      0.83        18
weighted avg       0.84      0.83      0.83        18



In [None]:
# lists of the best features for the three different classifiers
rf_features = ['alpha_ec_2', 'alpha_ec_3', 'alpha_ec_5', 'alpha_ec_6', 'alpha_ec_11', 'alpha_ec_12', 'alpha_ec_21', 'alpha_ec_22', 'alpha_ec_31', 'alpha_ec_41', 'alpha_ec_42', 'alpha_ec_44', 'alpha_ec_45', 'alpha_ec_46', 'beta_ec_5', 'beta_ec_6', 'beta_ec_7', 'beta_ec_11', 'beta_ec_12', 'beta_ec_22', 'theta_ec_0', 'theta_ec_1', 'theta_ec_2', 'theta_ec_17', 'theta_ec_18', 'theta_ec_26', 'alpha_eo_2', 'alpha_eo_3', 'alpha_eo_9', 'alpha_eo_10', 'alpha_eo_18', 'alpha_eo_24', 'alpha_eo_31', 'alpha_eo_33', 'alpha_eo_34', 'alpha_eo_41', 'beta_eo_3', 'beta_eo_5', 'beta_eo_15', 'beta_eo_25', 'beta_eo_26', 'beta_eo_34', 'beta_eo_41', 'theta_eo_18', 'theta_eo_19', 'theta_eo_25', 'theta_eo_27', 'theta_eo_28', 'ratio_alpha_0', 'ratio_alpha_6', 'ratio_alpha_8', 'ratio_alpha_9', 'ratio_alpha_10', 'ratio_alpha_18', 'ratio_alpha_23', 'ratio_alpha_24', 'ratio_alpha_30', 'ratio_alpha_36', 'ratio_beta_3', 'ratio_beta_5', 'ratio_beta_8', 'ratio_beta_11', 'ratio_beta_12', 'ratio_beta_15', 'ratio_beta_17', 'ratio_beta_18', 'ratio_beta_21', 'ratio_beta_23', 'ratio_beta_24', 'ratio_beta_25', 'ratio_beta_26', 'ratio_beta_28', 'ratio_beta_31', 'ratio_beta_34', 'ratio_beta_36', 'ratio_beta_41', 'ratio_beta_42', 'ratio_beta_44', 'ratio_beta_47', 'ratio_theta_1', 'ratio_theta_6', 'ratio_theta_12', 'ratio_theta_14', 'ratio_theta_15', 'ratio_theta_20', 'ratio_theta_26', 'ratio_theta_28', 'ratio_theta_30', 'ratio_theta_33', 'ratio_theta_35', 'ratio_theta_38', 'ratio_theta_40', 'ratio_theta_47']
knn_features = ['alpha_ec_2', 'alpha_ec_3', 'alpha_ec_5', 'alpha_ec_6', 'alpha_ec_11', 'alpha_ec_12', 'alpha_ec_21', 'alpha_ec_22', 'alpha_ec_23', 'alpha_ec_31', 'alpha_ec_41', 'alpha_ec_42', 'alpha_ec_44', 'alpha_ec_45', 'alpha_ec_46', 'beta_ec_5', 'beta_ec_6', 'beta_ec_7', 'beta_ec_11', 'beta_ec_12', 'beta_ec_22', 'theta_ec_0', 'theta_ec_1', 'theta_ec_2', 'theta_ec_17', 'theta_ec_18', 'theta_ec_25', 'theta_ec_26', 'theta_ec_27', 'alpha_eo_2', 'alpha_eo_3', 'alpha_eo_9', 'alpha_eo_10', 'alpha_eo_18', 'alpha_eo_24', 'alpha_eo_31', 'alpha_eo_33', 'alpha_eo_34', 'alpha_eo_41', 'beta_eo_3', 'beta_eo_5', 'beta_eo_6', 'beta_eo_15', 'beta_eo_25', 'beta_eo_26', 'beta_eo_34', 'beta_eo_41', 'theta_eo_18', 'theta_eo_19', 'theta_eo_25', 'theta_eo_26', 'theta_eo_27', 'theta_eo_28', 'ratio_alpha_0', 'ratio_alpha_6', 'ratio_alpha_8', 'ratio_alpha_9', 'ratio_alpha_10', 'ratio_alpha_18', 'ratio_alpha_23', 'ratio_alpha_24', 'ratio_alpha_30', 'ratio_alpha_36', 'ratio_beta_3', 'ratio_beta_5', 'ratio_beta_8', 'ratio_beta_9', 'ratio_beta_11', 'ratio_beta_12', 'ratio_beta_15', 'ratio_beta_17', 'ratio_beta_18', 'ratio_beta_21', 'ratio_beta_23', 'ratio_beta_24', 'ratio_beta_25', 'ratio_beta_26', 'ratio_beta_28', 'ratio_beta_31', 'ratio_beta_34', 'ratio_beta_36', 'ratio_beta_41', 'ratio_beta_42', 'ratio_beta_44', 'ratio_beta_47', 'ratio_theta_1', 'ratio_theta_6', 'ratio_theta_12', 'ratio_theta_14', 'ratio_theta_15', 'ratio_theta_20', 'ratio_theta_26', 'ratio_theta_28', 'ratio_theta_30', 'ratio_theta_33', 'ratio_theta_35', 'ratio_theta_38', 'ratio_theta_40', 'ratio_theta_47']
svm_features = ['alpha_ec_2', 'alpha_ec_3', 'alpha_ec_5', 'alpha_ec_6', 'alpha_ec_11', 'alpha_ec_12', 'alpha_ec_21', 'alpha_ec_22', 'alpha_ec_23', 'alpha_ec_31', 'alpha_ec_41', 'alpha_ec_42', 'alpha_ec_44', 'alpha_ec_45', 'alpha_ec_46', 'beta_ec_5', 'beta_ec_6', 'beta_ec_7', 'beta_ec_11', 'beta_ec_12', 'beta_ec_22', 'theta_ec_0', 'theta_ec_1', 'theta_ec_2', 'theta_ec_17', 'theta_ec_18', 'theta_ec_25', 'theta_ec_26', 'theta_ec_27', 'alpha_eo_2', 'alpha_eo_3', 'alpha_eo_9', 'alpha_eo_10', 'alpha_eo_16', 'alpha_eo_18', 'alpha_eo_24', 'alpha_eo_31', 'alpha_eo_33', 'alpha_eo_34', 'alpha_eo_41', 'beta_eo_3', 'beta_eo_5', 'beta_eo_6', 'beta_eo_15', 'beta_eo_25', 'beta_eo_26', 'beta_eo_34', 'beta_eo_41', 'theta_eo_18', 'theta_eo_19', 'theta_eo_25', 'theta_eo_26', 'theta_eo_27', 'theta_eo_28', 'ratio_alpha_0', 'ratio_alpha_6', 'ratio_alpha_8', 'ratio_alpha_9', 'ratio_alpha_10', 'ratio_alpha_18', 'ratio_alpha_23', 'ratio_alpha_24', 'ratio_alpha_30', 'ratio_alpha_36', 'ratio_beta_3', 'ratio_beta_5', 'ratio_beta_8', 'ratio_beta_9', 'ratio_beta_11', 'ratio_beta_12', 'ratio_beta_15', 'ratio_beta_17', 'ratio_beta_18', 'ratio_beta_21', 'ratio_beta_23', 'ratio_beta_24', 'ratio_beta_25', 'ratio_beta_26', 'ratio_beta_28', 'ratio_beta_31', 'ratio_beta_34', 'ratio_beta_36', 'ratio_beta_41', 'ratio_beta_42', 'ratio_beta_44', 'ratio_beta_47', 'ratio_theta_1', 'ratio_theta_6', 'ratio_theta_12', 'ratio_theta_14', 'ratio_theta_15', 'ratio_theta_20', 'ratio_theta_26', 'ratio_theta_28', 'ratio_theta_30', 'ratio_theta_33', 'ratio_theta_35', 'ratio_theta_38', 'ratio_theta_40', 'ratio_theta_47']
