# Import Libraries

In [41]:
import pandas as pd
import re
import numpy as np

In [42]:
# Directory the per-run score files (feature_scores_<i>.txt) are read from
OUTPUTS_PATH = '../src/features'
# Root data directory; the CSVs used below live under <DATA_PATH>/processed
DATA_PATH = '../data'

In [43]:
# Load the feature scores written by eval_feature.py (one file per run) and
# reduce them to a single median score per position for each model.
# NOTE(review): the first line of every file is parsed with eval(), which
# executes whatever expression the file contains. Only run this on score
# files you generated yourself. ast.literal_eval would be safer, but only
# works if the files hold plain Python literals -- confirm the format that
# eval_feature.py writes before switching.
log_reg_scores, SNN_scores = [], []
for i in range(6):
    with open(f"{OUTPUTS_PATH}/feature_scores_{i}.txt", "r") as score_file:
        first_line = score_file.readline()
    # Transpose the parsed rows into one score tuple per model:
    # position 0 is collected as logistic regression, position 1 as SNN.
    per_model = list(zip(*eval(first_line)))
    log_reg_scores.append(per_model[0])
    SNN_scores.append(per_model[1])
# Collapse the six runs into their element-wise median
log_reg_scores, SNN_scores = (
    np.median(log_reg_scores, axis=0),
    np.median(SNN_scores, axis=0),
)

# Get top 40 features

In [44]:
# Load the processed feature matrix and collect the name of every column
# after the first one.
# NOTE(review): presumably the first column is the 'FPF_TARGET' label that
# later cells read from this frame -- confirm against the CSV layout.
feature_matrix = pd.read_csv(f'{DATA_PATH}/processed/feature_matrix.csv')
feature_names = feature_matrix.columns[1:].tolist()

In [45]:
# Bind each feature name to its median score, per model.
# zip() stops at the shorter input, so a mismatch between the number of
# feature names and the number of scores would silently drop or mislabel
# features. Fail loudly instead of producing a wrong ranking.
if not (len(feature_names) == len(log_reg_scores) == len(SNN_scores)):
    raise ValueError(
        "Feature/score length mismatch: "
        f"{len(feature_names)} feature names, "
        f"{len(log_reg_scores)} logistic-regression scores, "
        f"{len(SNN_scores)} SNN scores"
    )
# NOTE(review): these two names are rebound from score arrays to dicts, so
# re-running this cell without first re-running the score-loading cell pairs
# feature names with dict keys rather than scores.
log_reg_scores = dict(zip(feature_names, log_reg_scores))
SNN_scores = dict(zip(feature_names, SNN_scores))
# Rank (name, score) pairs by score, lowest first.
# NOTE(review): the next cell keeps the first 40 entries, i.e. the 40 lowest
# scores -- confirm that a lower score from eval_feature.py means a better
# feature.
sorted_log_reg_scores = sorted(log_reg_scores.items(), key=lambda x: x[1])
sorted_SNN_scores = sorted(SNN_scores.items(), key=lambda x: x[1])

In [46]:
# Keep only the feature names from the first 40 (name, score) pairs of each
# ranking; the scores themselves are no longer needed.
top_log_reg = [pair[0] for pair in sorted_log_reg_scores[:40]]
top_SNN = [pair[0] for pair in sorted_SNN_scores[:40]]

# Create new reduced feature matrices

In [47]:
# Reduced dataset for logistic regression: the selected feature columns
# with the 'FPF_TARGET' column placed in front of them.
log_reg_feature_matrix = feature_matrix[top_log_reg]
log_reg_feature_matrix.insert(
    loc=0,
    column='FPF_TARGET',
    value=feature_matrix['FPF_TARGET'],
)
# Write the reduced matrix next to the full one, without the row index
log_reg_csv_path = f'{DATA_PATH}/processed/log_reg_feature_matrix.csv'
log_reg_feature_matrix.to_csv(log_reg_csv_path, index=False)

In [48]:
# Reduced dataset for the SNN: the selected feature columns with the
# 'FPF_TARGET' column placed in front of them.
SNN_feature_matrix = feature_matrix[top_SNN]
SNN_feature_matrix.insert(
    loc=0,
    column='FPF_TARGET',
    value=feature_matrix['FPF_TARGET'],
)
# Write the reduced matrix next to the full one, without the row index
SNN_csv_path = f'{DATA_PATH}/processed/SNN_feature_matrix.csv'
SNN_feature_matrix.to_csv(SNN_csv_path, index=False)