<a href="https://colab.research.google.com/github/Ditsuhi/ExploratoryAnalysis_FeatureSelection/blob/main/mRMR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import required libraries

import pandas as pd
from sklearn.feature_selection import f_regression

# The dataset can be found at the following link: https://doi.org/10.5281/zenodo.6497108.
# The path provided below can be changed depending on where your data is located.

# load the dataset; the CSV column 'Unnamed: 0' is used as the row index
data = pd.read_csv('/content/Madrid_wind_2019.csv', index_col='Unnamed: 0')

# inputs:
#    X: pandas.DataFrame, features (every column of data except 'NO2')
#    y: pandas.Series, target variable (the 'NO2' column)
#    K: number of features to select (in our example K = {2, 3, 4, 5, 6, 7, 8})

X = data.drop(columns=['NO2'])
y = data['NO2']

# K was documented as an input but never assigned, so the selection loop
# further down failed with a NameError. 5 is simply one value from the example
# range listed above - change it to the number of features you want to keep.
K = 5


# Greedy mRMR feature selection with the FCQ score: at every step the excluded
# feature with the largest ratio of relevance (F-statistic against y) to
# redundancy (mean absolute correlation with the already selected features)
# is moved to `selected`. Expects X, y and K to be defined beforehand.

# compute F-statistics and initialize correlation matrix
# (every cell starts at a small positive floor so it can safely be a divisor)
F = pd.Series(f_regression(X, y)[0], index=X.columns)
corr = pd.DataFrame(.00001, index=X.columns, columns=X.columns)

# initialize list of selected features and list of excluded features
selected = []
not_selected = X.columns.to_list()

# repeat K times, but never more often than there are features: once every
# feature has been selected the score series is empty and taking its maximum
# would raise a ValueError
for i in range(min(K, len(not_selected))):

    # compute (absolute) correlations between the last selected feature and all the (currently) excluded features
    # (clipped from below so a zero correlation cannot produce a division by zero)
    if i > 0:
        last_selected = selected[-1]
        corr.loc[not_selected, last_selected] = X[not_selected].corrwith(X[last_selected]).abs().clip(.00001)

    # compute FCQ score for all the (currently) excluded features (this is Formula 2)
    # in the first iteration `selected` is empty, the row mean is NaN and
    # fillna turns the denominator into the floor value, so the ranking is by F alone
    score = F.loc[not_selected] / corr.loc[not_selected, selected].mean(axis=1).fillna(.00001)

    # find best feature, add it to selected and remove it from not_selected
    best = score.idxmax()
    selected.append(best)
    not_selected.remove(best)