In [31]:
import pandas as pd
from sklearn.datasets import make_classification
from feature_engine.selection import DropCorrelatedFeatures

# make dataframe with some correlated variables
def make_data():
    """Build the synthetic feature frame used by the selector examples.

    Calls ``make_classification`` requesting 1000 samples and 12 features
    (``n_redundant=4``) with a fixed ``random_state``. The generated target
    array is discarded; only the features are returned.

    Returns
    -------
    pandas.DataFrame
        The feature array with columns named ``var_0`` ... ``var_11``.
    """
    features, _target = make_classification(
        n_samples=1000,
        n_features=12,
        n_redundant=4,
        n_clusters_per_class=1,
        weights=[0.50],
        class_sep=2,
        random_state=1,
    )

    # Wrap the raw feature array in a DataFrame with predictable column names.
    return pd.DataFrame(features, columns=[f"var_{idx}" for idx in range(12)])

# Materialise the example feature frame (columns var_0 ... var_11).
X = make_data()

In [32]:
# Selector configured with the Pearson method and a 0.98 threshold;
# variables=None leaves the choice of columns to the transformer.
tr = DropCorrelatedFeatures(
    threshold=0.98,
    method="pearson",
    variables=None,
)
Xt = tr.fit_transform(X)

# Show the groups of correlated features found during fit.
tr.correlated_feature_sets_

[{'var_4', 'var_6', 'var_9'}]

In [33]:
import pandas as pd
from sklearn.datasets import make_classification
from feature_engine.selection import SmartCorrelatedSelection

# make dataframe with some correlated variables
def make_data():
    """Build the synthetic feature frame used by the selector examples.

    Calls ``make_classification`` requesting 1000 samples and 12 features
    (``n_redundant=4``) with a fixed ``random_state``. The generated target
    array is discarded; only the features are returned.

    Returns
    -------
    pandas.DataFrame
        The feature array with columns named ``var_0`` ... ``var_11``.
    """
    features, _target = make_classification(
        n_samples=1000,
        n_features=12,
        n_redundant=4,
        n_clusters_per_class=1,
        weights=[0.50],
        class_sep=2,
        random_state=1,
    )

    # Wrap the raw feature array in a DataFrame with predictable column names.
    return pd.DataFrame(features, columns=[f"var_{idx}" for idx in range(12)])

# Materialise the example feature frame (columns var_0 ... var_11).
X = make_data()

In [34]:
# Configure the selector: Pearson method, 0.8 threshold, "variance" as the
# selection method (so no estimator is supplied), and missing values set
# to "raise".
tr = SmartCorrelatedSelection(
    method="pearson",
    threshold=0.8,
    selection_method="variance",
    estimator=None,
    variables=None,
    missing_values="raise",
)

# Fit on the example frame and keep the reduced frame for inspection.
Xt = tr.fit_transform(X)

# Show the groups of correlated features found during fit.
tr.correlated_feature_sets_

[{'var_0', 'var_8'}, {'var_4', 'var_6', 'var_7', 'var_9'}]

In [35]:
# Features the fitted selector marked for removal.
tr.features_to_drop_

['var_0', 'var_4', 'var_6', 'var_9']

In [36]:
# Preview the first rows of the reduced frame. A bare last expression lets
# the notebook display it; the previous nested print() also printed the
# inner call's return value, a stray "None".
Xt.head()

      var_1     var_2     var_3     var_5     var_7     var_8    var_10  \
0 -2.376400 -0.247208  1.210290  0.091527 -2.230170  2.070483  2.070526   
1  1.969326 -0.126894  0.034598 -0.186802 -1.447490  2.421477  1.184820   
2  1.499174  0.334123 -2.233844 -0.313881 -2.240741  2.263546 -0.066448   
3  0.075341  1.627132  0.943132 -0.468041 -3.534861  2.792500  0.713558   
4  0.372213  0.338141  0.951526  0.729005 -2.053965  2.186741  0.398790   

     var_11  
0 -1.989335  
1 -1.309524  
2 -0.852703  
3  0.484649  
4 -0.186530  
None
