In [1]:
import pandas as pd
import numpy as np

# Data Preprocessing
from sklearn.feature_selection import VarianceThreshold

In [2]:
def feature_set(data, data_type):
    """
    Collect the names of all columns of a dataFrame whose dtype matches the requested one
    Args:
        data (dataFrame): pandas dataFrame whose columns are inspected
        data_type (str): dtype to match, e.g. 'O' for categorical features or 'float64' for continuous features
    Returns:
        (list of str): names of the matching columns, in their original column order
    """
    # keep a column only when its dtype compares equal to the requested dtype
    return [name for name in data.columns if data[name].dtype == data_type]

In [3]:
def variance_threshold_selector(data, threshold):
    """
    This function removes the features with a variance lower than the threshold
    Args:
        data (dataFrame): pandas DataFrame to fit the variance selector on
        threshold (float): Features with variance lower than this threshold will be removed
    Returns:
        a dataFrame containing only the retained columns, with the actual index and column names
    """
    selector = VarianceThreshold(threshold=threshold)
    selector.fit(data)

    # Integer positions of the columns the selector decided to keep.
    kept_positions = selector.get_support(indices=True)

    # Select by position rather than by label: a label-based lookup would pull
    # in every column sharing a kept label when column names are duplicated,
    # including columns the selector rejected.
    return data.iloc[:, kept_positions]