#### RFE (Recursive Feature Elimination)

In [5]:
import pandas as pd
import statsmodels.api as sm
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
def vif(data_frame):
    """Return a table with the variance inflation factor of every column.

    Each column of ``data_frame`` is passed, by position, to statsmodels'
    ``variance_inflation_factor`` together with the frame's raw values.

    Args:
        data_frame: DataFrame whose columns are the explanatory variables.

    Returns:
        A DataFrame with two columns: ``"Variable"`` (the column labels of
        ``data_frame``) and ``"VIF"`` (the factor computed for that column).
    """
    # NOTE(review): no constant/intercept column is added here; confirm
    # whether callers are expected to include one before calling.
    design_matrix = data_frame.values
    factors = []
    for position in range(data_frame.shape[1]):
        factors.append(variance_inflation_factor(design_matrix, position))

    table = pd.DataFrame()
    table["Variable"] = data_frame.columns
    table["VIF"] = factors
    return table

def feature_selection(data_path, target='MEDV', n_features_to_select=7):
    """Prune one predictor by p-value, report VIFs and run RFE on a CSV file.

    Steps performed:
      1. Load the file at ``data_path`` with ``pd.read_csv``.
      2. Fit an OLS model of ``target`` on all other columns and drop the
         single predictor column with the largest p-value.
      3. Compute variance inflation factors for the remaining predictors.
      4. Run RFE around ``LinearRegression`` to keep
         ``n_features_to_select`` predictors, refit a linear regression on
         them and score it on the same data it was fitted on.

    Args:
        data_path: Path (or any source accepted by ``pd.read_csv``) of the
            dataset. Column names are used verbatim in the OLS formula.
        target: Name of the response column. Defaults to ``'MEDV'``.
        n_features_to_select: Number of predictors RFE should keep.
            Defaults to ``7``.

    Returns:
        dict with the keys:
            ``'data_after_feature_selection'``: the loaded data without the
                column dropped in step 2 (the target column is kept).
            ``'vif_results'``: the table returned by ``vif`` for the
                remaining predictors.
            ``'rfe_score'``: ``LinearRegression.score`` on the RFE-selected
                predictors.
            ``'rfe_support'``: ``RFE.support_`` mask over the remaining
                predictors.
            ``'rfe_ranking'``: ``RFE.ranking_`` over the remaining
                predictors.

    Raises:
        KeyError: if ``target`` is not a column of the loaded data.
    """
    # read_csv already returns a fresh frame, so no defensive copy is needed.
    data4 = pd.read_csv(data_path)

    # Fit the OLS model on every predictor to obtain per-term p-values.
    predictors = data4.drop(target, axis=1).columns
    model_string = target + '~' + '+'.join(predictors)
    model = sm.OLS.from_formula(model_string, data=data4).fit()

    # The formula API adds an 'Intercept' term to the results. Only terms
    # that are real columns can be dropped from the frame, so restrict the
    # candidates to them; otherwise an intercept with the largest p-value
    # would make the drop below fail with a KeyError.
    pvalues = model.pvalues
    droppable = pvalues[pvalues.index.isin(predictors)]
    column_to_drop = droppable.idxmax()
    data4 = data4.drop(column_to_drop, axis=1)

    # Remaining predictors and response.
    x = data4.drop(target, axis=1)
    y = data4[target]

    # Variance inflation factors of the remaining predictors.
    vif_result = vif(x)

    # RFE clones the estimator it is given, so model_lr is still unfitted
    # after fit_transform and has to be fitted on the selected columns.
    model_lr = LinearRegression()
    rfe = RFE(model_lr, n_features_to_select=n_features_to_select)
    X_rfe = rfe.fit_transform(x, y)
    model_lr.fit(X_rfe, y)
    rfe_score = model_lr.score(X_rfe, y)

    return {
        'data_after_feature_selection': data4,
        'vif_results': vif_result,
        'rfe_score': rfe_score,
        'rfe_support': rfe.support_,
        'rfe_ranking': rfe.ranking_,
    }
# Run the feature-selection pipeline on the file 'boston_clear.txt'
result = feature_selection('boston_clear.txt')

# Print every part of the result, one after another, in a fixed order
print(
    *(
        result[name]
        for name in (
            'data_after_feature_selection',
            'vif_results',
            'rfe_score',
            'rfe_support',
            'rfe_ranking',
        )
    ),
    sep='\n',
)

        CRIM    ZN  INDUS  CHAS    NOX     RM     DIS  RAD    TAX  PTRATIO  \
0    0.00632  18.0   2.31     0  0.538  6.575  4.0900    1  296.0     15.3   
1    0.02731   0.0   7.07     0  0.469  6.421  4.9671    2  242.0     17.8   
2    0.02729   0.0   7.07     0  0.469  7.185  4.9671    2  242.0     17.8   
3    0.03237   0.0   2.18     0  0.458  6.998  6.0622    3  222.0     18.7   
4    0.06905   0.0   2.18     0  0.458  7.147  6.0622    3  222.0     18.7   
..       ...   ...    ...   ...    ...    ...     ...  ...    ...      ...   
501  0.06263   0.0  11.93     0  0.573  6.593  2.4786    1  273.0     21.0   
502  0.04527   0.0  11.93     0  0.573  6.120  2.2875    1  273.0     21.0   
503  0.06076   0.0  11.93     0  0.573  6.976  2.1675    1  273.0     21.0   
504  0.10959   0.0  11.93     0  0.573  6.794  2.3889    1  273.0     21.0   
505  0.04741   0.0  11.93     0  0.573  6.030  2.5050    1  273.0     21.0   

          B  LSTAT  MEDV  
0    396.90   4.98  24.0  
1    396.