In [17]:
# Setup autoreload
%load_ext autoreload
%autoreload 2

# Give yourself access to common
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from common import *

import numpy as np
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

def perform_rfe(X, y, n_features_to_select=1):
    """
    Rank features by Recursive Feature Elimination on a linear regression.

    Parameters:
    X (numpy.ndarray): Feature matrix; each ranked index refers to a column of X.
    y (numpy.ndarray): Target values, one per row of X.
    n_features_to_select (int): Number of features RFE keeps before it stops
        eliminating, default is 1.

    Returns:
    list: (feature index, rank) tuples ordered by ascending rank. Features
        that share a rank stay in their original column order.
    """
    # One feature is dropped per elimination round (step=1).
    selector = RFE(
        LinearRegression(),
        n_features_to_select=n_features_to_select,
        step=1,
    )
    selector.fit(X, y)

    # Pair every column index with its rank, then order by rank in place;
    # list.sort is stable, so ties keep ascending index order.
    ranked = [(index, rank) for index, rank in enumerate(selector.ranking_)]
    ranked.sort(key=lambda pair: pair[1])
    return ranked

# Gather training and testing data
from common import *
from sklearn.model_selection import train_test_split

# Load the two cleaned datasets (helpers presumably come from the
# star-import of `common` — they are not defined in this cell).
nba_extern = get_cleaned_external_data()
nba_base = get_cleaned_baseline_data()
# Rebind each dataset to the result of add_log_y_values.
# NOTE(review): presumably this adds a log-transformed target column
# (e.g. 'inflationAdjSalary_log') — confirm in common.
nba_extern = add_log_y_values(nba_extern)
nba_base = add_log_y_values(nba_base)
# Sanity check: print the size of the second dimension of each dataset
# (the column count, assuming these are DataFrames) and the external
# dataset's column labels.
print(nba_extern.shape[1])
print(nba_base.shape[1])
print(nba_extern.columns)

def format_data(data):
    """
    Split `data` with split_data, then rejoin the pieces into full arrays.

    Parameters:
    data: Dataset handed unchanged to split_data (with time_based_split=False).

    Returns:
    tuple: (X, y), where X is the train features followed by the test
        features and y is the train targets followed by the test targets,
        each joined along the first axis.
    """
    train_x, test_x, train_y, test_y = split_data(data, time_based_split=False)
    # np.concatenate joins along axis 0 by default: train rows, then test rows.
    features = np.concatenate([train_x, test_x])
    targets = np.concatenate([train_y, test_y])
    return features, targets

# Build the full (features, targets) arrays for each dataset.
nba_base_x, nba_base_y = format_data(nba_base)
nba_extern_x, nba_extern_y = format_data(nba_extern)

# Print the (feature index, rank) lists, baseline first, then external.
# The default n_features_to_select=1 is used for both calls.
# NOTE(review): indices refer to columns of the arrays returned by
# format_data, which may not line up with the DataFrame's column order —
# confirm against split_data before mapping indices back to names.
print(perform_rfe(nba_base_x, nba_base_y))
print(perform_rfe(nba_extern_x, nba_extern_y))


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
33
17
Index(['seasonStartYear', 'MP', 'PTS', 'Age', 'games', 'games_started', 'PER',
       'FTr', 'AST', 'STL', 'TRB', 'FT', '3P', 'FG', 'height', 'weight',
       'startYear', 'all_star_total', 'all_star_enc', 'all_nba_enc',
       'all_nba_total', 'draft_pick', 'champion', 'conference_champ', 'mvp',
       'mvp_rank', 'mvp_total', 'player_week_enc', 'player_week_total', 'dpoy',
       'dpoy_rank', 'dpoy_total', 'inflationAdjSalary_log'],
      dtype='object')
[(6, 1), (5, 2), (2, 3), (13, 4), (8, 5), (3, 6), (14, 7), (0, 8), (7, 9), (11, 10), (9, 11), (1, 12), (4, 13), (12, 14), (10, 15)]
[(28, 1), (6, 2), (16, 3), (25, 4), (23, 5), (30, 6), (17, 7), (26, 8), (19, 9), (2, 10), (13, 11), (15, 12), (27, 13), (22, 14), (18, 15), (21, 16), (3, 17), (4, 18), (5, 19), (14, 20), (8, 21), (10, 22), (1, 23), (9, 24), (11, 25), (0, 26), (12, 27), (7, 28), (20, 29), (29, 30), (24, 31)]
