# LeaveOneFeatureOut (LOFO):
* This is one of the most powerful and reliable techniques for feature selection. It iteratively drops one feature at a time and evaluates the performance of your selected model on your selected folds. If dropping the feature improves the CV score, the feature is removed from the training set and the iterations continue in search of further improvement. <br>
* This notebook contains easy-to-use code written for this competition; however, it can be used for any other tabular dataset and for any task with only minor modifications. <br>
* All you need is to pass your training set (X), label (y), model, n_splits (for KFold cross validation), and evaluation metric.
* Because this might take a significant amount of time in the case of 100 features, this code will be run on the first 10 features only.
* Moreover, you can use a simpler model to speed up the feature selection process.

In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection, metrics
from catboost import CatBoostRegressor

In [2]:
# Load the competition training data and split it into target and features.
train = pd.read_csv('../input/tabular-playground-series-aug-2021/train.csv')
y = train['loss']
# Only the first ten non-target columns are kept, to keep the demonstration fast.
X = train.drop(columns='loss').iloc[:, :10]
# The test set is cut down to its first ten columns in the same way.
test = pd.read_csv('../input/tabular-playground-series-aug-2021/test.csv').iloc[:, :10]

In [3]:
# just run this cell

class LOFO(object):
    """Leave-One-Feature-Out (LOFO) feature selection.

    Starting from all features, each feature is removed in turn and the model
    is re-evaluated with K-fold cross-validation. When removing a feature
    lowers the CV score, the feature is recorded as harmful and stays removed
    for all later evaluations.

    The CV score is ``np.sqrt(eval_metric(y_true, y_pred))`` averaged over the
    folds, and a *lower* score is treated as better. Passing a mean-squared-
    error function as ``eval_metric`` therefore yields RMSE.

    Parameters
    ----------
    data : pandas.DataFrame
        Training features; ``transform`` reads ``data.values`` and
        ``data.columns``.
    labels : pandas.Series or pandas.DataFrame
        Training target; ``transform`` reads ``labels.values``.
    model : estimator
        Object with ``fit(x, y, eval_set=..., verbose=...)`` and
        ``predict(x)``. These keyword arguments are passed on every fit, so
        the estimator must accept them (the notebook uses CatBoostRegressor).
    n_splits : int
        Number of folds handed to ``sklearn.model_selection.KFold``.
    eval_metric : callable
        ``eval_metric(y_true, y_pred) -> float``; its square root is the score.
    """

    def __init__(self, data, labels, model, n_splits, eval_metric):
        self._data = data
        self._labels = labels
        self.model = model
        self.n_splits = n_splits
        self.eval_metric = eval_metric

    def kfold(self, x, y, model, n_splits, eval_metric):
        """Cross-validate ``model`` on ``x``/``y`` and return the mean score.

        Parameters
        ----------
        x : numpy.ndarray
            2-D feature matrix, indexed as ``x[row_indices, :]``.
        y : numpy.ndarray
            Target values, indexed as ``y[row_indices]``.
        model : estimator
            Refitted from scratch on every fold.
        n_splits : int
            Number of (unshuffled) K-fold splits.
        eval_metric : callable
            Metric whose square root is reported.

        Returns
        -------
        tuple
            ``(cv_score, preds_valid_array)``: the mean validation score over
            the folds and the out-of-fold predictions for every row.
        """
        # Kept as a local import, as in the original cell, so the method does
        # not depend on which notebook cells were executed beforehand.
        from sklearn import model_selection

        preds_valid_array = np.zeros(y.shape[0])
        train_scores = []
        valid_scores = []

        kf = model_selection.KFold(n_splits=n_splits)
        for fold, (train_idx, valid_idx) in enumerate(kf.split(x)):

            print(f"===================== Fold {fold+1} =====================")
            x_train, y_train = x[train_idx, :], y[train_idx]
            x_valid, y_valid = x[valid_idx, :], y[valid_idx]

            # Fit and predict with the same estimator: the one passed in.
            # (Previously ``self.model`` was fitted while ``model`` predicted.)
            model.fit(
                x_train, y_train,
                eval_set=[(x_valid, y_valid)],
                verbose=100,
            )

            preds_valid = model.predict(x_valid)
            preds_train = model.predict(x_train)

            valid_scores.append(np.sqrt(eval_metric(y_valid, preds_valid)))
            train_scores.append(np.sqrt(eval_metric(y_train, preds_train)))

            # Each row is in exactly one validation fold, so this fills the
            # out-of-fold prediction for that row.
            preds_valid_array[valid_idx] += preds_valid

        print("Mean valid score =", np.mean(valid_scores), "STD valid score = ", np.std(valid_scores, ddof=1))
        print("Mean train score =", np.mean(train_scores), "STD train score = ", np.std(train_scores, ddof=1))

        cv_score = np.mean(valid_scores)
        return cv_score, preds_valid_array

    def selectionLoop(self, x, y, model, n_splits, eval_metric):
        """Run the leave-one-feature-out search.

        Parameters
        ----------
        x : numpy.ndarray
            2-D feature matrix. It is only read, never modified.
        y : numpy.ndarray
            Target values.
        model, n_splits, eval_metric
            Forwarded unchanged to ``kfold``.

        Returns
        -------
        tuple
            ``(good_scores, harmful_features)``: the baseline score followed
            by every improved score, and the positional indices of the
            features whose removal produced those improvements.
        """
        print("All Features")
        score, _ = self.kfold(x, y, model, n_splits, eval_metric)
        good_scores = [score]
        harmful_features = []
        print("=================================================")

        n_features = x.shape[1]
        for i in range(n_features):

            # Candidate feature set: everything except feature i and the
            # features already found harmful. Tracking indices replaces the
            # old NaN-masking + dropna approach, which wrote into the input
            # data and also discarded columns that were all-NaN to begin with.
            kept = [
                j for j in range(n_features)
                if j != i and j not in harmful_features
            ]
            if not kept:
                # Removing the last remaining feature would leave the model
                # nothing to train on, so that candidate is not evaluated.
                print(f"Skip Feature {i}: no other features would remain")
                continue

            print(f"Drop Feature {i}")
            cv_score, _ = self.kfold(x[:, kept], y, model, n_splits, eval_metric)
            if cv_score < score:
                score = cv_score
                print("Improved Score =", score)
                good_scores.append(score)
                harmful_features.append(i)
                print("=================================================")

        print("Good scores :", good_scores)
        print("Harmful features :", harmful_features)

        return good_scores, harmful_features

    def transform(self, X, test):
        """Select features and drop the harmful ones from ``X`` and ``test``.

        The search always runs on the data and labels given to the
        constructor. The harmful columns are then removed, by column name,
        from both frames so that they keep an identical set of columns.

        Parameters
        ----------
        X : pandas.DataFrame or None
            Frame to reduce. ``None`` falls back to the constructor's data.
        test : pandas.DataFrame
            Test frame; must contain the same column names as the training
            data.

        Returns
        -------
        tuple
            ``(X, test)`` as new DataFrames without the harmful columns.
        """
        data = self._data
        frame = data if X is None else X

        _, harmful_features = self.selectionLoop(
            data.values,
            self._labels.values,
            self.model,
            self.n_splits,
            self.eval_metric,
        )

        # Positional indices refer to the training data's column order.
        harmful_names = [data.columns[i] for i in harmful_features]
        X = frame.drop(columns=harmful_names)
        test = test.drop(columns=harmful_names)

        return X, test

In [4]:
# Configure the selector here.
# X, y and test should be pandas objects (the selector reads `.values` and `.columns`).
lofo = LOFO(
    X,
    y,
    model=CatBoostRegressor(
        learning_rate=0.03,
        iterations=10000,
        loss_function='RMSE',
        eval_metric='RMSE',
        use_best_model=True,
        early_stopping_rounds=100,
    ),
    n_splits=5,
    eval_metric=metrics.mean_squared_error,
)

# Run the search and keep only the features that were not found harmful.
X, test = lofo.transform(X, test)

All Features
0:	learn: 7.9469448	test: 7.9113945	best: 7.9113945 (0)	total: 77.5ms	remaining: 12m 54s
100:	learn: 7.9313693	test: 7.9046870	best: 7.9045045 (97)	total: 1.88s	remaining: 3m 3s
200:	learn: 7.9225553	test: 7.9044847	best: 7.9044847 (200)	total: 3.65s	remaining: 2m 57s
300:	learn: 7.9147798	test: 7.9044668	best: 7.9041505 (263)	total: 5.42s	remaining: 2m 54s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 7.904150506
bestIteration = 263

Shrink model to first 264 iterations.
0:	learn: 7.9517648	test: 7.8921421	best: 7.8921421 (0)	total: 19.1ms	remaining: 3m 11s
100:	learn: 7.9367418	test: 7.8847378	best: 7.8847378 (100)	total: 1.83s	remaining: 2m 59s
200:	learn: 7.9278091	test: 7.8835805	best: 7.8834885 (163)	total: 3.62s	remaining: 2m 56s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 7.883488528
bestIteration = 163

Shrink model to first 164 iterations.
0:	learn: 7.9329327	test: 7.9677178	best: 7.9677178 (0)	total: 19.6ms	remaining: 

In [5]:
# Report how many columns remain in X and test after the LOFO selection.
for caption, frame in (
    ("X shape after transformation :", X),
    ("test shape after transformation :", test),
):
    print(caption, frame.shape)

X shape after transformation : (250000, 7)
test shape after transformation : (150000, 7)
