# Wrapper Approach - Hill Climbing
In this notebook we implement a rather simple feature selection procedure that follows a wrapper approach. The search algorithm, hill climbing in this case, is wrapped around the target classification/regression algorithm.

First we import the libraries that we will need.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

from sklearn import datasets
from sklearn import linear_model
from sklearn import naive_bayes

from deap import algorithms
from deap import base
from deap import creator
from deap import tools

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

Next we load the data, fix the random seed, and set up the 10-fold cross-validation splitter that will be used for the evaluations.

In [3]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- this cell needs an older scikit-learn (or another dataset); confirm
# against the installed version.
data = datasets.load_boston()

X = data["data"]
y = data["target"]

number_of_variables = X.shape[1]
input_variables = data.feature_names
target_variable = 'MEDV'

# Seed NumPy's generator AND the stdlib `random` module: the hill-climbing
# code in this notebook draws from `random`, so seeding NumPy alone would not
# make the search reproducible.
seed = 1234
np.random.seed(seed)
random.seed(seed)

# let's create also a pandas data frame
df = pd.DataFrame(data.data, columns=data.feature_names)
df[target_variable] = y

kfolds = KFold(10,shuffle=True,random_state=seed)

# last expression of the cell, so the preview is actually displayed
df.head()

## Hill Climbing

In [45]:
def HillClimbing(number_of_variables,number_of_evaluation,evaluation_function,perturbation_probability=0.1):
    """
    Stochastic hill climbing over binary feature masks.

    Starts from a random 0/1 mask, then repeatedly flips every bit of the
    current mask with probability `perturbation_probability` and keeps the
    perturbed mask only when it scores strictly higher than the best so far.

    :param number_of_variables: length of the feature mask
    :param number_of_evaluation: number of perturbed masks to evaluate (the
        evaluation of the initial mask is not counted)
    :param evaluation_function: callable that takes a list of 0/1 values and
        returns a score to be maximised
    :param perturbation_probability: per-bit flip probability (default 0.1)
    :return: tuple (best feature subset, best performance)
    """

    # number of perturbed candidates evaluated so far
    evaluations = 0

    current_feature_subset = [random.randint(0,1) for _ in range(number_of_variables)]

    # score the starting point (the original referenced an undefined `x` here)
    best_performance = evaluation_function(current_feature_subset)

    print("%5d\t\t%3.2f\t%s"%(evaluations,best_performance,str(current_feature_subset)))

    while evaluations<number_of_evaluation:

        # flip each bit independently; one random draw per position
        perturbation = [1-bit if (random.random()<perturbation_probability) else bit
                        for bit in current_feature_subset]

        performance = evaluation_function(perturbation)

        # strict improvement only: ties keep the current subset
        if (performance>best_performance):
            best_performance = performance
            current_feature_subset = perturbation

        evaluations = evaluations + 1

    print("Best Feature Subset = %s "%(str(current_feature_subset)))
    print("Performance = %3.2f"%(best_performance))

    return current_feature_subset, best_performance

In [6]:
# A single random bit replicated number_of_variables times: every entry is the same value.
number_of_variables * [random.randint(0,1)]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [7]:
# Same expression again: a fresh single draw, still copied into every position.
number_of_variables * [random.randint(0,1)]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [8]:
# One independent random draw per position, unlike the list-multiplication form.
[random.randint(0, 1) for _ in range(number_of_variables)]

[0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0]

In [11]:
def Perturbation(x,p=.01):
    """
    Build a perturbed copy of an individual by randomly flipping positions.

    Each position is visited once, in order; with probability `p` its value
    (converted with int()) is replaced by one minus that value, otherwise the
    converted value is kept.

    :param x: individual (values must be convertible with int())
    :param p: probability of perturbing each position
    :return: new individual, as a tuple of ints
    """

    def flip_or_keep(gene):
        # the random draw is made first, then the value is converted
        return 1-int(gene) if random.random()<p else int(gene)

    return tuple(flip_or_keep(gene) for gene in x)

In [13]:
# Example 13-element 0/1 mask; `individual` is what the perturbation cell that follows iterates over.
individual = [0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0]

In [44]:
# Flip each entry of `individual` independently with probability 0.1,
# then show how many ones the perturbed copy contains.
perturbation = [1 - bit if random.random() < 0.1 else bit for bit in individual]
print(sum(perturbation))

9


In [38]:
def f(x):
    """Return 1-x with probability 0.1, otherwise return x unchanged."""
    return 1-x if (random.random()<0.1) else x

In [42]:
# All-zero individual with 100 positions.
individual = [0 for _ in range(100)]