In [1]:
# Import modules
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Import PySwarms
import pyswarms as ps
from pyswarms.utils.plotters import plot_cost_history
from sklearn import linear_model
from sklearn.metrics import roc_auc_score, make_scorer
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

In [2]:
from sklearn.datasets import make_regression

# Synthetic regression problem: 100 samples, 20 features of which 5 are
# informative; random_state pins the generated data.
X, y = make_regression(
    n_samples=100,
    n_features=20,
    n_informative=5,
    bias=0,
    random_state=1,
)

In [3]:
# Tabular view of the toy dataset: one column per feature plus a 'labels' column
df = pd.DataFrame(X).assign(labels=y)

In [4]:
# NOTE(review): `linear_model` is already imported in the first cell; this
# repeated import is redundant.
from sklearn import linear_model
# Module-level estimator; f_per_particle() reads it as a global when it
# calls cross_val_score.
regressor = linear_model.LinearRegression()

In [5]:
# Fitness error of a particle - the optimiser aims to find the minimum of this value
def objective_fcn(y_true, y_pred, **kwargs):
    """Higher-level function to compute the objective function value for a particle.

    Inputs
    ------

    y_true:
        Passed as the first argument to ``kwargs['P']``.

    y_pred:
        Passed as the second argument to ``kwargs['P']``.

    **kwargs: values needed to evaluate the objective. Three keys are read,
    and a missing one raises ``KeyError``:
        P: callable invoked as ``P(y_true, y_pred)``; its result is objective 1
        ratio_selected_features: objective 2, forwarded unchanged
        alpha: the balancing value between the two objectives

    Returns
    -------
    The value produced by ``obj_function_equation`` for the two objectives
    and ``alpha``.
    """
    performance = kwargs['P'](y_true, y_pred)  # objective 1
    feature_ratio = kwargs['ratio_selected_features']  # objective 2
    return obj_function_equation(performance, feature_ratio, kwargs['alpha'])

def obj_function_equation(obj_1, obj_2, alpha):
    """Combine two objectives into one value.

    Evaluates ``alpha * (1 - obj_1) + (1.0 - alpha) * obj_2``.

    Inputs
    ------
    obj_1: first objective; it enters the result as ``1 - obj_1``
    obj_2: second objective; it enters the result directly
    alpha: weight on the first term, the second term is weighted ``1.0 - alpha``

    Returns
    -------
    The weighted sum of the two terms.
    """
    first_term = alpha * (1 - obj_1)
    second_term = (1.0 - alpha) * obj_2
    return first_term + second_term

def f_per_particle(m, alpha, X, y, P):
    """Evaluate the objective function for a single particle.

    Inputs
    ------
    m : numpy.ndarray
        Binary mask that can be obtained from BinaryPSO. Columns of X where
        ``m == 1`` are kept; a mask with no non-zero entry falls back to
        using every column of X.
    alpha: float
        Constant weight for trading-off classifier/regressor performance
        and number of features
    X: data to be used for CV
    y: labels to be used for CV
    P: performance metric forwarded to objective_fcn(), where it is called
        as ``P(y_true, y_pred)``

    Returns
    -------
    Mean of the scores returned by cross_val_score (cv=10) for the
    module-level ``regressor`` on the selected columns.
    """
    # Apply the binary mask; an empty selection means "use all features"
    selected = X[:, m == 1] if np.count_nonzero(m) != 0 else X

    # Fraction of the available features that this particle keeps
    ratio_selected_features = selected.shape[1] / X.shape[1]

    # Wrap objective_fcn as a scorer so cross validation evaluates it per fold
    scorer = make_scorer(
        objective_fcn,
        ratio_selected_features=ratio_selected_features,
        P=P,
        alpha=alpha,
    )
    fold_scores = cross_val_score(regressor, selected, y, cv=10, scoring=scorer)

    return fold_scores.mean()
def f(swarm, X, y, performance_metric, alpha):
    """Evaluate the objective function for every particle in the swarm.

    Inputs
    ------
    swarm: numpy.ndarray of shape (n_particles, dimensions)
        The swarm that will perform the search

    X: data to pass into f_per_particle() function

    y: label data to pass into f_per_particle() function

    performance_metric: passed to f_per_particle() as its ``P`` argument

    alpha: trade-off weight to pass into f_per_particle() function

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The computed loss for each particle
    """
    losses = []
    for index in range(swarm.shape[0]):
        particle_loss = f_per_particle(swarm[index], alpha, X, y, performance_metric)
        losses.append(particle_loss)

    return np.array(losses)

In [7]:
                                        ###############
                                        # Driver Code #
                                        ###############

from pyswarms.utils.search.grid_search import GridSearch
import inspect
            


#source_GS = inspect.getsource(GridSearch)

#print(source_GS)


class GridSearchUpdate(GridSearch):
    """GridSearch variant that forwards extra keyword arguments to ``optimize``.

    Keyword arguments given to the constructor beyond the named parameters
    are stored on the instance and passed to the optimizer's ``optimize``
    call on every run (see ``generate_score``).
    """

    def __init__(
        self,
        optimizer,
        n_particles,
        dimensions,
        options,
        objective_func,
        iters,
        bounds=None,
        velocity_clamp=(0, 1),
        **kwargs
    ):
        """Set up the search.

        Parameters
        ----------
        optimizer, n_particles, dimensions, options, objective_func, iters,
        bounds, velocity_clamp:
            Forwarded unchanged to ``GridSearch.__init__``.
        **kwargs:
            Stored as ``self.kwargs`` and passed to ``optimize`` for every
            hyperparameter combination.
        """
        super().__init__(
            optimizer,
            n_particles,
            dimensions,
            options,
            objective_func,
            iters,
            bounds=bounds,
            velocity_clamp=velocity_clamp,
        )
        self.kwargs = kwargs
        # invoke the inherited assertions()
        self.assertions()

    def generate_score(self, options):
        """Run the optimizer once with ``options`` and return what ``optimize`` returns.

        Parameters
        ----------

        options: dict
            a dict with the following keys: {'c1', 'c2', 'w', 'k', 'p'}

        Returns
        -------
        The unmodified return value of the optimizer's ``optimize`` call.
        ``search`` reads element ``[0]`` of it as the score.
        """
        # Initialize optimizer.
        # NOTE(review): self.bounds is passed as the fourth positional
        # argument -- confirm this matches the optimizer's signature.
        swarm_optimizer = self.optimizer(
            self.n_particles, self.dims, options, self.bounds, velocity_clamp=self.vclamp
        )

        # Extra keyword arguments stored at construction are forwarded here
        return swarm_optimizer.optimize(
            self.objective_func, iters=self.iters, **self.kwargs
        )

    def search(self, maximum=False):
        """Compare optimizer's objective function performance scores
        for all combinations of provided parameters

        Parameters
        ----------

        maximum: bool
            a bool defaulting to False, returning the minimum value for the
            objective function. If set to True, will return the maximum value
            for the objective function.

        Returns
        -------
        tuple
            ``(best_score, best_options)``; both are also stored on the
            instance as ``self.best_score`` and ``self.best_options``.
        """
        # Generate the grid of all hyperparameter value combinations
        grid = self.generate_grid()

        # Calculate scores for all hyperparameter combinations
        scores = [self.generate_score(candidate)[0] for candidate in grid]

        # Index of the best score; ties resolve to the earliest grid entry
        select = max if maximum else min
        best_idx = select(range(len(scores)), key=scores.__getitem__)

        self.best_score = scores[best_idx]
        self.best_options = grid[best_idx]
        return self.best_score, self.best_options

#source_GSU = inspect.getsource(GridSearchUpdate)    

#optimizer = ps.discrete.BinaryPSO(n_particles=30, dimensions=dimensions, options=options)

# Hyperparameter grid: every combination of the list-valued entries is tried
# (3 * 3 * 3 * 3 = 81 optimizer runs); 'p' is held fixed at 1.
# The candidate values are arbitrary: see academic papers on initialisations.
options = {
    'c1': [1, 2, 3],
    'c2': [1, 2, 3],
    'w': [2, 3, 5],
    'k': [5, 10, 15],
    'p': 1,
}

# dimensions should be the number of features
dimensions = X.shape[1]

# Fix the seed so the stochastic search gives comparable results across runs.
# NOTE(review): assumes pyswarms draws from NumPy's global RNG -- confirm.
np.random.seed(1)

# The extra keyword arguments (X, y, performance_metric, alpha) are stored by
# GridSearchUpdate and forwarded to the objective function `f` on every run.
g = GridSearchUpdate(
    ps.discrete.BinaryPSO,
    n_particles=30,
    dimensions=dimensions,
    options=options,
    objective_func=f,
    iters=10,
    X=X,
    y=y,
    performance_metric=r2,
    alpha=0.5,
)

# Perform optimization: with the default maximum=False the lowest score wins
best_score, best_options = g.search()
print('Best score:', best_score)
print('Best options:', best_options)

# Create two instances of GradientBoostingRegressor (NOTE: naming one of them r2 would shadow the r2_score alias imported as r2)
# r1 = GradientBoostingRegressor(n_estimators=2, learning_rate=0.1, max_depth=1, random_state=0, loss='ls')
# r2 = GradientBoostingRegressor(n_estimators=2, learning_rate=0.1, max_depth=1, random_state=0, loss='ls')

# # Get the selected features from the final positions
# X_selected_features = X[:,pos==1]  # subset

# # Compute performance using CV
# scores = cross_validate(r1, X_selected_features, y, cv=10, scoring='neg_root_mean_squared_error')
# scores2 = cross_validate(r2, X, y, cv=10, scoring='neg_root_mean_squared_error')

# subset_performance = scores['test_score'].mean()
# wholeset_performance = scores2['test_score'].mean()


# print('Subset fitness cost/loss: %.3f' % (cost))
# print('Subset performance: %.3f' % (subset_performance))
# print('Full set performance: %.3f' % (wholeset_performance))

2021-08-20 14:07:52,379 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1, 'c2': 1, 'w': 2, 'k': 5, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.182
2021-08-20 14:07:55,159 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.18174092653901508, best pos: [1 0 0 1 0 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0]
2021-08-20 14:07:55,167 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1, 'c2': 1, 'w': 2, 'k': 10, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.235
2021-08-20 14:07:57,721 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.23468657307453272, best pos: [1 1 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 1 1 0]
2021-08-20 14:07:57,728 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1, 'c2': 1, 'w': 2, 'k': 15, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.25
2021-08-20 14:08:00,312 - pyswarms.discrete.binary - INFO - Optimization finis

2021-08-20 14:08:56,415 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1, 'c2': 3, 'w': 3, 'k': 15, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.207
2021-08-20 14:08:59,754 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.20718309398113094, best pos: [1 0 1 1 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 0]
2021-08-20 14:08:59,762 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1, 'c2': 3, 'w': 5, 'k': 5, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.275
2021-08-20 14:09:03,108 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.275, best pos: [1 0 0 1 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0 0]
2021-08-20 14:09:03,116 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1, 'c2': 3, 'w': 5, 'k': 10, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.307
2021-08-20 14:09:06,445 - pyswarms.discrete.binary - INFO - Optimization finished | best co

2021-08-20 14:10:11,718 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 2, 'c2': 3, 'w': 2, 'k': 10, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.217
2021-08-20 14:10:15,030 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.21671299927206317, best pos: [1 1 0 0 0 1 0 0 0 0 0 0 1 1 0 1 0 1 1 0]
2021-08-20 14:10:15,039 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 2, 'c2': 3, 'w': 2, 'k': 15, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.232
2021-08-20 14:10:18,361 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.23187306951030778, best pos: [1 0 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 1 1 0]
2021-08-20 14:10:18,369 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 2, 'c2': 3, 'w': 3, 'k': 5, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.217
2021-08-20 14:10:21,854 - pyswarms.discrete.binary - INFO - Optimization fini

2021-08-20 14:11:30,234 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 3, 'c2': 2, 'w': 5, 'k': 5, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.242
2021-08-20 14:11:33,732 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.24202177773162062, best pos: [1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 0 1 1 1]
2021-08-20 14:11:33,740 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 3, 'c2': 2, 'w': 5, 'k': 10, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.242
2021-08-20 14:11:37,269 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.2422977588183266, best pos: [1 1 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 1 0 1]
2021-08-20 14:11:37,278 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 3, 'c2': 2, 'w': 5, 'k': 15, 'p': 1}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=0.275
2021-08-20 14:11:40,752 - pyswarms.discrete.binary - INFO - Optimization finis

[0.18174092653901508, 0.23468657307453272, 0.25, 0.267901244769686, 0.24214142027544713, 0.20975174379623973, 0.3322554346815994, 0.1907406222884433, 0.26724350532482305, 0.25, 0.20695011617608006, 0.25, 0.35763445286377715, 0.2600335722812665, 0.20983230606603193, 0.32500000000000007, 0.21627759974867827, 0.24166747129853022, 0.29999999999999993, 0.2415781434336211, 0.1847787035186455, 0.23227640303621153, 0.25702739534422914, 0.20718309398113094, 0.275, 0.30719199872109104, 0.2174023796977443, 0.26003521587823875, 0.2414844754245827, 0.2345104907496484, 0.2597464544100808, 0.21605424686291488, 0.3098227642170813, 0.25, 0.30715253463006664, 0.31016880532310553, 0.26018120868351013, 0.23175630306988487, 0.2598426710811524, 0.25, 0.29999999999999993, 0.232243301893236, 0.375, 0.29999999999999993, 0.21721838756187237, 0.23254259555899273, 0.21671299927206317, 0.23187306951030778, 0.21665992419027957, 0.20687695917982002, 0.16668834112945025, 0.2825307885447237, 0.2849160456897491, 0.225,

In [None]:
# NOTE(review): looks like a leftover debug print with no analytical purpose --
# candidate for removal.
print('hey')