[View in Colaboratory](https://colab.research.google.com/github/Gagan-K-Shetty/Machine_learning/blob/master/GA_example.ipynb)

In [0]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from functools import reduce
from operator import add
import random
import numpy as np
import sklearn

In [0]:
# Load the iris dataset, then pull out the feature matrix (X) and the
# class labels (y) in a single unpacking step.
data = load_iris()
X, y = data.data, data.target

In [0]:
class data_loader:
    '''
    Holds a feature/target dataset, optionally splits it into
    train/test(/validation) sets and serves sequential minibatches.

    Minibatches are drawn from the training set once `split_to_sets` has been
    called, and from the full dataset before that.
    '''
    def __init__(self, data, target, batch_size = 32, train_test_val=False):
        '''
        Initializes the object.
        args :
            data : the features (sliceable; row count taken from `.shape[0]`
                   when available, `len()` otherwise)
            target : the targets, aligned row-by-row with data
            batch_size (integer): the minibatch size
            train_test_val (bool): in case we want a validation set too
        '''
        self.data = data
        self.target = target
        self.batch_size = batch_size
        self.iterator = 0
        self.train_test_val = train_test_val
        self.split = False
        # Number of minibatches in one pass over the data currently served by
        # get_next_batch (kept as a plain int so `iterator` stays an int too).
        self.num_batches = self._count_batches(data)

    def _count_batches(self, data):
        '''
        Returns how many minibatches are needed to cover `data` once.
        Always at least 1 so the wrap-around modulo never divides by zero.
        '''
        rows = data.shape[0] if hasattr(data, "shape") else len(data)
        return max(1, int(np.ceil(rows / self.batch_size)))

    def split_to_sets(self, splitter_function=None, args=None):
        '''
        Splits the data passed while initialising into training/testing
        (and, if train_test_val is set, validation) sets and stores them on
        the object.
        args :
            splitter_function : callable `f(data, target, **args)` returning
                `(train_data, test_data, train_target, test_target)`.
                Defaults to sklearn.model_selection.train_test_split.
            args (dict): extra keyword parameters (other than the data)
                forwarded to splitter_function. The same parameters are used
                for the train/test and the train/validation split.
        '''
        if splitter_function is None:
            # Imported lazily: a bare `import sklearn` does not guarantee that
            # the `model_selection` submodule is loaded.
            from sklearn.model_selection import train_test_split
            splitter_function = train_test_split
        kwargs = {} if args is None else args

        self.train_data, self.test_data, self.train_target, self.test_target = splitter_function(self.data, self.target, **kwargs)
        if self.train_test_val:
            # carve the validation set out of the training portion
            self.train_data, self.val_data, self.train_target, self.val_target = splitter_function(self.train_data, self.train_target, **kwargs)
        self.split = True

        # Minibatches now come from the (smaller) training set, so the batch
        # count has to follow it and iteration restarts from the first batch.
        self.num_batches = self._count_batches(self.train_data)
        self.iterator = 0

    def get_scoring_set(self):
        '''
        returns the set used to score a model: the validation set when one
        was requested, the test set otherwise. Requires split_to_sets first.
        '''
        if self.train_test_val :
            return self.val_data,self.val_target
        else:
            return self.test_data,self.test_target

    def get_train(self):
        '''returns (train_data, train_target). Requires split_to_sets first.'''
        return self.train_data,self.train_target

    def get_test(self):
        '''returns (test_data, test_target). Requires split_to_sets first.'''
        return self.test_data,self.test_target

    def get_validation(self):
        '''
        returns (val_data, val_target). Only available after split_to_sets
        on a loader created with train_test_val=True.
        '''
        return self.val_data,self.val_target

    def get_next_batch(self):
        '''
        returns the next set of examples based on the minibatch size, cycling
        back to the first batch after the last one. The last batch of a pass
        may be shorter than batch_size.
        '''
        if self.split:
            #if you split into train test sets
            data, target = self.train_data, self.train_target
        else:
            #if you didnt want to split into train and test sets
            data, target = self.data, self.target

        # Recomputed from the data actually being served so the cycle length
        # can never drift from it.
        self.num_batches = self._count_batches(data)

        start = int(self.iterator*self.batch_size)
        end = int(start + self.batch_size)
        # slicing past the end simply yields a shorter final batch
        temp_data,temp_target = data[start:end],target[start:end]
        self.iterator = (self.iterator + 1)%self.num_batches
        return temp_data, temp_target

class model:
    '''
    Thin wrapper pairing an estimator instance with the parameters it was
    built from, its training state and its score.
    '''
    def __init__(self,model,params,verbose = True):
        '''
        creates an instance of the model class
        args :-
            model : machine learning/ deeplearning model class (callable
                    that accepts the entries of params as keyword arguments)
            params : dictionary of parameters to initialize the model
            verbose (bool) : print the score and parameters after fitting
        '''
        self.params = params
        self.model = model(**params)
        self.verbose = verbose
        self.trained = False

    def train(self,data_loader):
        '''
        Fits the wrapped estimator on the loader's training set and stores
        its score on the loader's scoring set in self.score.
        Returns the string "Already Trained" (and does nothing) when called
        on an instance that has been trained before, None otherwise.
        '''
        if not self.trained:
            training_set = data_loader.get_train()
            self.model.fit(*training_set)

            scoring_set = data_loader.get_scoring_set()
            self.score = self.model.score(*scoring_set)

            if self.verbose:
                print("Fitted with ",self.score,self.params)
            self.trained = True
            return None
        return "Already Trained"

    def fitness(self):
        '''returns the score stored by train (only set once train has run)'''
        return self.score

    def __str__(self):
        # delegate to the wrapped estimator's own string form
        wrapped = self.model
        return wrapped.__str__()

    
class model_generator():
    '''
    Factory producing `model` wrappers for one estimator class, either from
    a random pick out of a parameter grid or from explicit parameters.
    '''
    def __init__(self,model,param_choices):
        '''
        creates a model generator instance.
        args :
            model : the sklearn (or any equivalent) estimator class.
            param_choices (dict) : maps each parameter name to the list of
                values it may take. For example, for the SVC in sklearn,
                param_choices = {'C':[1,2,3,4],'kernel':["linear","rbf","sigmoid"]}
        '''
        self.model = model
        self.param_choices = param_choices

    def create_random(self):
        '''
        return a model instance initialised with one randomly selected value
        for every parameter in param_choices
        '''
        picked = {}
        for name, options in self.param_choices.items():
            picked[name] = random.choice(options)
        return model(self.model,picked)

    def create_set(self,params):
        '''
        return a model instance initialised with exactly the given params
        '''
        wrapped = model(self.model,params)
        return wrapped

    
class GA_search():
    """Genetic-algorithm search over a discrete grid of model parameters.

    A population of models is trained and ranked each generation; the best
    ones (plus a few random survivors) become parents and the remaining
    slots are filled with children bred from pairs of distinct parents.
    """
    def __init__(self, data_loader, model_generator, retain=0.4, random_select=0.1, mutate_chance=0.2):
        """
        Create a GA search instance.
        Args :
            data_loader (data_loader) : object handed to each model's
                `train` method.
            model_generator (model_generator) : object exposing
                `param_choices`, `create_random()` and `create_set(params)`.
            retain (float): Fraction (0-1) of the population kept as parents
                after each generation.
            random_select (float): Probability of a rejected model remaining
                in the population.
            mutate_chance (float): Probability a child will be randomly
                mutated.
        """
        self.mutate_chance = mutate_chance
        self.random_select = random_select
        self.retain = retain
        self.data_loader = data_loader
        self.model_generator = model_generator
        self.max_fitness = 0
        self.best_model = None

    def create_population(self, count):
        """Create a population of random models.
        Args:
            count (int): Number of models to generate, aka the size of the population
        Returns:
            (list): Population of model objects, one `create_random()` result
                per slot
        """
        return [self.model_generator.create_random() for _ in range(count)]

    @staticmethod
    def fitness(model):
        """
        Return the fitness of a model, i.e. whatever its `fitness()` method
        reports (the score stored during training for the `model` class).
        """
        return model.fitness()

    def grade(self, pop):
        """Find average fitness for a population.
        Args:
            pop (list): The population of (already trained) models
        Returns:
            (float): The average fitness of the population
        Raises:
            ZeroDivisionError: if pop is empty
        """
        total = sum(self.fitness(member) for member in pop)
        return total / float(len(pop))

    def breed(self, mother, father):
        """Make two children as parts of their parents.
        Args:
            mother (dict): model parameters
            father (dict): model parameters
        Returns:
            (list): Two model objects
        """
        children = []
        for _ in range(2):
            # Every parameter is inherited from a randomly chosen parent.
            child = {
                param: random.choice([mother[param], father[param]])
                for param in self.model_generator.param_choices
            }
            offspring = self.model_generator.create_set(child)

            # Randomly mutate some of the children.
            if self.mutate_chance > random.random():
                offspring = self.mutate(offspring.params)

            children.append(offspring)

        return children

    def mutate(self, params):
        """Randomly re-draw one parameter.
        Args:
            params (dict): The parameters to start from (left untouched; the
                mutation is applied to a copy)
        Returns:
            (model): A model object built from the mutated parameters
        """
        choices = self.model_generator.param_choices

        # Choose a random key and give it a random value from its choices.
        mutation = random.choice(list(choices.keys()))
        mutated = dict(params)
        mutated[mutation] = random.choice(choices[mutation])
        return self.model_generator.create_set(mutated)

    def evolve(self, pop):
        """Evolve a population by one generation.
        Args:
            pop (list): A list of models
        Returns:
            (list): The evolved population of models (same size as pop):
                the surviving parents, best first, followed by new children
        """
        if not pop:
            # nothing to train, rank or breed
            return []

        # Train every model; already trained ones are expected to skip it.
        for member in pop:
            member.train(self.data_loader)

        # Rank best-first (stable, so ties keep their population order).
        graded = sorted(pop, key=self.fitness, reverse=True)

        # Track the best model seen so far. The `is None` test makes sure a
        # best model is recorded even when no score exceeds the initial
        # max_fitness of 0 (all-zero or negative scores).
        top_fitness = self.fitness(graded[0])
        if self.best_model is None or top_fitness > self.max_fitness:
            self.max_fitness = top_fitness
            self.best_model = graded[0]

        # Get the number we want to keep for the next gen.
        retain_length = int(len(graded)*self.retain)

        # The parents are every model we want to keep.
        parents = graded[:retain_length]

        # For those we aren't keeping, randomly keep some anyway.
        for individual in graded[retain_length:]:
            if self.random_select > random.random():
                parents.append(individual)

        # Breeding needs two distinct parents. With fewer than two survivors
        # the pairing below could never succeed, so top the parents up with
        # the best-ranked models that were not selected yet.
        if len(parents) < 2:
            for individual in graded:
                if len(parents) >= 2:
                    break
                if not any(individual is kept for kept in parents):
                    parents.append(individual)

        # Now find out how many spots we have left to fill.
        desired_length = len(pop) - len(parents)
        children = []

        # Add children, which are bred from two distinct random parents.
        while len(children) < desired_length:
            male, female = random.sample(parents, 2)
            babies = self.breed(male.params, female.params)

            # Don't grow larger than desired length.
            children.extend(babies[:desired_length - len(children)])

        parents.extend(children)
        print("Best so far",self.best_model.params,"with a fitness of ",self.max_fitness)
        return parents

    


In [0]:
# Wrap the dataset and split it into train / validation / test sets
# (train_test_val=True requests the extra validation set).
loader = data_loader(data=X, target=y, train_test_val=True)
loader.split_to_sets()

# Generator for SVC candidates drawn from this grid of parameter values.
gen = model_generator(
    model=SVC,
    param_choices={
        'C' : [5,4,6,2,3,7,1],
        'kernel' : ["linear","rbf","poly","sigmoid"],
    },
)

In [5]:
# Build the search driver and an initial random population of ten candidates.
Ga = GA_search(data_loader=loader, model_generator=gen)
pop = Ga.create_population(count=10)

# Show one of the randomly initialised candidates.
print(pop[2])

SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='sigmoid',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


In [6]:
# Evolve the population for a fixed number of generations.
# The loop variable is deliberately NOT called `gen`: that name is bound to
# the model_generator instance, and overwriting it with an int would break
# any later re-run of `GA_search(loader, gen)`.
generations = 5
for generation in range(generations):
    print("Gen",generation+1)
    pop = Ga.evolve(pop)

Gen 1
Fitted with  1.0 {'C': 6, 'kernel': 'linear'}
Fitted with  1.0 {'C': 7, 'kernel': 'linear'}
Fitted with  0.21428571428571427 {'C': 5, 'kernel': 'sigmoid'}
Fitted with  0.9642857142857143 {'C': 6, 'kernel': 'poly'}
Fitted with  1.0 {'C': 3, 'kernel': 'linear'}
Fitted with  0.21428571428571427 {'C': 5, 'kernel': 'sigmoid'}
Fitted with  0.21428571428571427 {'C': 3, 'kernel': 'sigmoid'}
Fitted with  1.0 {'C': 7, 'kernel': 'rbf'}
Fitted with  0.21428571428571427 {'C': 6, 'kernel': 'sigmoid'}
Fitted with  1.0 {'C': 5, 'kernel': 'linear'}
Best so far {'C': 6, 'kernel': 'linear'} with a fitness of  1.0
Gen 2
Fitted with  1.0 {'C': 3, 'kernel': 'linear'}
Fitted with  1.0 {'C': 4, 'kernel': 'linear'}
Fitted with  1.0 {'C': 7, 'kernel': 'linear'}
Fitted with  1.0 {'C': 2, 'kernel': 'linear'}
Fitted with  1.0 {'C': 7, 'kernel': 'linear'}
Fitted with  1.0 {'C': 6, 'kernel': 'linear'}
Best so far {'C': 6, 'kernel': 'linear'} with a fitness of  1.0
Gen 3
Fitted with  1.0 {'C': 6, 'kernel': 'lin