# Project: Handwritten Digit Recognition Using a Genetic Algorithm

In [1]:
pip install sklearn-genetic

Collecting sklearn-genetic
  Downloading sklearn_genetic-0.5.1-py3-none-any.whl (11 kB)
Collecting deap>=1.0.2
  Downloading deap-1.3.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (160 kB)
[K     |████████████████████████████████| 160 kB 7.0 MB/s 
Installing collected packages: deap, sklearn-genetic
Successfully installed deap-1.3.1 sklearn-genetic-0.5.1


In [29]:
import random
import numpy as np
from sklearn import datasets, linear_model
from genetic_selection import GeneticSelectionCV

# The selector below is run with n_jobs=-1 (multiple processes), so the entry
# point is kept behind the usual __main__ guard.
if __name__ == "__main__":

    # Seed both the NumPy and the stdlib generators for reproducible runs.
    np.random.seed(42)
    random.seed(42)

    # Classifier that is cross-validated on each candidate feature subset.
    estimator = linear_model.LogisticRegression(multi_class="ovr", solver="liblinear")

    # Load the digits dataset; the labels are used unchanged as the target.
    digit = datasets.load_digits()
    y = digit.target

    # Build 20 columns of small uniform noise (unrelated to the labels) and
    # append them to the right of the real features.
    E = np.random.uniform(0, 0.1, size=(digit.data.shape[0], 20))
    X = np.concatenate((digit.data, E), axis=1)

    # Genetic-algorithm settings, grouped by what they control.
    ga_settings = dict(
        # Evaluation of a candidate subset: 5-fold CV accuracy, at most 7 features.
        cv=5,
        verbose=1,
        scoring="accuracy",
        max_features=7,
        # Population size and evolution operators.
        n_population=50,
        crossover_proba=0.5,
        mutation_proba=0.2,
        n_generations=40,
        crossover_independent_proba=0.5,
        mutation_independent_proba=0.05,
        tournament_size=3,
        # Stopping rule, score caching and parallelism.
        n_gen_no_change=10,
        caching=True,
        n_jobs=-1,
    )
    selector = GeneticSelectionCV(estimator, **ga_settings)

    # fit() returns the fitted selector; later cells use it as `model`.
    model = selector.fit(X, y)

Selecting features with genetic algorithm.
gen	nevals	avg                            	std                            	min                            	max                            
0  	50    	[ 0.316751  4.48      0.022101]	[ 0.140987  1.920833  0.012109]	[ 0.086252  1.        0.001459]	[ 0.642772  7.        0.050674]
1  	34    	[-1599.656257     6.42      1600.024823]	[ 3666.210579     2.154901  3666.049722]	[-10000.            3.            0.009134]	[     0.642772     15.        10000.      ]
2  	33    	[-2999.680109     6.72      3000.022701]	[ 4582.785113     1.990377  4582.560834]	[-10000.            3.            0.013045]	[     0.642772     12.        10000.      ]
3  	28    	[-1999.592294     6.66      2000.027355]	[ 4000.203854     2.159722  3999.986323]	[-10000.            3.            0.008576]	[     0.667807     14.        10000.      ]
4  	33    	[-2799.600143     6.9       2800.02612 ]	[ 4490.238219     1.627882  4489.972576]	[-10000.            4.            0.018954]

In [30]:
# Predict a digit label for every row of X (the same matrix the selector was fitted on).
model.predict(X)

array([0, 1, 8, ..., 8, 9, 8])

In [31]:
# Show the full feature vector of the sample at index 1: the original digit
# features followed by the 20 appended noise values.
X[1]

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.20000000e+01,
       1.30000000e+01, 5.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.10000000e+01,
       1.60000000e+01, 9.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 3.00000000e+00, 1.50000000e+01,
       1.60000000e+01, 6.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 7.00000000e+00, 1.50000000e+01, 1.60000000e+01,
       1.60000000e+01, 2.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.60000000e+01,
       1.60000000e+01, 3.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.60000000e+01,
       1.60000000e+01, 6.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.60000000e+01,
       1.60000000e+01, 6.00000000e+00, 0.00000000e+00, 0.00000000e+00,
      

In [32]:
# Predict the label of the sample at index 1 by slicing it out of X directly
# instead of pasting its printed values: the slice stays in sync with the data
# (seed, number of features) and uses the exact stored values rather than the
# rounded display. X[1:2] keeps the 2-D (1, n_features) shape predict() expects.
model.predict(X[1:2])

array([1])

In [33]:
# Score the fitted model on X, y -- the same data it was fitted on, so this is
# a training-set figure, not a held-out estimate.
# NOTE(review): presumably mean accuracy (the classifier's default score) -- confirm.
model.score(X,y)

0.7779632721202003