In [None]:
import pandas as pd
from joblib import load
from tqdm import tqdm
from deap import base, creator, tools, algorithms
import numpy as np
import random

In [None]:
adidas_data = pd.read_csv('AdidasMatchday20240419.csv')

loaded_pipeline = load('my_pipeline.joblib')

In [None]:
# Class for scoring a sample consisting of:
# 2 categories (sport, sex) and 6 measurements
class Score:
    def __init__(self, pipeline):
        self.pipeline = pipeline

    def __call__(self, category, genders, measurements):
        df = pd.DataFrame(measurements, columns=[
            'insole_length', 'ball_girth', 'ball_width',
            'heel_width', 'toe_height', 'heel_spring'
        ])

        df['sports_categories'] = category
        df['gender_last'] = genders

        return self.pipeline.predict(df)


scorer = Score(loaded_pipeline)
scorer(['Football'], ['Female'], [[277.4, 254.0, 95.7, 69.5, 27.4, 5.0]])

array([0.15119873], dtype=float32)

In [None]:
# Define the assessment function
def assessment_function(individual: list):
  """
  individual: list of 6 numbers representing measurements
  sex+sport: str, that are defined in environment further
  """
  return scorer([sport], [sex], [individual])

# We want to minimize the loss --> negative weight
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Define a range for each feature and corresponding step size
feature_ranges = {
    'insole_length': [(230, 310),0.1],
    'ball_girth': [(200, 280),0.5],
    'ball_width': [(70, 110),0.1],
    'heel_width': [(45, 78),0.5],
    'toe_height': [(15, 40),0.1],
    'heel_spring': [(0.5, 25),0.5]
}

# Define a function to generate random values for each feature within its specified range
def generate_feature(feature_name, feature_range):
  ran, step = feature_range
  start, stop = ran
  num_elements = int((stop - start) / step)+1
  random_sample = start + step * random.randint(0, num_elements)
  return random_sample

# Register the feature generator function with DEAP
for feature_name, feature_range in feature_ranges.items():
    toolbox.register("attr_" + feature_name, generate_feature, feature_name, feature_range)

toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_insole_length, toolbox.attr_ball_girth,
                  toolbox.attr_ball_width, toolbox.attr_heel_width,
                  toolbox.attr_toe_height, toolbox.attr_heel_spring), n=1)

In [None]:
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", assessment_function)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=3, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

In [None]:
# Genetic algorithm parameters
P_S = 1000
NGEN = 1000  # Number of generations
CXPB = 0.5  # Crossover probability
MUTPB = 0.2  # Mutation probability

In [None]:
result = dict()

sex = "Female"
sport = "Running"

population = toolbox.population(n=P_S)

for gen in tqdm(range(NGEN)):
    offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
    fits = toolbox.map(toolbox.evaluate, offspring)
    for fit, ind in zip(fits, offspring):
        ind.fitness.values = (fit,)
    population = toolbox.select(offspring, len(population))

best_ind = tools.selBest(population, 1)[0]
print(sex, sport)
print(best_ind, best_ind.fitness.values)

result[(sex,sport)] = best_ind

100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [1:07:49<00:00,  4.07s/it]

Female Running
[278.6, 240.65877151510045, 94.4286911463292, 69.0, 25.8, 8.5] (array([0.04790571], dtype=float32),)





In [None]:
result

{('Female', 'Running'): [278.6,
  240.65877151510045,
  94.4286911463292,
  69.0,
  25.8,
  8.5]}

And also the same for each other **tuple(sex,sport)**

We split search between our computing power units (including couple of AWS) and manually entered result values