In [42]:
import numpy as np
import pandas as pd
import random
from itertools import combinations
from tqdm import tqdm

In [43]:
# Mount Google Drive (Colab only) so files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
# Load the preprocessed training data from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/Are_your_employees_burning_out/train_preprocessed.csv')

In [45]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,WFH Setup Available,Designation,Resource Allocation,Mental Fatigue Score,Burn Rate
0,-1,2.0,3.0,3.8,0.16
1,1,1.0,2.0,5.0,0.36
2,1,1.0,1.0,2.6,0.2
3,-1,3.0,7.0,6.9,0.52
4,1,2.0,4.0,3.6,0.29


In [46]:
# Re-encode 'WFH Setup Available' from {-1, 1} to {0, 1} so it fits in one genome bit.
# The 0 is changed back to -1 (see binary_to_float) when calculating the fitness value.
# replace() is used instead of map() so that re-running this cell is harmless:
# already-encoded 0/1 values are left as they are rather than being mapped to NaN.
df['WFH Setup Available'] = df['WFH Setup Available'].replace({-1: 0})

In [47]:
# Preview the frame again to confirm the WFH column now holds 0/1.
df.head()

Unnamed: 0,WFH Setup Available,Designation,Resource Allocation,Mental Fatigue Score,Burn Rate
0,0,2.0,3.0,3.8,0.16
1,1,1.0,2.0,5.0,0.36
2,1,1.0,1.0,2.6,0.2
3,0,3.0,7.0,6.9,0.52
4,1,2.0,4.0,3.6,0.29


In [48]:
# Count rows that are exact duplicates of an earlier row.
df.duplicated().sum()

6332

In [49]:
# Keep only the first occurrence of every duplicated row.
df = df.drop_duplicates()

In [50]:
# The 'Burn Rate' column is not part of the genome, so remove it from the frame.
df = df.drop(columns=['Burn Rate'])

In [51]:
# Split the frame into one sub-frame per designation level (0.0 through 5.0).
df_des_0, df_des_1, df_des_2, df_des_3, df_des_4, df_des_5 = (
    df[df['Designation'] == float(level)] for level in range(6)
)

In [52]:
# Each sub-frame holds a single designation level, so the column is dropped from all of them.
df_des_0, df_des_1, df_des_2, df_des_3, df_des_4, df_des_5 = (
    frame.drop(columns=['Designation'])
    for frame in (df_des_0, df_des_1, df_des_2, df_des_3, df_des_4, df_des_5)
)

In [53]:
# Per-designation frames, positioned so that the list index equals the designation level.
des_list = [
    df_des_0,
    df_des_1,
    df_des_2,
    df_des_3,
    df_des_4,
    df_des_5,
]

In [54]:
def select_designation(des_num, no_of_samples):
    """Draw a random sample of rows from one designation's frame.

    Args:
        des_num: Position of the designation frame to sample from
            (0 selects df_des_0, 1 selects df_des_1, and so on up to 5).
        no_of_samples: Number of rows to draw.

    Returns:
        The sampled rows as a NumPy array.
    """
    frames_by_designation = (df_des_0, df_des_1, df_des_2, df_des_3, df_des_4, df_des_5)
    chosen_frame = frames_by_designation[des_num]
    sampled_rows = chosen_frame.sample(n=no_of_samples)
    return sampled_rows.to_numpy()

In [55]:
# Exploratory sample: 10 random rows of the designation-3 frame as a NumPy array.
parent_population = df_des_3.sample(n=10).to_numpy()

In [56]:
# Total of the second field (index 1) across the sampled parent rows.
sum_hours = sum(row[1] for row in parent_population)

In [57]:
def float_to_binary(value, integer_bits, decimal_bits):
    """Encode a non-negative number as a fixed-point binary string.

    The integer part is written in binary and left-padded with zeros to
    ``integer_bits`` characters; the fractional part is truncated (not rounded)
    to ``decimal_bits`` binary places. The two parts are concatenated with no
    separator.

    Args:
        value: Non-negative number to encode.
        integer_bits: Width of the integer field.
        decimal_bits: Number of fractional bits to emit.

    Returns:
        A string of '0'/'1' characters: integer field followed by fraction field.

    Raises:
        ValueError: If ``value`` is negative, or if its integer part does not
            fit in ``integer_bits`` bits. Either case would otherwise produce a
            malformed or over-long string and misalign every field that is
            concatenated after it.
    """
    if value < 0:
        raise ValueError(f"cannot encode negative value {value!r}")

    integer_part = int(value)
    if integer_part >= 2 ** integer_bits:
        raise ValueError(
            f"integer part of {value!r} does not fit in {integer_bits} bit(s)"
        )
    decimal_part = value - integer_part

    # Integer part: plain binary, zero-padded on the left to the field width.
    integer_representation = format(integer_part, 'b').zfill(integer_bits)

    # Fractional part: repeated doubling, emitting the carry as the next bit.
    fraction_digits = []
    for _ in range(decimal_bits):
        decimal_part *= 2
        if decimal_part >= 1:
            fraction_digits.append('1')
            decimal_part -= 1
        else:
            fraction_digits.append('0')

    # Combine the integer and decimal parts.
    return integer_representation + ''.join(fraction_digits)

In [58]:
def binary_to_float(binary_record):
    """Decode one genome (sequence of bits) into [wfh, hours, fatigue].

    Layout of ``binary_record``:
        index 0      -> WFH flag (a stored 0 is decoded as -1)
        index 1..4   -> hours allocated, unsigned binary integer
        index 5..    -> fatigue score; the last four bits are the binary
                        fraction, everything before them is the integer part

    Returns:
        A three-element list ``[wfh_status, hours_allocated, fatigue_score]``.
    """
    wfh_bit = binary_record[0]
    hour_bits = binary_record[1:5]
    fatigue_bits = binary_record[5:]

    # Undo the 0/1 storage encoding of the WFH flag.
    wfh_status = -1 if wfh_bit == 0 else wfh_bit

    hours_allocated = int("".join(str(bit) for bit in hour_bits), 2)

    # Fatigue score: integer bits followed by four fractional bits.
    integer_bits = fatigue_bits[:-4]
    fraction_bits = fatigue_bits[-4:]

    whole_number = int("".join(str(bit) for bit in integer_bits), 2)
    real_value = 0
    for position, bit in enumerate(fraction_bits):
        real_value += bit * 2 ** (-position - 1)

    return [wfh_status, hours_allocated, whole_number + real_value]

In [59]:
def pop_to_binary(population):
    """Encode every record of a population as a list of bits.

    Each record contributes 1 bit for field 0, 4 bits for field 1 and
    4 integer + 4 fractional bits for field 2, concatenated in that order.

    Returns:
        A list of lists of ints (one inner list per record).
    """
    bit_strings = [
        "".join((
            float_to_binary(record[0], 1, 0),
            float_to_binary(record[1], 4, 0),
            float_to_binary(record[2], 4, 4),
        ))
        for record in population
    ]
    return [list(map(int, bits)) for bits in bit_strings]

In [60]:
def pop_to_float(binary_population):
    """Decode every genome of a binary population with binary_to_float.

    Returns:
        A list with one decoded record per input genome, in the same order.
    """
    return [binary_to_float(genome) for genome in binary_population]

In [61]:
def individual_fitness_score(feature_weights, record):
    """Linear score of one decoded record.

    Args:
        feature_weights: Four values; indices 0-2 multiply the matching
            record fields, index 3 is added as a bias.
        record: Decoded record whose first three fields are used.

    Returns:
        weights[0]*record[0] + weights[1]*record[1] + weights[2]*record[2] + weights[3].
    """
    weight_wfh = feature_weights[0]
    weight_hours = feature_weights[1]
    weight_fatigue = feature_weights[2]
    bias = feature_weights[3]

    return weight_wfh * record[0] + weight_hours * record[1] + weight_fatigue * record[2] + bias

In [62]:
def cal_fitness_score(feature_weights, population):
    """Score every genome of a binary population, clamped to the range [0, 1].

    Each genome is decoded with binary_to_float and scored with
    individual_fitness_score. Scores above 1 are replaced by 1 and scores
    below 0 by 0.

    Args:
        feature_weights: Four weights (three feature weights plus bias).
        population: Iterable of binary genomes.

    Returns:
        A list of clamped scores, one per genome, in input order.
    """
    scores_scaled = []
    for record in population:
        raw_score = individual_fitness_score(feature_weights, binary_to_float(record))

        # Apply BOTH bounds. Previously the lower clamp was computed from the
        # unclamped list, which silently discarded the upper clamp.
        if raw_score > 1:
            scores_scaled.append(1)
        elif raw_score < 0:
            scores_scaled.append(0)
        else:
            scores_scaled.append(raw_score)

    return scores_scaled

In [63]:
def tournament_selection(population, scores, num_selected):
    """Select individuals through two-way tournaments without replacement.

    Two distinct candidates are drawn at random from the remaining pool; the
    one with the higher score wins (the first drawn wins a tie), is added to
    the result and is removed from the pool. This repeats until
    ``num_selected`` winners were collected or fewer than two candidates
    remain.

    Args:
        population: List of individuals. It is copied, not modified.
        scores: List of scores aligned with ``population``. Copied as well.
        num_selected: Number of winners wanted.

    Returns:
        ``(selected_individuals, selected_individual_score)``, two aligned
        lists. They may be shorter than ``num_selected`` when the pool runs dry.
    """
    population_copy = population.copy()
    scores_copy = scores.copy()

    selected_individuals = []
    selected_individual_score = []

    while len(selected_individuals) < num_selected:
        # The guard must look at the *remaining* pool: it shrinks every round,
        # and random.sample raises ValueError when asked for 2 out of fewer.
        if len(population_copy) < 2:
            break

        first, second = random.sample(range(len(population_copy)), 2)

        # Higher score wins; the first drawn candidate keeps the win on a tie.
        winner_index = second if scores_copy[second] > scores_copy[first] else first

        # Popping both removes the winner from the pool and records it.
        selected_individuals.append(population_copy.pop(winner_index))
        selected_individual_score.append(scores_copy.pop(winner_index))

    return selected_individuals, selected_individual_score

In [64]:
def roulette_wheel_select(population, scores, num_selected):
    """Pick pairs of individuals with probability proportional to their score.

    Args:
        population: Sequence of individuals.
        scores: Scores aligned with ``population``.
        num_selected: Number of pairs to produce.

    Returns:
        A list of ``num_selected`` pairs; each pair is a two-element list of
        individuals (the same individual may appear twice in a pair).
    """
    total_fitness = sum(scores)
    if total_fitness == 0:
        # No score carries any weight, so dividing would raise
        # ZeroDivisionError. Fall back to a uniform wheel instead.
        slot_count = len(scores)
        selection_probabilities = [1 / slot_count] * slot_count if slot_count else []
    else:
        selection_probabilities = [score / total_fitness for score in scores]

    selected_pairs = []

    for _ in range(num_selected):
        # Spin the roulette wheel to select two individuals
        selected_individuals = []
        for _ in range(2):
            rand_value = random.random()
            cumulative_prob = 0

            for i, prob in enumerate(selection_probabilities):
                cumulative_prob += prob
                if rand_value <= cumulative_prob:
                    selected_individuals.append(population[i])
                    break
            else:
                # Rounding can leave the cumulative sum slightly below the
                # drawn value; give the spin to the last slot so that every
                # pair really contains two parents.
                if selection_probabilities:
                    selected_individuals.append(population[len(selection_probabilities) - 1])

        selected_pairs.append(selected_individuals)

    return selected_pairs

In [65]:
def constraints(record, feature_weights, selected_pool):
    """Check whether a binary genome decodes to an acceptable, new record.

    A genome passes when, after decoding:
      * field 0 is -1 or 1,
      * field 1 is in (0, 10],
      * field 2 is in [0, 10],
      * its individual_fitness_score is strictly between 0 and 1, and
      * the decoded record is not already contained in ``selected_pool``.

    Args:
        record: Binary genome to check.
        feature_weights: Weights passed to individual_fitness_score.
        selected_pool: Decoded records the candidate must differ from.

    Returns:
        True when every check passes, otherwise False.
    """
    decoded = binary_to_float(record)
    score = individual_fitness_score(feature_weights, decoded)

    if decoded[0] != -1 and decoded[0] != 1:
        return False
    if not (decoded[1] > 0 and decoded[1] <= 10):
        return False
    if not (decoded[2] >= 0 and decoded[2] <= 10):
        return False
    if not (score > 0 and score < 1):
        return False

    return decoded not in selected_pool

In [66]:
def crossover(selected_pool, selected_pool_scores, num_points, num_offsprings, feature_weights):
    """Breed valid offspring from the mating pool by multi-point crossover.

    For every attempt two parents are drawn with roulette_wheel_select,
    ``num_points`` distinct cut positions are chosen and sorted, and the
    offspring starts as a copy of the first parent. Of the segments between
    consecutive cut positions, those with an odd index (second, fourth, ...)
    are overwritten with the second parent's bits. Offspring rejected by
    constraints() are discarded and the attempt is repeated.

    Args:
        selected_pool: Binary genomes forming the mating pool.
        selected_pool_scores: Scores aligned with ``selected_pool``.
        num_points: Number of cut positions per crossover.
        num_offsprings: Number of valid offspring to produce.
        feature_weights: Weights used by the validity check.

    Returns:
        A list of ``num_offsprings`` valid binary offspring.

    Note:
        There is no cap on the number of attempts: the loop only ends once
        enough offspring have passed constraints().
    """
    # The mating pool is not modified in here, so decode it once rather than
    # on every attempt.
    float_selected_pool = pop_to_float(selected_pool)

    offspring_pool = []
    while len(offspring_pool) < num_offsprings:
        mating_pairs = roulette_wheel_select(selected_pool, selected_pool_scores, 1)
        parent1 = mating_pairs[0][0]
        parent2 = mating_pairs[0][1]

        crossover_points = sorted(np.random.choice(len(parent1), num_points, replace=False))
        offspring = parent1.copy()

        # Only the odd-indexed segments are taken from the second parent.
        for i in range(1, len(crossover_points) - 1, 2):
            start, stop = crossover_points[i], crossover_points[i + 1]
            offspring[start:stop] = parent2[start:stop]

        if not constraints(offspring, feature_weights, float_selected_pool):
            continue

        offspring_pool.append(offspring)

    return offspring_pool

In [67]:
def mutate(selected_pool, num_points, feature_weights):
    """Produce one valid bit-flip mutant for every individual of the pool.

    For each individual, ``num_points`` distinct positions are flipped in a
    fresh copy. The attempt is repeated from the unmodified individual until
    the mutant passes constraints().

    Args:
        selected_pool: Binary genomes to mutate. They are not modified.
        num_points: Number of bits flipped per attempt.
        feature_weights: Weights used by the validity check.

    Returns:
        A list of mutated genomes, one per input individual, in the same order.

    Note:
        There is no cap on the number of attempts per individual.
    """
    # The pool is not modified in here, so decode it once rather than on
    # every mutation attempt.
    float_selected_pool = pop_to_float(selected_pool)

    mutated_pool = []
    for individual in selected_pool:
        while True:
            mutated_individual = individual.copy()
            mutation_points = np.random.choice(len(mutated_individual), num_points, replace=False)

            for point in mutation_points:
                mutated_individual[point] = 1 - mutated_individual[point]

            if constraints(mutated_individual, feature_weights, float_selected_pool):
                break
        mutated_pool.append(mutated_individual)

    return mutated_pool

In [68]:
# Encode the exploratory parent sample as bit lists (one list per record).
binary_population = pop_to_binary(parent_population)

In [69]:
# Show the encoded genomes.
binary_population

[[0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1],
 [1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0],
 [1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
 [0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1],
 [0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1],
 [1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0],
 [0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0],
 [0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0],
 [1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0],
 [0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1]]

In [70]:
# One weight vector per designation level (0-5), looked up by get_feature_weights.
# Order as consumed by individual_fitness_score / cal_fitness_score:
#   [weight for WFH, weight for hours allocated, weight for fatigue, bias]
# NOTE(review): presumably coefficients of a linear model fitted per designation
# outside this notebook section; confirm their origin.
feature_weights_0 = [0.00059435, 0.01919768, 0.05873248, -0.02695521736796405]
feature_weights_1 = [-0.00303212, 0.02074924, 0.07094721, -0.05955500979744721]
feature_weights_2 = [-0.00607127, 0.02527233, 0.0790592, -0.11719479853351966]
feature_weights_3 = [-0.00630675, 0.02912954, 0.0811169, -0.1601507238663965]
feature_weights_4 = [-0.0027939, 0.02795119, 0.07740094, -0.12095795716822455]
feature_weights_5 = [0.00404156, 0.02623495, 0.07188201, -0.020223479611829576]

In [71]:
def get_feature_weights(des_num):
    """Return the weight vector that belongs to designation ``des_num`` (0-5)."""
    weights_by_designation = (
        feature_weights_0,
        feature_weights_1,
        feature_weights_2,
        feature_weights_3,
        feature_weights_4,
        feature_weights_5,
    )
    return weights_by_designation[des_num]

In [72]:
def generate_combinations(pool, comb_no):
    """List every ``comb_no``-sized combination of the values of ``pool``.

    Args:
        pool: Mapping whose values are the candidates.
        comb_no: Size of each combination.

    Returns:
        A list of tuples, in the order produced by itertools.combinations.
    """
    candidates = pool.values()
    return [*combinations(candidates, comb_no)]


In [73]:
def combinations_to_dict(all_combinations):
    """Wrap every combination in a dict of the form {'combinations': [members...]}.

    Returns:
        A list with one such dict per input combination, in input order.
    """
    return [{'combinations': list(members)} for members in all_combinations]

In [74]:
def calculate_pool_metrics(all_dicts):
    """Annotate every candidate pool with summary metrics, in place.

    For each dict in ``all_dicts`` the genomes under ``'combinations'`` are
    summarised into three new keys:

      * ``'diff'``        - largest minus smallest ``'hours'`` value
      * ``'mean_score'``  - average ``'score'`` over the genomes in the pool
      * ``'total_hours'`` - sum of the ``'hours'`` values

    The mean is taken over the actual number of genomes (it was previously
    divided by a fixed 10), and the spread uses the real minimum and maximum
    (previously seeded with 10 and 0). Results are unchanged for pools of ten
    genomes whose hours lie between 0 and 10. An empty pool gets 0 for all
    three metrics.

    Args:
        all_dicts: List of dicts as produced by combinations_to_dict, where
            every genome is a dict with ``'hours'`` and ``'score'``.

    Returns:
        The same list object, with the metric keys added to every dict.
    """
    for comb in all_dicts:
        genomes = comb['combinations']
        hours = [genome['hours'] for genome in genomes]
        total_score = sum(genome['score'] for genome in genomes)

        comb['diff'] = max(hours) - min(hours) if hours else 0
        comb['mean_score'] = total_score / len(hours) if hours else 0
        comb['total_hours'] = sum(hours)

    return all_dicts

In [75]:
def filter_combinations_with_hours(combinations, no_of_hours):
    """Keep only the pools whose ``'total_hours'`` equals ``no_of_hours`` exactly.

    A looser fallback (accepting totals within 10 hours of the target when
    nothing matches exactly) was sketched here earlier but is not active.

    Args:
        combinations: Iterable of pool dicts carrying a ``'total_hours'`` key.
        no_of_hours: Required total.

    Returns:
        A new list with the matching pools, in their original order.
    """
    return [candidate for candidate in combinations if candidate['total_hours'] == no_of_hours]

In [76]:
def sort_combinations_by_score_and_diff(combinations):
    """Return the pools ordered by ``diff / mean_score``, largest ratio first.

    The input is left untouched; pools with equal ratios keep their relative order.
    """
    def spread_per_score(pool):
        return pool['diff'] / pool['mean_score']

    ranked = list(combinations)
    ranked.sort(key=spread_per_score, reverse=True)
    return ranked

In [77]:
def select_final_pool(combinations):
    """Return the first (top-ranked) entry of an already sorted list of pools."""
    top_ranked = combinations[0]
    return top_ranked

In [78]:
def convert_to_dict(unfiltered_pool_with_scores):
    """Turn rows of [wfh, hours, fatigue, score] into a dict of labelled genomes.

    Returns:
        A dict mapping each row's position to
        ``{'wfh': ..., 'hours': ..., 'fatigue': ..., 'score': ...}``.
    """
    field_names = ('wfh', 'hours', 'fatigue', 'score')
    return {
        position: {name: row[slot] for slot, name in enumerate(field_names)}
        for position, row in enumerate(unfiltered_pool_with_scores)
    }

In [79]:
def pop_dict_to_list(pop_dict):
    """Flatten a pool dict back into rows of [wfh, hours, fatigue].

    The WFH value is written as 1 when it equals 1 and as 0 otherwise; the
    ``'score'`` entry of each genome is not carried over.
    """
    return [
        [1 if genome['wfh'] == 1 else 0, genome['hours'], genome['fatigue']]
        for genome in pop_dict['combinations']
    ]

In [80]:
def cal_pop_resource_distribution(population, diff_threshold):
    """Tell whether every entry keeps its ``"diff"`` within the threshold.

    Returns:
        False as soon as at least one entry has ``"diff"`` greater than
        ``diff_threshold``; True otherwise (including for an empty input).
    """
    exceeds_threshold = [entry["diff"] > diff_threshold for entry in population]
    return not any(exceeds_threshold)

In [81]:
def runner_func(designation_no, no_of_parent_samples, no_crossover_points, no_mutation_points, no_crossover_offsprings, no_generations, diff_threshold):
    """Run the genetic algorithm for one designation and return the last accepted pool.

    Starting from a random sample of ``no_of_parent_samples`` rows of the chosen
    designation, every generation encodes the parents, scores them, builds a
    mating pool by tournament selection, adds crossover offspring and mutants,
    and then searches all ``no_of_parent_samples``-sized combinations of that
    candidate pool for one whose total hours equal the total hours of the
    initial sample. The chosen combination becomes the next parent population.

    Args:
        designation_no: Designation index passed to select_designation and
            get_feature_weights.
        no_of_parent_samples: Number of rows sampled initially; also the size
            of every combination considered for the next generation.
        no_crossover_points: Number of cut positions handed to crossover().
        no_mutation_points: Number of bits flipped per mutant in mutate().
        no_crossover_offsprings: Number of offspring requested from crossover().
        no_generations: Number of generations to run.
        diff_threshold: Largest allowed ``'diff'`` (max minus min hours) of a
            combination; see the rejection step below.

    Returns:
        A tuple ``(genomes, mean_score)``: the list of genome dicts of the last
        accepted pool, and the sum of their scores divided by
        ``no_of_parent_samples``.

    Notes:
        * The inner ``while`` loop has no retry limit; it repeats (with new
          random draws) until a generation produces an acceptable pool.
        * ``final_pool`` is only assigned inside the generation loop, so with
          ``no_generations`` equal to 0 the code after the loop fails because
          the name is unbound.
        * Inside this function the local name ``combinations`` hides the
          ``itertools.combinations`` import.
    """
    offspring_pool_size = no_of_parent_samples
    # True division: this is a float (e.g. 5.0); it is only used as an upper
    # bound in tournament_selection's length comparison.
    mating_pool_size = no_of_parent_samples / 2

    parent_population = select_designation(designation_no, no_of_parent_samples)
    # print(parent_population)
    # Total of field 1 over the initial sample; computed once and used as the
    # exact total every later pool has to match.
    sum_hours = sum([x[1] for x in parent_population])
    # print("sum hours", sum_hours)
    population_weights = get_feature_weights(designation_no)
    # print(population_weights)

    for i in range(no_generations):

        found_offspring_pool = False

        # Retry this generation until a pool is accepted.
        while found_offspring_pool == False:

            found_offspring_pool = False
            binary_representation = pop_to_binary(parent_population)
            parent_population_scores = cal_fitness_score(population_weights, binary_representation)
            # if i == 0:
            #     print(parent_population_scores)
            mating_pool, mating_pool_scores = tournament_selection(binary_representation, parent_population_scores, mating_pool_size)
            offspring_pool_crossover = crossover(mating_pool, mating_pool_scores, no_crossover_points, no_crossover_offsprings, population_weights)
            offspring_pool_mutation = mutate(mating_pool, no_mutation_points, population_weights)

            # Candidate pool = crossover offspring + mutants + the mating pool itself.
            unfiltered_pool = offspring_pool_crossover + offspring_pool_mutation + mating_pool
            unfiltered_pool_scores = cal_fitness_score(population_weights, unfiltered_pool)
            unfiltered_pool = pop_to_float(unfiltered_pool)
            # Append each candidate's score to its decoded record.
            unfiltered_pool_with_scores = [[*i,j]for i,j in zip(unfiltered_pool, unfiltered_pool_scores)]

            pool_dict_list = convert_to_dict(unfiltered_pool_with_scores)


            # Enumerate every possible next generation of the required size.
            combinations = generate_combinations(pool_dict_list, offspring_pool_size)

            combinations_dict = combinations_to_dict(combinations)

            combinations_dict_metrics = calculate_pool_metrics(combinations_dict)

            # Keep only combinations whose total hours match the initial total.
            filtered_combinations = filter_combinations_with_hours(combinations_dict_metrics, sum_hours)

            sorted_combinations = sort_combinations_by_score_and_diff(filtered_combinations)

            # Reject the whole attempt if ANY surviving combination has a
            # 'diff' above the threshold.
            if cal_pop_resource_distribution(sorted_combinations, diff_threshold) == False:
              continue

            # An empty list passes the check above but yields no pool, so the
            # loop retries in that case as well.
            if len(sorted_combinations) != 0:
                final_pool = select_final_pool(sorted_combinations)
                found_offspring_pool = True
                # The accepted pool (re-encoded to 0/1 WFH) seeds the next generation.
                population_list = pop_dict_to_list(final_pool)
                parent_population = np.array(population_list)




    total_scores = 0
    for i in final_pool['combinations']:
      total_scores = total_scores + i['score']

    mean_score = total_scores/offspring_pool_size
    #print(f"mean_score: {total_scores/offspring_pool_size}")

    return final_pool['combinations'], mean_score







In [82]:
# Run the GA for designation 3 with 10 parents over 50 generations.
# No random seed is set in this notebook section, so results differ between runs.
final_pool, mean_score = runner_func(designation_no=3, no_of_parent_samples=10, no_crossover_points=4, no_mutation_points=4, no_crossover_offsprings=5, no_generations=50, diff_threshold=3)

In [83]:
# Mean score of the final pool returned by runner_func.
mean_score

0.3979143393836035

In [84]:
# Genomes of the final pool (wfh, hours, fatigue and score per entry).
final_pool

[{'wfh': -1, 'hours': 6, 'fatigue': 5.8125, 'score': 0.4924252473836035},
 {'wfh': 1, 'hours': 6, 'fatigue': 4.9375, 'score': 0.40883445988360356},
 {'wfh': -1, 'hours': 6, 'fatigue': 5.8125, 'score': 0.4924252473836035},
 {'wfh': 1, 'hours': 5, 'fatigue': 3.375, 'score': 0.2529597636336035},
 {'wfh': 1, 'hours': 6, 'fatigue': 3.5625, 'score': 0.29729872238360355},
 {'wfh': 1, 'hours': 6, 'fatigue': 3.8125, 'score': 0.31757794738360356},
 {'wfh': 1, 'hours': 5, 'fatigue': 4.125, 'score': 0.31379743863360354},
 {'wfh': 1, 'hours': 6, 'fatigue': 6.9375, 'score': 0.5710682598836035},
 {'wfh': -1, 'hours': 6, 'fatigue': 4.8125, 'score': 0.41130834738360356},
 {'wfh': -1, 'hours': 6, 'fatigue': 4.9375, 'score': 0.4214479598836035}]