In [17]:
from itertools import product
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from deap import base, creator, tools, algorithms
from math import factorial
from deap.benchmarks.tools import igd
from math import factorial
import warnings
import random
import tqdm

warnings.filterwarnings('ignore')

In [18]:
# Load the travel-time table and discard any row that has a missing value
travel_times = pd.read_csv('./Travel_Times.csv').dropna()


In [19]:
# Build a {(home LSOA, site LSOA): travel time} lookup.
# Zipping the three columns avoids constructing a Series per row (as iterrows
# does); if a (home, site) pair is duplicated the last row still wins.
travel_times_dict = {
    (home_lsoa, site_lsoa): travel_time
    for home_lsoa, site_lsoa, travel_time in zip(
        travel_times["Home_LSOA"], travel_times["Site_LSOA"], travel_times["TT"]
    )
}

In [20]:
# Inspect the travel-time rows whose home LSOA is one of the codes of interest.
# A notebook cell only renders its last expression, so two separate filter
# expressions would silently discard the first result; check both in one filter.
lsoas_to_check = ['E01004801', 'E01025104']
travel_times[travel_times['Home_LSOA'].isin(lsoas_to_check)]


Unnamed: 0,Home_LSOA,Site_LSOA,TT


In [21]:

# Load the site and critical-care activity extracts, keeping only the columns used below
_activity_cols = ['Der_Postcode_LSOA_Code', 'CC_Activity_Date', 'SiteLSOA', 'CC_Level']

sites = (
    pd.read_csv('./Sites.csv', encoding='ISO-8859-1')
    .loc[:, ['UnitCode', 'LSOA', 'NICU', 'LCU', 'SCBU']]
    # Crude cleaning for now: blank strings count as missing and any
    # incomplete row is dropped. Proper data cleaning is still to do.
    .replace('', np.nan)
    .dropna()
)

# UnitCode <-> LSOA mapping for the cleaned sites
site_lookup = sites.loc[:, ['UnitCode', 'LSOA']]

# Untouched copy of the selected activity columns (before cleaning)
activities_orig = pd.read_csv('./CC_Data.csv', encoding='ISO-8859-1').loc[:, _activity_cols]

activities = (
    activities_orig
    .replace('', np.nan)
    .dropna()
    # Ensure the activity date is a real datetime (source format is day/month/year)
    .assign(CC_Activity_Date=lambda frame: pd.to_datetime(frame['CC_Activity_Date'], format='%d/%m/%Y'))
    # Index by the patient's home LSOA
    .set_index('Der_Postcode_LSOA_Code')
)


# Lets define some useful lists about our data

# Site LSOA that is left out of the candidate sites.
# NOTE(review): the reason for excluding it is not recorded here - TODO confirm.
EXCLUDED_SITE_LSOA = "E01025104"

# Filtering (rather than list.remove) does not raise if the excluded site was
# already dropped by the cleaning step above.
site_codes = [code for code in sites['LSOA'].unique().tolist() if code != EXCLUDED_SITE_LSOA]
activity_types = activities['CC_Level'].unique().tolist()

# Lets define the time periods for the analysis
start_date = pd.Timestamp('2021-04-01')
end_date = pd.Timestamp('2022-03-31')
time_periods = pd.date_range(start_date, end_date, freq='D')

# Activities whose date falls inside the analysis window (both ends inclusive)
in_window = (activities['CC_Activity_Date'] >= start_date) & (activities['CC_Activity_Date'] <= end_date)
filtered_activities = activities.loc[in_window]

# Distinct home LSOAs seen in the window (the frame is indexed by home LSOA)
home_lsoas = filtered_activities.index.unique().tolist()

# {(home LSOA, care level): number of activities}
home_level_activities_count = filtered_activities.groupby(["Der_Postcode_LSOA_Code", "CC_Level"]).size().to_dict()
# {(activity date, home LSOA): number of activities}
home_activities_count = filtered_activities.groupby(["CC_Activity_Date", "Der_Postcode_LSOA_Code"]).size().to_dict()

activities.head()

Unnamed: 0_level_0,CC_Activity_Date,SiteLSOA,CC_Level
Der_Postcode_LSOA_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E01025106,2021-06-13,E01025104,SCBU
E01025106,2021-06-15,E01025104,SCBU
E01025113,2021-05-23,E01025104,SCBU
E01025113,2021-05-25,E01025104,SCBU
E01025113,2021-05-27,E01025104,SCBU


In [41]:

# Divisor used in main() to turn per-site activity counts into per-day averages.
num_days = 365

# Probability threshold used twice: in main() to decide whether an individual is
# mutated at all, and inside mutation() to decide whether each activity is reassigned.
mprob = 0.2
# Number of individuals requested from toolbox.population in main().
popsize = 50
# Number of iterations of the evolution loop in main().
generations = 100

# Define the problem: two objectives. The weights (-1.0, 1.0) mean the first
# objective is minimised and the second is maximised; eval_objectives returns
# (total travel time, activity count of the smallest site) in that order.
creator.create("FitnessMulti", base.Fitness, weights=(-1.0, 1.0))  
# An individual is a plain list carrying a FitnessMulti fitness attribute.
creator.create("Individual", list, fitness=creator.FitnessMulti)  


# Initialize the toolbox
toolbox = base.Toolbox()


def init_individual(icls, activities_df=None):
    """Build one candidate solution: every activity assigned to a random site.

    Each activity becomes a dict with the keys "Der_Postcode_LSOA_Code",
    "Activity_Date", "CC_Level" and "Site". The home LSOA is stored because
    eval_travel_time looks up travel times by (home LSOA, site); without it
    every evaluation fails with a KeyError.

    Args:
        icls: Container class for the individual; called as icls(list_of_dicts).
        activities_df: DataFrame with 'CC_Activity_Date' and 'CC_Level' columns.
            The home LSOA is read from a 'Der_Postcode_LSOA_Code' column when
            one exists, otherwise from the frame's index. Required in practice
            despite the None default.

    Returns:
        icls wrapping one dict per row of activities_df, in row order.
    """
    home_column = "Der_Postcode_LSOA_Code"
    if home_column in activities_df.columns:
        home_lsoas = activities_df[home_column]
    else:
        # The notebook indexes the activities frame by home LSOA.
        home_lsoas = activities_df.index

    individual = [
        {
            home_column: home_lsoa,
            "Activity_Date": activity_date,
            "CC_Level": cc_level,
            "Site": random.choice(site_codes),  # Assign a random site
        }
        for home_lsoa, activity_date, cc_level in zip(
            home_lsoas, activities_df['CC_Activity_Date'], activities_df['CC_Level']
        )
    ]
    return icls(individual)


# Objective 1: total travel time implied by an individual's site assignments
def eval_travel_time(individual):
    """Sum the travel time of every activity in `individual`.

    Each activity must carry "Der_Postcode_LSOA_Code" and "Site"; the pair is
    looked up in travel_times_dict. Pairs missing from the lookup contribute
    nothing to the total.
    NOTE(review): a missing pair therefore costs zero travel time, which a
    minimiser could favour - confirm that is intended.

    Returns:
        A one-element tuple holding the accumulated travel time.
    """
    running_total = 0
    for gene in individual:
        route = (gene["Der_Postcode_LSOA_Code"], gene["Site"])
        if route in travel_times_dict:
            running_total += travel_times_dict[route]
    return (running_total,)


def eval_smallest_site(individual):
    """Return the activity count of the least-used site.

    Every code in site_codes starts at zero, so a site with no assigned
    activity yields 0. An activity whose "Site" is not in site_codes raises
    KeyError.
    """
    tally = dict.fromkeys(site_codes, 0)
    for gene in individual:
        tally[gene["Site"]] += 1
    return min(tally.values())

def eval_objectives(individual):
    """Evaluate both objectives for `individual`.

    Returns:
        (total travel time, activity count of the smallest site), in the
        order expected by the fitness weights.
    """
    objectives = (
        eval_travel_time(individual)[0],  # unwrap the one-element tuple
        eval_smallest_site(individual),
    )
    return objectives


# Mutation operator: randomly reassign some activities to another site
def mutation(individual):
    """Mutate `individual` in place and return it as a one-element tuple.

    Each activity is independently given a freshly drawn site from site_codes
    with probability mprob. (main() separately uses mprob to decide whether an
    individual is passed to this operator at all.)
    """
    for gene in individual:
        if random.random() >= mprob:
            continue
        gene["Site"] = random.choice(site_codes)
    return (individual,)

# Function to perform crossover
def crossover(ind1, ind2):
    """One-point crossover that swaps the tails of two individuals in place.

    The evolution loop calls toolbox.mate(child1, child2) and ignores the
    return value, so the operator has to modify its arguments for crossover to
    have any effect. Editing in place also keeps the individuals' own type
    (and fitness attribute), where slicing and concatenating would produce
    plain lists.

    Args:
        ind1, ind2: Mutable sequences of equal length.

    Returns:
        (ind1, ind2), the same objects after the swap.

    Raises:
        AssertionError: if the two individuals differ in length.
    """
    # Ensure the individuals have the same length
    assert len(ind1) == len(ind2)

    # With fewer than two genes there is no interior cut point to choose
    if len(ind1) < 2:
        return ind1, ind2

    # Choose a random crossover point; both parents keep at least one own gene
    crossover_point = random.randint(1, len(ind1) - 1)

    # Swap everything from the cut point onwards
    ind1[crossover_point:], ind2[crossover_point:] = (
        ind2[crossover_point:],
        ind1[crossover_point:],
    )

    return ind1, ind2

def create_individual():
    """Zero-argument factory for the toolbox: one random individual built from `activities`.

    NOTE(review): this uses the full `activities` frame, not
    `filtered_activities`; confirm that is intended given the per-day
    averages are computed with a fixed num_days.
    """
    return init_individual(icls=creator.Individual, activities_df=activities)

# Wire the problem-specific functions into the DEAP toolbox.
# "individual" must be registered before "population", because the population
# registration captures toolbox.individual as the element factory.
toolbox.register("individual", create_individual)
# toolbox.population(n=...) repeats toolbox.individual n times into a list.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Evaluation returns both objectives; mate/mutate are the custom operators above.
toolbox.register("evaluate", eval_objectives)
toolbox.register("mate", crossover)
toolbox.register("mutate", mutation)
# NSGA-II selection; nd="standard" is forwarded to selNSGA2 on every call
# (presumably selecting its standard non-dominated sort - see DEAP docs).
toolbox.register("select", tools.selNSGA2, nd="standard")

def _evaluate_invalid(individuals):
    """Compute and store the fitness of every individual whose fitness is not valid."""
    stale = [ind for ind in individuals if not ind.fitness.valid]
    for ind, fit in zip(stale, toolbox.map(toolbox.evaluate, stale)):
        ind.fitness.values = fit


def _report_site_activity(population):
    """Print, per site and care level, the mean daily activity count across `population`."""
    level_counts = {site: {"SCBU": 0, "NICU": 0, "HDCU": 0} for site in site_codes}
    for individual in population:
        for activity in individual:
            # Tolerate sites or care levels outside the pre-seeded keys
            # instead of failing with KeyError.
            per_site = level_counts.setdefault(activity["Site"], {})
            cc_level = activity["CC_Level"]
            per_site[cc_level] = per_site.get(cc_level, 0) + 1

    # The counts are summed over every individual in the population, so divide
    # by the population size as well as the number of days to get a per-day
    # figure for an average solution.
    denominator = num_days * max(len(population), 1)
    for site, counts in level_counts.items():
        print(f"Site {site}:")
        for activity, count in counts.items():
            avg_count = count / denominator
            print(f"  Average number of {activity} per day: {avg_count}")


def main(cxpb=0.5):
    """Run the NSGA-II style evolution and return the final population.

    Per generation: clone the current population, apply crossover to adjacent
    pairs (probability `cxpb`) and mutation (probability `mprob`), re-evaluate
    the changed individuals, then keep the best `len(pop)` individuals out of
    parents plus offspring and print the per-site activity summary.

    Args:
        cxpb: Probability that a pair of offspring is crossed over.

    Returns:
        The population (list of individuals) after `generations` iterations.
    """
    # `import tqdm` binds the module, which is not callable; resolve the
    # progress-bar callable whether the name is the module or the class.
    progress = getattr(tqdm, "tqdm", tqdm)

    # Create and evaluate the initial population
    pop = toolbox.population(n=popsize)
    _evaluate_invalid(pop)

    # Evolve the population
    for _ in progress(range(generations)):
        # Order the parents and clone them so variation never touches `pop`
        offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]

        # Apply crossover on adjacent pairs of offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxpb:
                mated = toolbox.mate(child1, child2)
                # Some operators return new genomes instead of editing in
                # place; write those back so the crossover is not lost.
                if mated is not None:
                    new1, new2 = mated
                    if new1 is not child1:
                        child1[:] = new1
                    if new2 is not child2:
                        child2[:] = new2
                del child1.fitness.values
                del child2.fitness.values

        # Apply mutation on the offspring
        for mutant in offspring:
            if random.random() < mprob:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate the individuals with an invalid fitness
        _evaluate_invalid(offspring)

        # Environmental selection over parents + offspring. Selecting len(pop)
        # individuals out of len(pop) keeps everyone, so replacing the
        # population with the offspring alone applies no selection pressure.
        pop[:] = toolbox.select(pop + offspring, len(pop))

        _report_site_activity(pop)

    return pop

if __name__ == "__main__":
    result = main()
    # Show only the objective values: each individual holds one dict per
    # activity, so printing the population itself floods the output.
    # `result` stays available for inspecting the actual assignments.
    print([ind.fitness.values for ind in result])

