In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import time

## Problem Information
From Kaggle -
Santa has exciting news! For 100 days before Christmas, he opened up tours to his workshop. Because demand was so strong, and because Santa wanted to make things as fair as possible, he let each of the 5,000 families that will visit the workshop choose a list of dates they'd like to attend the workshop.

Now that all the families have sent Santa their preferences, he's realized it's impossible for everyone to get their top picks, so he's decided to provide extra perks for families that don't get their preferences. In addition, Santa's accounting department has told him that, depending on how families are scheduled, there may be some unexpected and hefty costs incurred.

Santa needs the help of the Kaggle community to optimize which day each family is assigned to attend the workshop in order to minimize any extra expenses that would cut into next years toy budget! Can you help Santa out?

The total number of people attending the workshop each day must be between 125 - 300; if even one day is outside these occupancy constraints, the submission will error and will not be scored.

Our notes -
We are given each family's 10 preferred days to visit Santa. If the family does not visit Santa on the first day, then Santa incurs a preference cost to accomodate them. There is also an accounting cost that penalizes large variations in the number of people per day. 

Using the family's first choice as a starting point provides a good method for initializing the optimization. This is because the more families that are given their top choice, the lower the overall cost will be. 

A first thing to examine is the distribution of the preferred days (by number of people).

In [10]:
family_preferences = pd.read_csv("../Data/family_data.csv", index_col= "family_id")

## Helper Functions

Two helper functions are defined below to help for heuristic evaluation

1) Function to evalutate the cost quickly. This code was provided by Kaggle for the competition and was much faster  than the code we initially developed for calculating the cost

2) Function to get to a feasible point quickly. Instead of starting at a random location, it is favorable to start with each family's preferred day to visit. However, this leads to some days having too many or too few people. We fix this by randomly assigning people on days with too many people to days with fewer people. Another possible starting point would be too randomly assign people to individual days, but we believe that this starting point will be more beneficial.

In [16]:
def get_cost(pred, fs_d, ch_d):
    
    days = list(range(100,0,-1))
    
    penalty = 0

    # We'll use this to count the number of people scheduled each day
    daily_occupancy = {k:0 for k in days}
    
    # Looping over each family; d is the day for each family f
    for f, d in enumerate(pred):

        # Using our lookup dictionaries to make simpler variable names
        n = fs_d[f]
        choice_0 = ch_d['choice_0'][f]
        choice_1 = ch_d['choice_1'][f]
        choice_2 = ch_d['choice_2'][f]
        choice_3 = ch_d['choice_3'][f]
        choice_4 = ch_d['choice_4'][f]
        choice_5 = ch_d['choice_5'][f]
        choice_6 = ch_d['choice_6'][f]
        choice_7 = ch_d['choice_7'][f]
        choice_8 = ch_d['choice_8'][f]
        choice_9 = ch_d['choice_9'][f]

        # add the family member count to the daily occupancy
        daily_occupancy[d] += n

        # Calculate the penalty for not getting top preference
        if d == choice_0:
            penalty += 0
        elif d == choice_1:
            penalty += 50
        elif d == choice_2:
            penalty += 50 + 9 * n
        elif d == choice_3:
            penalty += 100 + 9 * n
        elif d == choice_4:
            penalty += 200 + 9 * n
        elif d == choice_5:
            penalty += 200 + 18 * n
        elif d == choice_6:
            penalty += 300 + 18 * n
        elif d == choice_7:
            penalty += 300 + 36 * n
        elif d == choice_8:
            penalty += 400 + 36 * n
        elif d == choice_9:
            penalty += 500 + 36 * n + 199 * n
        else:
            penalty += 500 + 36 * n + 398 * n

    # for each date, check total occupancy
    #  (using soft constraints instead of hard constraints)
    for _, v in daily_occupancy.items():
        if (v > 300) or (v < 125):
            penalty += 100000000

    # Calculate the accounting cost
    # The first day (day 100) is treated special
    accounting_cost = (daily_occupancy[days[0]]-125.0) / 400.0 * daily_occupancy[days[0]]**(0.5)
    # using the max function because the soft constraints might allow occupancy to dip below 125
    accounting_cost = max(0, accounting_cost)
    
    # Loop over the rest of the days, keeping track of previous count
    yesterday_count = daily_occupancy[days[0]]
    for day in days[1:]:
        today_count = daily_occupancy[day]
        diff = abs(today_count - yesterday_count)
        accounting_cost += max(0, (daily_occupancy[day]-125.0) / 400.0 * daily_occupancy[day]**(0.5 + diff / 50.0))
        yesterday_count = today_count

    penalty += accounting_cost

    return penalty

In [17]:
def get_days_over_n(df, n):
    df = df.groupby(df.columns[-1]).sum().sort_values(by = "n_people")
    return df.index[df['n_people'] > n].tolist()

def get_days_under_n(df, n):
    df = df.groupby(df.columns[-1]).sum().sort_values(by = "n_people")
    return df.index[df['n_people'] < n ].tolist()

def check_days(df):
    return len(get_days_over_n(df, 300)) + len(get_days_under_n(df, 125))

def greedy_random_assignment(pred_df, eps):
    # Idea is to initialize all families with their first choice and then randomly assign families
    # on days where there are too many people (i.e. over 300) with preference towards assigning them to days
    # with less than 125 people.
    num_day_list = 1
    
    # Find an initial assignment that is feasible
    while num_day_list > 0:
        days_over = get_days_over_n(pred_df, 300)
        days_under = get_days_under_n(pred_df, 125)
        
        # If there are no days with over 300 people, take people from days with more than 250 people
        if ((len(days_over) == 0) & (len(days_under) > 0)):
            days_over = get_days_over_n(pred_df, 250)
            
        # If there are no days with under 125 people, give people to days with less than 175 people
        if ((len(days_over) > 0) & (len(days_under) == 0)):
            days_under = get_days_under_n(pred_df, 200)           
            
        for day in days_over:
            fam_assigned = pred_df.index[pred_df['assigned_day'] == day].tolist()
            for fam in fam_assigned:
                if np.random.random() < eps:
                    pred_df.iloc[fam, -1] = np.random.choice(days_under)    
        
        num_day_list = check_days(pred_df)
    return pred_df

## Heuristic 3: Genetic Algorithm

In [None]:
def genetic_algorithm(population_size, assign_df, epsilon, fam_size_dict, choice_dict, parents_count, mutation_rate, num_iter, family_matrix):
    
    # Initial population has 5000 entries, one for each family
    population = np.zeros((population_size, 5000))
    new_population = np.zeros((population_size, 5000))
    fitness = np.zeros((population_size, 1))
    cumulative_fitness = np.zeros((population_size, 1))
    best_cost = 10e10
    
    for iteration in range(num_iter):
        if iteration == 0:

            # Initialize population and fitness scores
            for i in range(population_size):
                temp_df = assign_df.copy(deep = True)
                temp_assignment = greedy_random_assignment(pred_df=temp_df, eps=epsilon)["assigned_day"]
                population[i,:] = temp_assignment
                fitness[i] = get_cost(pred=temp_assignment, fs_d=fam_size_dict, ch_d=choice_dict)
        else:
            # Add parents to new population
            for i in range(parents_count):
                new_population[i,:] = population[new_parents[i], :]

            # Crossover to make offspring: Using 2 point crossover
            for i in range(parents_count, population_size):
                parent_1 = np.random.choice(range(parents_count))
                parent_2 = np.random.choice([x for x in range(parents_count) if x != parent_1])
                crossover_1 = np.random.choice(range(1,4999))
                crossover_2 = np.random.choice([x for x in range(1,4999) if x != crossover_1])
                if crossover_1 > crossover_2:
                    crossover_2, crossover_1 = crossover_1, crossover_2
                new_population[i, :crossover_1] = new_population[parent_1, :crossover_1]
                new_population[i, crossover_1:crossover_2] = new_population[parent_2, crossover_1:crossover_2]
                new_population[i, crossover_2:] = new_population[parent_1, crossover_2:]

            # Mutation: Calculate number of mutations and randomly apply then
            num_mutations = int(mutation_rate * population_size * 5000)
            # Assign a decreasing probability that their worse choices are selected
            choice_prob = [0.181818182,0.163636364,0.145454545,0.127272727,0.109090909,0.090909091,0.072727273,0.054545455,0.036363636,0.018181818]
            fam_mutate = np.random.choice(range(5000), size = num_mutations)
            mutated_choice = np.random.choice(range(10), size = num_mutations, p = choice_prob)
            chrom_choice = np.random.choice(range(population_size), size = num_mutations)
            for mut in range(int(num_mutations)):
                new_population[chrom_choice[mut], fam_mutate[mut]] = family_matrix.iloc[fam_mutate[mut], mutated_choice[mut]]
                
            # Calculate fitness
            for i in range(population_size):
                fitness[i] = get_cost(pred=new_population[i,:].astype(int), fs_d=fam_size_dict, ch_d=choice_dict)
            
            # Assign new population to population
            population = new_population
    
        # Take the top n parents for the next population. 
        new_parents = np.argsort(-fitness.ravel())[-parents_count:]
        if min(fitness.ravel()) < best_cost:
            best_cost = min(fitness.ravel())
        
#         if iteration % 25 == 0:
#             print(f'Iteration {iteration} cost {best_cost}')
        
    return population, best_cost

In [68]:
import time

start = time.time()
# For testing, num_mutations = mutation_rate * 5000 * population_size. Keep mutations relatively small
genetic_algorithm(population_size = 80,
                  assign_df = initial_allocation,
                  epsilon = 0.5,
                  fam_size_dict=family_size_dict,
                  choice_dict=family_choice_dict,
                  parents_count = 10,
                  mutation_rate = 0.001,
                  num_iter = 40000,
                  family_matrix = family_data)
print(time.time() - start)

# 40, 10, 0.001, 1000, 1186619, 807.91
# 80, 20, 0.001, 1000, 1014474, 1563.84
# 20, 5, 0.001, 1000, 1268749, 395.94
# 25, 5, 0.001, 1000, 1221041, 484.79
# 80, 10, 0.001, 1000, 793211, 1557.66
# 80, 10, 0.001, 40000, 257652.19, 41584

Iteration 0 cost 339721012.66055775
Iteration 25 cost 3628096.884945388
Iteration 50 cost 3155532.1284129787
Iteration 75 cost 2906821.6036595544
Iteration 100 cost 2675766.5838044444
Iteration 125 cost 2484251.082202057
Iteration 150 cost 2317331.469120266
Iteration 175 cost 2171788.6950669847
Iteration 200 cost 2023577.6536268925
Iteration 225 cost 1910976.2337834951
Iteration 250 cost 1819583.1983185601
Iteration 275 cost 1735434.7285447267
Iteration 300 cost 1663411.8133958308
Iteration 325 cost 1582255.4623811655
Iteration 350 cost 1511540.1395587272
Iteration 375 cost 1450273.200051172
Iteration 400 cost 1386500.0290644227
Iteration 425 cost 1338571.7694548324
Iteration 450 cost 1293502.0787732683
Iteration 475 cost 1242465.3502323565
Iteration 500 cost 1209781.2362205894
Iteration 525 cost 1165835.7780965199
Iteration 550 cost 1128003.3432164737
Iteration 575 cost 1096721.0654935364
Iteration 600 cost 1061932.5381173524
Iteration 625 cost 1023241.8771811116
Iteration 650 cost 99

Iteration 5375 cost 347853.5470593992
Iteration 5400 cost 346793.58348950295
Iteration 5425 cost 346499.2543225705
Iteration 5450 cost 345872.95830436685
Iteration 5475 cost 344101.8871898375
Iteration 5500 cost 343665.9420388778
Iteration 5525 cost 343480.5924437408
Iteration 5550 cost 343480.5924437408
Iteration 5575 cost 343480.5924437408
Iteration 5600 cost 343480.5924437408
Iteration 5625 cost 343480.5924437408
Iteration 5650 cost 343480.5924437408
Iteration 5675 cost 343480.5924437408
Iteration 5700 cost 343480.5924437408
Iteration 5725 cost 343480.5924437408
Iteration 5750 cost 343480.5924437408
Iteration 5775 cost 343480.5924437408
Iteration 5800 cost 343480.5924437408
Iteration 5825 cost 343480.5924437408
Iteration 5850 cost 343480.5924437408
Iteration 5875 cost 343480.5924437408
Iteration 5900 cost 343480.5924437408
Iteration 5925 cost 343480.5924437408
Iteration 5950 cost 343480.5924437408
Iteration 5975 cost 343480.5924437408
Iteration 6000 cost 343480.5924437408
Iteration 

Iteration 10725 cost 312755.869494068
Iteration 10750 cost 312755.869494068
Iteration 10775 cost 312755.869494068
Iteration 10800 cost 312755.869494068
Iteration 10825 cost 312755.869494068
Iteration 10850 cost 312755.869494068
Iteration 10875 cost 312755.869494068
Iteration 10900 cost 312755.869494068
Iteration 10925 cost 312755.869494068
Iteration 10950 cost 312755.869494068
Iteration 10975 cost 312473.5436326873
Iteration 11000 cost 312473.5436326873
Iteration 11025 cost 312338.7129673244
Iteration 11050 cost 311833.3199249078
Iteration 11075 cost 311639.8681929921
Iteration 11100 cost 311639.8681929921
Iteration 11125 cost 311639.8681929921
Iteration 11150 cost 311639.8681929921
Iteration 11175 cost 311639.8681929921
Iteration 11200 cost 311639.8681929921
Iteration 11225 cost 311639.8681929921
Iteration 11250 cost 311639.8681929921
Iteration 11275 cost 311639.8681929921
Iteration 11300 cost 311639.8681929921
Iteration 11325 cost 311639.8681929921
Iteration 11350 cost 311386.1232708

Iteration 15950 cost 289031.680256692
Iteration 15975 cost 289031.680256692
Iteration 16000 cost 289031.680256692
Iteration 16025 cost 289031.680256692
Iteration 16050 cost 289031.680256692
Iteration 16075 cost 289031.680256692
Iteration 16100 cost 289031.680256692
Iteration 16125 cost 289031.680256692
Iteration 16150 cost 289031.680256692
Iteration 16175 cost 289031.680256692
Iteration 16200 cost 289031.680256692
Iteration 16225 cost 289031.680256692
Iteration 16250 cost 289031.680256692
Iteration 16275 cost 289031.680256692
Iteration 16300 cost 289031.680256692
Iteration 16325 cost 289031.680256692
Iteration 16350 cost 289031.680256692
Iteration 16375 cost 289031.680256692
Iteration 16400 cost 289031.680256692
Iteration 16425 cost 289031.680256692
Iteration 16450 cost 289031.680256692
Iteration 16475 cost 289031.680256692
Iteration 16500 cost 289031.680256692
Iteration 16525 cost 289031.680256692
Iteration 16550 cost 289031.680256692
Iteration 16575 cost 289031.680256692
Iteration 16

Iteration 21225 cost 283733.46559755394
Iteration 21250 cost 283733.46559755394
Iteration 21275 cost 283733.46559755394
Iteration 21300 cost 283733.46559755394
Iteration 21325 cost 283733.46559755394
Iteration 21350 cost 283733.46559755394
Iteration 21375 cost 283733.46559755394
Iteration 21400 cost 283733.46559755394
Iteration 21425 cost 283733.46559755394
Iteration 21450 cost 283733.46559755394
Iteration 21475 cost 283733.46559755394
Iteration 21500 cost 283733.46559755394
Iteration 21525 cost 283733.46559755394
Iteration 21550 cost 283733.46559755394
Iteration 21575 cost 283733.46559755394
Iteration 21600 cost 283733.46559755394
Iteration 21625 cost 283733.46559755394
Iteration 21650 cost 283733.46559755394
Iteration 21675 cost 283733.46559755394
Iteration 21700 cost 283733.46559755394
Iteration 21725 cost 283733.46559755394
Iteration 21750 cost 283733.46559755394
Iteration 21775 cost 283733.46559755394
Iteration 21800 cost 283733.46559755394
Iteration 21825 cost 283733.46559755394


Iteration 26425 cost 276216.69343714777
Iteration 26450 cost 276216.69343714777
Iteration 26475 cost 276216.69343714777
Iteration 26500 cost 276216.69343714777
Iteration 26525 cost 276216.69343714777
Iteration 26550 cost 276216.69343714777
Iteration 26575 cost 276216.69343714777
Iteration 26600 cost 276216.69343714777
Iteration 26625 cost 276216.69343714777
Iteration 26650 cost 276216.69343714777
Iteration 26675 cost 276216.69343714777
Iteration 26700 cost 276216.69343714777
Iteration 26725 cost 276216.69343714777
Iteration 26750 cost 276216.69343714777
Iteration 26775 cost 276216.69343714777
Iteration 26800 cost 276216.69343714777
Iteration 26825 cost 276216.69343714777
Iteration 26850 cost 276216.69343714777
Iteration 26875 cost 276216.69343714777
Iteration 26900 cost 276216.69343714777
Iteration 26925 cost 276216.69343714777
Iteration 26950 cost 276216.69343714777
Iteration 26975 cost 276216.69343714777
Iteration 27000 cost 276216.69343714777
Iteration 27025 cost 276216.69343714777


Iteration 31625 cost 269365.67446343676
Iteration 31650 cost 269365.67446343676
Iteration 31675 cost 269365.67446343676
Iteration 31700 cost 269365.67446343676
Iteration 31725 cost 269365.67446343676
Iteration 31750 cost 269365.67446343676
Iteration 31775 cost 268157.3024975452
Iteration 31800 cost 268157.3024975452
Iteration 31825 cost 268157.3024975452
Iteration 31850 cost 267839.9978121877
Iteration 31875 cost 267286.8890263213
Iteration 31900 cost 267183.2616136433
Iteration 31925 cost 267183.2616136433
Iteration 31950 cost 266952.4338418443
Iteration 31975 cost 265848.4128436002
Iteration 32000 cost 265737.91438378487
Iteration 32025 cost 265641.7111786234
Iteration 32050 cost 265641.7111786234
Iteration 32075 cost 265641.7111786234
Iteration 32100 cost 265641.7111786234
Iteration 32125 cost 265641.7111786234
Iteration 32150 cost 265641.7111786234
Iteration 32175 cost 265641.7111786234
Iteration 32200 cost 265641.7111786234
Iteration 32225 cost 265641.7111786234
Iteration 32250 co

Iteration 36875 cost 264399.02413082327
Iteration 36900 cost 263856.7186405475
Iteration 36925 cost 263359.55959816894
Iteration 36950 cost 263359.55959816894
Iteration 36975 cost 263359.55959816894
Iteration 37000 cost 263359.55959816894
Iteration 37025 cost 263359.55959816894
Iteration 37050 cost 263359.55959816894
Iteration 37075 cost 263206.85665650695
Iteration 37100 cost 263206.85665650695
Iteration 37125 cost 262693.25599456247
Iteration 37150 cost 262693.25599456247
Iteration 37175 cost 262693.25599456247
Iteration 37200 cost 262693.25599456247
Iteration 37225 cost 262693.25599456247
Iteration 37250 cost 262693.25599456247
Iteration 37275 cost 262693.25599456247
Iteration 37300 cost 262693.25599456247
Iteration 37325 cost 262693.25599456247
Iteration 37350 cost 262693.25599456247
Iteration 37375 cost 262693.25599456247
Iteration 37400 cost 262693.25599456247
Iteration 37425 cost 262693.25599456247
Iteration 37450 cost 262693.25599456247
Iteration 37475 cost 262693.25599456247
I

## Data import

In [20]:
# Data importing
family_data = pd.read_csv("../Data/family_data.csv", index_col= "family_id")
initial_submission = pd.read_csv("../Data/sample_submission.csv")
initial_allocation = pd.concat([initial_submission, family_data.iloc[:, -1], family_data.iloc[:,0]], axis = 1)
initial_allocation = initial_allocation[['family_id', 'n_people', 'choice_0']]
initial_allocation = initial_allocation.rename(columns={"choice_0": "assigned_day"})


# Kaggle provided a cost function, which is very fast. Need these values to use it.
family_size_dict = family_data[['n_people']].to_dict()['n_people']
cols = [f'choice_{i}' for i in range(10)]
family_choice_dict = family_data[cols].to_dict()

## Genetic Algorithm Tests

In [None]:
population_size_list = [20, 30, 40]
for pop_size in population_size_list:
    worst_cost = 0
    best_cost = 10e10
    worst_time = 0
    best_time = 10e10
    for test in range(10):

        # Run Random Search
        start = time.time()
        assignment, cost = genetic_algorithm(population_size = pop_size,
                                              assign_df = initial_allocation,
                                              epsilon = 0.5,
                                              fam_size_dict=family_size_dict,
                                              choice_dict=family_choice_dict,
                                              parents_count = 10,
                                              mutation_rate = 0.001,
                                              num_iter = 1000,
                                              family_matrix = family_data)
        end = time.time()
        print(f'Pop_Size {pop_size} Test {test} Finished')
        if test == 0:
            average_time = (end-start)
            average_cost = cost
        else:
            average_time = (average_time) + ((end-start)-average_time)/(test+1)
            average_cost = (average_cost) + ((cost)-average_cost)/(test+1)
        if (end-start) < best_time:
            best_time = (end-start)
        if (end-start) > worst_time:
            worst_time = (end-start)
        if cost < best_cost:
            best_cost = cost
        if cost > worst_cost:
            worst_cost = cost
    print("Pop_Size: %s" % pop_size)
    print("Average Time: %s" % np.round(average_time,3))
    print("Best Time: %s" % np.round(best_time,3))
    print("Worst Time: %s" % np.round(worst_time,3))
    print("Average Cost: %s" % np.round(average_cost,2))
    print("Best Cost: %s" % np.round(best_cost,2))
    print("Worst Cost: %s" % np.round(worst_cost,2))

In [None]:
num_parents_list = [5, 10, 20]
for num_parents in num_parents_list:
    worst_cost = 0
    best_cost = 10e10
    worst_time = 0
    best_time = 10e10
    for test in range(10):

        # Run Random Search
        start = time.time()
        assignment, cost = genetic_algorithm(population_size = 30,
                                              assign_df = initial_allocation,
                                              epsilon = 0.5,
                                              fam_size_dict=family_size_dict,
                                              choice_dict=family_choice_dict,
                                              parents_count = num_parents,
                                              mutation_rate = 0.001,
                                              num_iter = 1000,
                                              family_matrix = family_data)
        end = time.time()
        print(f'Num Parents {num_parents} Test {test} Finished')
        if test == 0:
            average_time = (end-start)
            average_cost = cost
        else:
            average_time = (average_time) + ((end-start)-average_time)/(test+1)
            average_cost = (average_cost) + ((cost)-average_cost)/(test+1)
        if (end-start) < best_time:
            best_time = (end-start)
        if (end-start) > worst_time:
            worst_time = (end-start)
        if cost < best_cost:
            best_cost = cost
        if cost > worst_cost:
            worst_cost = cost
    print("Num_Parents: %s" % num_parents)
    print("Average Time: %s" % np.round(average_time,3))
    print("Best Time: %s" % np.round(best_time,3))
    print("Worst Time: %s" % np.round(worst_time,3))
    print("Average Cost: %s" % np.round(average_cost,2))
    print("Best Cost: %s" % np.round(best_cost,2))
    print("Worst Cost: %s" % np.round(worst_cost,2))

In [None]:
mutation_rate_list = [0.0001, 0.001, 0.01]
for mut_rate in mutation_rate:
    worst_cost = 0
    best_cost = 10e10
    worst_time = 0
    best_time = 10e10
    for test in range(10):

        # Run Random Search
        start = time.time()
        assignment, cost = genetic_algorithm(population_size = 30,
                                              assign_df = initial_allocation,
                                              epsilon = 0.5,
                                              fam_size_dict=family_size_dict,
                                              choice_dict=family_choice_dict,
                                              parents_count = 10,
                                              mutation_rate = mut_rate,
                                              num_iter = 1000,
                                              family_matrix = family_data)
        end = time.time()
        print(f'Mutation Rate {mut_rate} Test {test} Finished')
        if test == 0:
            average_time = (end-start)
            average_cost = cost
        else:
            average_time = (average_time) + ((end-start)-average_time)/(test+1)
            average_cost = (average_cost) + ((cost)-average_cost)/(test+1)
        if (end-start) < best_time:
            best_time = (end-start)
        if (end-start) > worst_time:
            worst_time = (end-start)
        if cost < best_cost:
            best_cost = cost
        if cost > worst_cost:
            worst_cost = cost
    print("Mutation Rate: %s" % mut_rate)
    print("Average Time: %s" % np.round(average_time,3))
    print("Best Time: %s" % np.round(best_time,3))
    print("Worst Time: %s" % np.round(worst_time,3))
    print("Average Cost: %s" % np.round(average_cost,2))
    print("Best Cost: %s" % np.round(best_cost,2))
    print("Worst Cost: %s" % np.round(worst_cost,2))