# Santa's Workshop Tour 2019

## an optimization problem

# Rules

#### 1. Number of people each day between 125 and 300. Not families!

#### 2. Not assigning choice_0 to the families has a variable cost. It is called 'preference cost'

#### 3. There is other cost to take into account with its specific formula. It is called 'accounting penalty'

#### 4. The score is the sum of the 2 costs. Score = preference cost + accounting penalty. It's the value to optimize

In [218]:
# Libraries
import pandas as pd
import numpy as np

In [134]:
# reading the files
families = pd.read_csv('./data/family_data.csv', index_col = 0)
submission = pd.read_csv('./data/sample_submission.csv', index_col = 0)

In [135]:
families.head()

Unnamed: 0_level_0,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
family_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,52,38,12,82,33,75,64,76,10,28,4
1,26,4,82,5,11,47,38,6,66,61,4
2,100,54,25,12,27,82,10,89,80,33,3
3,2,95,1,96,32,6,40,31,9,59,2
4,53,1,47,93,26,3,46,16,42,39,4


In [136]:
families.isnull().sum()

choice_0    0
choice_1    0
choice_2    0
choice_3    0
choice_4    0
choice_5    0
choice_6    0
choice_7    0
choice_8    0
choice_9    0
n_people    0
dtype: int64

In [137]:
families.shape

(5000, 11)

In [138]:
submission

Unnamed: 0_level_0,assigned_day
family_id,Unnamed: 1_level_1
0,100
1,99
2,98
3,97
4,96
...,...
4995,5
4996,4
4997,3
4998,2


In [139]:
# in order to work on just 1 dataframe, I will join both families and submission. After
# finding a solution, I will update the submission dataframe.

famsub = families.join(submission, on = 'family_id')

In [255]:
family_size_dict = famsub[['n_people']].to_dict()['n_people']

columns = [f'choice_{i}' for i in range(10)]
choice_dict = famsub[columns].to_dict()

N_DAYS = 100
MAX_OCCUPANCY = 300
MIN_OCCUPANCY = 125

# from 100 to 1
days = list(range(N_DAYS,0,-1))

In [None]:
def scoring(column):
    penalty = 0

    # We'll use this to count the number of people scheduled each day
    daily_occupancy = {k:0 for k in days}
    
    # Looping over each family; d is the day for each family f
    for f, d in enumerate(column):
        # Using our lookup dictionaries to make simpler variable names
        n = family_size_dict[f]
        choice_0 = choice_dict['choice_0'][f]
        choice_1 = choice_dict['choice_1'][f]
        choice_2 = choice_dict['choice_2'][f]
        choice_3 = choice_dict['choice_3'][f]
        choice_4 = choice_dict['choice_4'][f]
        choice_5 = choice_dict['choice_5'][f]
        choice_6 = choice_dict['choice_6'][f]
        choice_7 = choice_dict['choice_7'][f]
        choice_8 = choice_dict['choice_8'][f]
        choice_9 = choice_dict['choice_9'][f]
    
        # checking the daily occupancy
        if daily_occupancy[d] + n > 300:
            break
        else:
            daily_occupancy[d] += n
            
        

In [221]:
def accounting_penalty(column):

    n1 = famsub.loc[df['assigned_day'] == 100]['n_people'].sum()
    penalty = 0
    
    for day in reversed(range (1, 101)):
        n0 = df.loc[df['assigned_day'] == day]['n_people'].sum()
        
        exp = 0.5 + abs(n0 - n1)/50
        result = (n0 - 125) / 400 * n0**exp
        
        penalty += result
        n1 = n0
        
    return penalty

# to include in the function: check the length of the dataframe
# check other possible errors 
# think about more understandable names for variables


In [222]:
def preference_cost(row):
 
    people = row['n_people']

    gifts = {0 : 0, 
             1 : 50, 
             2 : 50 + 9*people,
             3 : 100 + 9*people,
             4 : 200 + 9*people,
             5 : 200 + 18*people,
             6 : 300 + 18*people,
             7 : 300 + 36*people,
             8 : 400 + 36*people,
             9 : 500 + 36*people + 199*people
            }
    
    other_gift = 500 + 36*people + 398*people
    
    for i in range(10):
        if row[i] == row['assigned_day']:
            return gifts[i]
        else:
            pass
        return other_gift
        
# check other possible errors 

In [236]:
def score(column):
       
    # first we need to calculate the daily occupancy to get sure it fits the rules.
    daily_occ = {el:0 for el in reversed(range (1, 101))}
    
    for day in reversed(range (1, 101)):
        daily_occ = df.loc[df['assigned_day'] == day]['n_people'].sum()
        if 125 <= daily_occ <= 300: 
            pass
        else:
            print('Some assignments are out of range (125-300). Try it again')
            return np.NaN
            break
            
    
    # second step is to calculate the gift cost.
    gift_cost = pd.DataFrame(df).apply(lambda x: preference_cost(x), axis = 1).sum()
    
    # third step is to calculate the accounting penalty.
    penalty = accounting_penalty(df)
    
    return gift_cost + penalty

In [242]:
famsub.columns

Index(['choice_0', 'choice_1', 'choice_2', 'choice_3', 'choice_4', 'choice_5',
       'choice_6', 'choice_7', 'choice_8', 'choice_9', 'n_people',
       'assigned_day'],
      dtype='object')

In [237]:
famsub.apply(lambda x: preference_cost(x), axis = 1).sum()

0

In [238]:
score(famsub)

Some assignments are out of range (125-300). Try it again


nan

In [244]:
daily_occ = {el:0 for el in reversed(range (1, 101))}

In [None]:
# Start with the sample submission values
best = submission['assigned_day'].tolist()
start_score = cost_function(best)

new = best.copy()
# loop over each family
for fam_id, _ in enumerate(best):
    # loop over each family choice
    for pick in range(10):
        day = choice_dict[f'choice_{pick}'][fam_id]
        temp = new.copy()
        temp[fam_id] = day # add in the new pick
        if cost_function(temp) < start_score:
            new = temp.copy()
            start_score = cost_function(new)

submission['assigned_day'] = new
score = cost_function(new)
# submission.to_csv(f'submission_{score}.csv')
print(f'Score: {score}')