In [None]:
import numpy as np
import pandas as pd
import docplex

In [None]:
# Load the two input tables. Both CSVs are indexed by their `family_id` column.
data = pd.read_csv(
    'dataset/family_data.csv',
    index_col='family_id',
)
submission = pd.read_csv(
    'dataset/sample_submission.csv',
    index_col='family_id',
)

In [None]:
# Create the docplex model named 'santa' and set its solver parameters.
from docplex.mp.model import Model
mdl = Model(name='santa')
# Cap the solver at two threads.
mdl.parameters.threads = 2
# MIP gap tolerance of 0: presumably the solver will only stop once the
# incumbent is proven optimal, which can take a long time — confirm this is intended.
mdl.parameters.mip.tolerances.mipgap = 0.0

In [None]:
# Inclusive lower / upper bound on the number of people scheduled on any single day.
MIN_OCCUPANCY, MAX_OCCUPANCY = 125, 300

In [None]:
# Index sets used to key the model: days are 1-based (1..100 inclusive),
# families are 0-based (0..4999 inclusive).
days = range(1, 100 + 1)
count = range(0, 5000)

In [None]:
# Decision variables
# One binary variable per (family, day) pair, keyed as F[f, d] with f taken from
# `count` and d from `days`. The constraints below force exactly one F[f, d] per
# family to be 1, so F[f, d] == 1 reads as "family f is assigned to day d".
F = mdl.binary_var_matrix(count,days)

In [None]:
# Constraints
# Family sizes keyed by family_id, pulled out of pandas once so that the sums
# below use plain dict lookups instead of one Series label lookup per term.
n_people = data.n_people.to_dict()
# Number of people attending on each day. Built once per day and shared by the
# upper- and lower-bound constraints instead of being rebuilt for each of them.
occupancy = {d: mdl.sum(F[f,d]*n_people[f] for f in count) for d in days}
# Every day must host between MIN_OCCUPANCY and MAX_OCCUPANCY people (inclusive).
mdl.add_constraints(occupancy[d] <= MAX_OCCUPANCY for d in days)
mdl.add_constraints(occupancy[d] >= MIN_OCCUPANCY for d in days)
# Every family is assigned to exactly one day.
mdl.add_constraints(mdl.sum(F[f,d] for d in days) == 1 for f in count)

In [None]:
# Preference cost matrix: fam_costs[f, d] is the penalty for assigning family f
# to day d. Days are 1-based, so column 0 is never written and stays 0.
fam_costs = np.zeros((5000,101))
choices = data[['choice_'+str(i) for i in range(10)]].values
# Family sizes keyed by family_id, extracted once instead of doing a pandas
# label lookup for every (family, day) cell.
family_sizes = data.n_people.to_dict()
for f in range(5000):
    n = family_sizes[f]
    # Penalty for receiving choice_0 .. choice_9, in rank order.
    ranked_costs = (
        0,
        50,
        50 + 9 * n,
        100 + 9 * n,
        200 + 9 * n,
        200 + 18 * n,
        300 + 18 * n,
        300 + 36 * n,
        400 + 36 * n,
        500 + 235 * n,
    )
    # Any day that is not among the family's choices gets the fallback penalty.
    fam_costs[f, 1:] = 500 + 434 * n
    # Overwrite the chosen days, walking ranks from worst to best so that a day
    # listed more than once ends up with its best (first-listed) rank.
    for rank in reversed(range(10)):
        d = choices[f, rank]
        # Only days inside the 1..100 calendar have a column to write to.
        if 1 <= d <= 100:
            fam_costs[f, d] = ranked_costs[rank]

In [None]:
# Objective: total preference cost, i.e. the sum of fam_costs[f, d] over every
# (family, day) pair whose assignment variable F[f, d] is selected.
# NOTE(review): only the preference penalty is modeled here; the commented-out
# reference scorer further down the notebook also adds an accounting cost based
# on daily occupancy, which this objective does not include.
cost = mdl.sum(F[f,d]*fam_costs[f,d] for f in count for d in days)
mdl.minimize(cost)

In [None]:
# Run the solver on the model. The call is the last expression of the cell, so
# whatever solve() returns is what the notebook displays as the cell output.
# NOTE(review): the result is not stored in a variable, and `submission` is not
# updated from it in the cells visible here — TODO confirm that is intended.
mdl.solve()

In [None]:
# prediction = submission['assigned_day'].values
# desired = data.values[:, :-1]
# family_size = data.n_people.values
# penalties = np.asarray([
#     [
#         0,
#         50,
#         50 + 9 * n,
#         100 + 9 * n,
#         200 + 9 * n,
#         200 + 18 * n,
#         300 + 18 * n,
#         300 + 36 * n,
#         400 + 36 * n,
#         500 + 36 * n + 199 * n,
#         500 + 36 * n + 398 * n
#     ] for n in range(family_size.max() + 1)
# ])

In [None]:
# prediction = submission['assigned_day'].values
# desired = data.values[:, :-1]
# family_size = data.n_people.values
# penalties = np.asarray([
#     [
#         0,
#         50,
#         50 + 9 * n,
#         100 + 9 * n,
#         200 + 9 * n,
#         200 + 18 * n,
#         300 + 18 * n,
#         300 + 36 * n,
#         400 + 36 * n,
#         500 + 36 * n + 199 * n,
#         500 + 36 * n + 398 * n
#     ] for n in range(family_size.max() + 1)
# ])
# @njit(nopython=True)
# def jited_cost(prediction, desired, family_size, penalties):
#     N_DAYS = 100
#     MAX_OCCUPANCY = 300
#     MIN_OCCUPANCY = 125
#     penalty = 0
#     daily_occupancy = np.zeros(N_DAYS + 1, dtype=np.int64)
#     for i in range(len(prediction)):
#         n = family_size[i]
#         pred = prediction[i]
#         n_choice = 0
#         for j in range(len(desired[i])):
#             if desired[i, j] == pred:
#                 break
#             else:
#                 n_choice += 1
        
#         daily_occupancy[pred - 1] += n
#         penalty += penalties[n, n_choice]

#     accounting_cost = 0
#     n_out_of_range = 0
#     daily_occupancy[-1] = daily_occupancy[-2]
#     for day in range(N_DAYS):
#         n_next = daily_occupancy[day + 1]
#         n = daily_occupancy[day]
#         n_out_of_range += (n > MAX_OCCUPANCY) or (n < MIN_OCCUPANCY)
#         diff = abs(n - n_next)
#         accounting_cost += max(0, (n-125.0) / 400.0 * n**(0.5 + diff / 50.0))

#     penalty += accounting_cost
#     return np.asarray([penalty, n_out_of_range])

In [None]:
# %timeit jited_cost(prediction, desired, family_size, penalties)