In [57]:
import os
import pandas as pd
import numpy as np
import math
from scipy.optimize import linear_sum_assignment
import datetime as dt
from collections import defaultdict, Counter

## Visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [58]:
# Problem dimensions: every gift type exists in N_GIFT_QUANTITY copies and
# there is exactly one copy per child.
N_GIFT_TYPE = 1000
N_GIFT_QUANTITY = 1000
N_CHILDREN = N_GIFT_TYPE * N_GIFT_QUANTITY
# Denominators used when turning a preference-list position into a score.
N_GIFT_PREF = 1000
N_CHILD_PREF = 100
# Child-id boundaries: ids below TRIPLETS are processed in groups of three,
# ids in [TRIPLETS, TWINS) in groups of two, ids from TWINS on individually.
TRIPLETS = 3 * 1667
TWINS = TRIPLETS + 2 * 20000

In [59]:
# Load both preference tables as plain arrays. Column 0 of each file is
# dropped before use (presumably the row's own id -- confirm against the data).
# `axis=1` is spelled out because the positional form is no longer accepted
# by recent pandas releases.
CHILD_PREF = pd.read_csv('../input/child_wishlist_v2.csv', header=None).drop(0, axis=1).values
GIFT_PREF = pd.read_csv('../input/gift_goodkids_v2.csv', header=None).drop(0, axis=1).values
print(CHILD_PREF.shape)
print(GIFT_PREF.shape)

(1000000, 100)
(1000, 1000)


In [60]:
# Make the project-local '../src/' directory importable; this must run before
# the `progress` import below.
import sys
sys.path.append('../src/')
from progress import ProgressBar
# Shared progress reporter: later cells call pbar.setBar(total) before a loop
# and pbar.show(step) inside it.
pbar = ProgressBar()

In [61]:
# GIFT_HAPPINESS[gift][child]: score of giving `gift` to `child` from the
# gift's side. Position i in the gift's list scores (N_GIFT_PREF - i) / N_GIFT_PREF;
# any child not on the list falls back to -1 / (2 * N_GIFT_PREF).
GIFT_HAPPINESS = {}
pbar.setBar(N_GIFT_TYPE)
for g in range(N_GIFT_TYPE):
    pbar.show(g)
    ranked_children = defaultdict(lambda: -1. / (2 * N_GIFT_PREF))
    for i, c in enumerate(GIFT_PREF[g]):
        ranked_children[c] = float(N_GIFT_PREF - i) / N_GIFT_PREF
    GIFT_HAPPINESS[g] = ranked_children

# CHILD_HAPPINESS[child][gift]: the same construction from the child's side,
# using the wishlist position and N_CHILD_PREF.
CHILD_HAPPINESS = {}
pbar.setBar(N_CHILDREN)
for c in range(N_CHILDREN):
    pbar.show(c)
    ranked_gifts = defaultdict(lambda: -1. / (2 * N_CHILD_PREF))
    for i, g in enumerate(CHILD_PREF[c]):
        ranked_gifts[g] = float(N_CHILD_PREF - i) / N_CHILD_PREF
    CHILD_HAPPINESS[c] = ranked_gifts

# Flat array with one entry per physical gift copy: each gift type id is
# repeated N_GIFT_QUANTITY times, in ascending id order. `%time` is an IPython
# magic that reports how long the statement took.
%time GIFT_IDS = np.array([[g] * N_GIFT_QUANTITY for g in range(N_GIFT_TYPE)]).flatten()
# Preview only the first few entries.
print(GIFT_IDS[:20])

CPU times: user 66.5 ms, sys: 1.58 ms, total: 68.1 ms
Wall time: 67.8 ms
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [62]:
# Per-triplet score tables: row t covers children 3t, 3t+1 and 3t+2, column g
# holds the summed happiness of handing gift g to all three of them.
N_TRIPLET = 1667
TRIPLET_GIFT_HAPPINESS = np.zeros((N_TRIPLET, N_GIFT_TYPE))
TRIPLET_CHILD_HAPPINESS = np.zeros((N_TRIPLET, N_GIFT_TYPE))
pbar.setBar(N_TRIPLET)
for i in range(N_TRIPLET):
    pbar.show(i)
    first = 3 * i
    second = first + 1
    third = first + 2
    for g in range(N_GIFT_TYPE):
        from_gift = GIFT_HAPPINESS[g]
        TRIPLET_GIFT_HAPPINESS[i, g] = from_gift[first] + from_gift[second] + from_gift[third]
        TRIPLET_CHILD_HAPPINESS[i, g] = (
            CHILD_HAPPINESS[first][g] + CHILD_HAPPINESS[second][g] + CHILD_HAPPINESS[third][g]
        )
print(TRIPLET_CHILD_HAPPINESS[0])
print(TRIPLET_GIFT_HAPPINESS[0])

[ 0.59  -0.015 -0.015  0.79   0.04  -0.015 -0.015 -0.015 -0.015 -0.015
 -0.015 -0.015 -0.015 -0.015  0.72  -0.015 -0.015 -0.015  0.05   0.99
 -0.015 -0.015 -0.015 -0.015  0.68  -0.015 -0.015 -0.015 -0.015  0.61
 -0.015  0.33  -0.015 -0.015  0.3   -0.015 -0.015 -0.015 -0.015  0.6    0.06
 -0.015 -0.015 -0.015 -0.015 -0.015 -0.015 -0.015  0.07   0.28  -0.015
  0.515 -0.015 -0.015  0.67  -0.015 -0.015 -0.015 -0.015  0.4   -0.015
  0.99   0.1   -0.015  0.43  -0.015  0.45  -0.015 -0.015 -0.015 -0.015
  1.525  0.44  -0.015 -0.015 -0.015  0.55  -0.015 -0.015  0.18   0.4   -0.015
  0.34  -0.015  0.395  0.68  -0.015 -0.015 -0.015 -0.015 -0.015  0.17
 -0.015  0.    -0.015  0.97   0.05  -0.015 -0.015 -0.015 -0.015 -0.015
 -0.015 -0.015  0.21  -0.015  0.81  -0.015  0.94  -0.015  0.51  -0.015
 -0.015 -0.015 -0.015  0.07  -0.015 -0.015 -0.015 -0.015  0.08   0.97
 -0.015  0.75  -0.015  0.82  -0.015  0.95  -0.015 -0.015 -0.015 -0.015
 -0.015  0.13   1.51  -0.015  0.8    0.66  -0.015 -0.015 -0.015 -0.0

In [63]:
# Per-twin-pair score tables: row p covers children TRIPLETS + 2p and
# TRIPLETS + 2p + 1, column g holds the summed happiness for gift g.
N_TWIN = 20000
TWIN_GIFT_HAPPINESS = np.zeros((N_TWIN, N_GIFT_TYPE))
TWIN_CHILD_HAPPINESS = np.zeros((N_TWIN, N_GIFT_TYPE))
pbar.setBar(N_TWIN)
for i in range(N_TWIN):
    pbar.show(i)
    first = TRIPLETS + 2 * i
    second = first + 1
    for g in range(N_GIFT_TYPE):
        from_gift = GIFT_HAPPINESS[g]
        TWIN_GIFT_HAPPINESS[i, g] = from_gift[first] + from_gift[second]
        TWIN_CHILD_HAPPINESS[i, g] = CHILD_HAPPINESS[first][g] + CHILD_HAPPINESS[second][g]
print(TWIN_CHILD_HAPPINESS[0])
print(TWIN_GIFT_HAPPINESS[0])

[ 0.855 -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01
  0.775 -0.01  -0.01  -0.01  -0.01  -0.01  -0.01   0.585  0.645 -0.01  -0.01
  0.305 -0.01  -0.01  -0.01   0.545 -0.01  -0.01  -0.01  -0.01  -0.01  -0.01
 -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01   0.395
 -0.01  -0.01  -0.01  -0.01   0.445 -0.01  -0.01  -0.01   0.425 -0.01  -0.01
  0.695 -0.01   0.115 -0.01  -0.01  -0.01  -0.01   0.255 -0.01  -0.01
  0.625  0.355 -0.01  -0.01   0.885 -0.01  -0.01   0.135 -0.01   0.285
 -0.01  -0.01  -0.01  -0.01  -0.01  -0.01   0.005  0.035 -0.01  -0.01  -0.01
 -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01
 -0.01  -0.01  -0.01  -0.01  -0.01   0.715 -0.01  -0.01   0.895 -0.01
  0.395  0.015  1.76  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01  -0.01
 -0.01  -0.01   0.735 -0.01  -0.01  -0.01  -0.01   0.575  0.805 -0.01  -0.01
 -0.01   0.155 -0.01  -0.01  -0.01   0.715 -0.01  -0.01  -0.01  -0.01  -0.01
 -0.01  -0.01  -0.01   0.

In [64]:
# Starting assignment to improve on. The final cell of this notebook writes
# its result to this same path, so each full run starts from the previous
# run's output.
INITIAL_SUBMISSION = '../src/twtr.csv'

In [65]:
def my_avg_normalized_happiness(pred):
    """Score a complete child-to-gift assignment.

    Args:
        pred: sized sequence of ``[child_id, gift_id]`` pairs.

    Returns:
        Tuple ``(score, gift_total, child_total)``. ``score`` is
        ``nch**3 + ngh**3`` where ``nch`` / ``ngh`` are the normalized child
        and gift happiness; the two totals are those normalized values scaled
        back up by ``N_CHILDREN``.
    """
    total_child_happiness = 0.
    # One accumulator per gift type, sized from the shared constant instead of
    # a literal 1000.
    total_gift_happiness = np.zeros(N_GIFT_TYPE)
    print("COMPUTE NORMALIZED HAPPINESS:")
    pbar.setBar(len(pred))
    for i, (c, g) in enumerate(pred):
        pbar.show(i)
        total_child_happiness += CHILD_HAPPINESS[c][g]
        total_gift_happiness[g] += GIFT_HAPPINESS[g][c]
    nch = total_child_happiness / N_CHILDREN
    # Mean over gift types, then divided by the copies per type.
    ngh = np.mean(total_gift_happiness) / N_GIFT_QUANTITY
    print('normalized child happiness', nch)
    print('normalized gift happiness', ngh)
    return nch**3. + ngh**3., ngh*N_CHILDREN, nch*N_CHILDREN

In [66]:
# Load the starting assignment and report its score.
subm = pd.read_csv(INITIAL_SUBMISSION)
initial_anh, g, c = my_avg_normalized_happiness(subm[['ChildId', 'GiftId']].values.tolist())
print(initial_anh, g, c)
# Zero-based position of each child among the children holding the same gift.
# The ranked column is selected explicitly so the result is a single Series
# regardless of how many other columns the frame carries.
subm['gift_rank'] = subm.groupby('GiftId')['ChildId'].rank() - 1
# Unique id per physical gift copy: gift type * copies-per-type + position.
subm['gift_id'] = (subm['GiftId'] * N_GIFT_QUANTITY + subm['gift_rank']).astype(np.int32)
current_gift_ids = subm['gift_id'].values

COMPUTE NORMALIZED HAPPINESS:
('normalized child happiness', 0.9703606899967299)
('normalized gift happiness', 0.00069028150000000987)
(0.91369149861267196, 690.28150000000983, 970360.6899967298)


In [67]:
from IPython.display import display
# Rich preview of the first eight rows of the loaded assignment.
display(subm.head(8))

Unnamed: 0,ChildId,GiftId,gift_rank,gift_id
0,0,200,0.0,200000
1,1,200,1.0,200001
2,2,200,2.0,200002
3,3,245,0.0,245000
4,4,245,1.0,245001
5,5,245,2.0,245002
6,6,791,0.0,791000
7,7,791,1.0,791001


In [68]:
# Rows for the triplet children (ids below TRIPLETS) as a plain array.
# `.values` replaces `as_matrix()`, which newer pandas no longer provides and
# which the rest of this notebook already avoids.
tri_tmp = subm[['ChildId', 'GiftId']][:TRIPLETS].values
print(tri_tmp.shape)
print(tri_tmp[:10])

(5001, 2)
[[  0 200]
 [  1 200]
 [  2 200]
 [  3 245]
 [  4 245]
 [  5 245]
 [  6 791]
 [  7 791]
 [  8 791]
 [  9 240]]


In [69]:
# Collapse each triplet to one row: (triplet index, gift of its first child).
# From here on 'ChildId' in tri_df is a triplet index, not a raw child id.
tri_rows = []
for t in range(N_TRIPLET):
    tri_rows.append([t, tri_tmp[3 * t][1]])
tri_mat = np.array(tri_rows)
print(tri_mat.shape)
tri_df = pd.DataFrame({'ChildId': tri_mat[:, 0], 'GiftId': tri_mat[:, 1]})
display(tri_df[:10])

(1667, 2)


Unnamed: 0,ChildId,GiftId
0,0,200
1,1,245
2,2,791
3,3,240
4,4,824
5,5,206
6,6,472
7,7,394
8,8,590
9,9,396


In [70]:
# Rows for the twin children (ids in [TRIPLETS, TWINS)) as a plain array.
# `.values` replaces `as_matrix()`, which newer pandas no longer provides and
# which the rest of this notebook already avoids.
twin_tmp = subm[['ChildId', 'GiftId']][TRIPLETS: TWINS].values
print(twin_tmp.shape)
print(twin_tmp[:10])

(40000, 2)
[[5001  768]
 [5002  768]
 [5003  817]
 [5004  817]
 [5005  636]
 [5006  636]
 [5007  181]
 [5008  181]
 [5009  530]
 [5010  530]]


In [71]:
# Collapse each twin pair to one row: (pair index, gift of its first child).
# From here on 'ChildId' in twin_df is a pair index, not a raw child id.
N_TWIN = 20000
twin_rows = []
for p in range(N_TWIN):
    twin_rows.append([p, twin_tmp[2 * p][1]])
twin_mat = np.array(twin_rows)
print(twin_mat.shape)
twin_df = pd.DataFrame({'ChildId': twin_mat[:, 0], 'GiftId': twin_mat[:, 1]})
display(twin_df[:10])

(20000, 2)


Unnamed: 0,ChildId,GiftId
0,0,768
1,1,817
2,2,636
3,3,181
4,4,530
5,5,507
6,6,215
7,7,410
8,8,705
9,9,998


In [72]:
# Everything from row TWINS onward: children handled one at a time.
child_gift_cols = subm[['ChildId', 'GiftId']]
single_df = child_gift_cols[TWINS:]
display(single_df.head(10))

Unnamed: 0,ChildId,GiftId
45001,45001,315
45002,45002,183
45003,45003,803
45004,45004,927
45005,45005,129
45006,45006,590
45007,45007,42
45008,45008,911
45009,45009,525
45010,45010,273


In [73]:
def tri_happiness(pred):
    """Total gift-side and child-side happiness over triplet assignments.

    `pred` yields (triplet index, gift id) pairs that are looked up in the
    TRIPLET_*_HAPPINESS tables. Returns (gift_total, child_total).
    """
    gift_total, child_total = 0., 0.
    for triplet, gift in pred:
        gift_total += TRIPLET_GIFT_HAPPINESS[triplet][gift]
        child_total += TRIPLET_CHILD_HAPPINESS[triplet][gift]
    print('triplet child happiness', child_total)
    print('triplet gift happiness', gift_total)
    return gift_total, child_total

In [74]:
# Baseline triplet totals. tri_df's 'ChildId' column holds triplet indices
# (0 .. N_TRIPLET - 1), which is what tri_happiness expects.
tr_gh, tr_ch = tri_happiness(tri_df[['ChildId', 'GiftId']].values.tolist())

('triplet child happiness', 2007.4350000000009)
('triplet gift happiness', 1.9699999999999762)


In [75]:
def twin_happiness(pred):
    """Total gift-side and child-side happiness over twin-pair assignments.

    `pred` yields (pair index, gift id) pairs that are looked up in the
    TWIN_*_HAPPINESS tables. Returns (gift_total, child_total).
    """
    gift_total, child_total = 0., 0.
    for pair, gift in pred:
        gift_total += TWIN_GIFT_HAPPINESS[pair][gift]
        child_total += TWIN_CHILD_HAPPINESS[pair][gift]
    print('twin child happiness', child_total)
    print('twin gift happiness', gift_total)
    return gift_total, child_total
# Baseline twin totals. twin_df's 'ChildId' column holds pair indices
# (0 .. N_TWIN - 1), which is what twin_happiness expects.
tw_gh, tw_ch = twin_happiness(twin_df[['ChildId', 'GiftId']].values.tolist())

('twin child happiness', 24447.640000000956)
('twin gift happiness', 19.520499999997554)


In [76]:
def single_happiness(pred):
    """Total gift-side and child-side happiness over individual assignments.

    `pred` is a sized sequence of (child id, gift id) pairs looked up in
    GIFT_HAPPINESS / CHILD_HAPPINESS. Returns (gift_total, child_total).
    """
    print("COMPUTE SINGLE HAPPINESS...")
    pbar.setBar(len(pred))
    gift_total, child_total = 0., 0.
    for step, (child, gift) in enumerate(pred):
        pbar.show(step)
        gift_total += GIFT_HAPPINESS[gift][child]
        child_total += CHILD_HAPPINESS[child][gift]
    print('single child happiness', child_total)
    print('single gift happiness', gift_total)
    return gift_total, child_total
# Baseline totals for the children handled individually (rows from TWINS on).
si_gh, si_ch = single_happiness(subm[['ChildId', 'GiftId']][TWINS:].values.tolist())

COMPUTE SINGLE HAPPINESS...
('single child happiness', 943905.6149968591)
('single gift happiness', 668.7910000092245)


In [77]:
# Recombine the three groups' totals and recompute the overall score.
gh = si_gh + tw_gh + tr_gh
ch = si_ch + tw_ch + tr_ch
gift_share = gh / N_CHILDREN
child_share = ch / N_CHILDREN
score = gift_share ** 3. + child_share ** 3.
print("CURRENT SCORE = {0}".format(str(score)))

CURRENT SCORE = 0.913691498613


In [78]:
### Define a new entropy term
def entropy(gh, ch, g, c):
    """Gain in the cubic objective from adding `g` to `gh` and `c` to `ch`.

    Written out in expanded form; algebraically this equals
    (gh + g)**3 - gh**3 + (ch + c)**3 - ch**3.
    """
    gift_gain = 3. * gh * g * (g + gh) + g ** 3
    child_cross_term = 3. * ch * c * (c + ch)
    return gift_gain + child_cross_term + c ** 3
### Optimize the total entropy
def optimize_single_block(child_block, gift_block, gh, ch):
    """Redistribute the gifts in `gift_block` among the children in `child_block`.

    Builds a square cost matrix of negated `entropy` values (one row per
    child, one column per gift slot) and solves the assignment problem on it.
    Returns (children, gifts) as aligned arrays.
    """
    n = int(len(child_block))
    cost = np.zeros((n, n))
    for row in range(n):
        child = child_block[row]
        for col in range(n):
            gift = gift_block[col]
            gain = entropy(gh, ch, GIFT_HAPPINESS[gift][child], CHILD_HAPPINESS[child][gift])
            cost[row, col] = -1. * gain
    picked_rows, picked_cols = linear_sum_assignment(cost)
    return (child_block[picked_rows], gift_block[picked_cols])

In [79]:
def optimize_twin_block(child_block, gift_block, gh, ch):
    """Redistribute the gifts in `gift_block` among a block of twin pairs.

    Args:
        child_block: integer array of twin-pair indices (rows of the
            TWIN_*_HAPPINESS tables).
        gift_block: integer array of gift ids to redistribute; only the first
            ``len(child_block)`` entries are considered.
        gh, ch: current gift / child happiness totals, forwarded to `entropy`.

    Returns:
        ``(pairs, gifts)`` as aligned arrays giving the new assignment.

    Raises:
        IndexError: if `gift_block` has fewer entries than `child_block`.
    """
    size = int(len(child_block))
    if len(gift_block) < size:
        raise IndexError("gift_block is shorter than child_block")
    # Pull the whole (pair, gift) sub-table in one indexed lookup instead of
    # filling the matrix element by element in Python.
    grid = np.ix_(np.asarray(child_block), np.asarray(gift_block)[:size])
    # The solver minimizes, so the gain is negated.
    C = -1. * entropy(gh, ch, TWIN_GIFT_HAPPINESS[grid], TWIN_CHILD_HAPPINESS[grid])
    row_ind, col_ind = linear_sum_assignment(C)
    return (child_block[row_ind], gift_block[col_ind])

In [80]:
def optimize_triplet_block(child_block, gift_block, gh, ch):
    """Redistribute the gifts in `gift_block` among a block of triplets.

    Args:
        child_block: integer array of triplet indices (rows of the
            TRIPLET_*_HAPPINESS tables).
        gift_block: integer array of gift ids to redistribute; only the first
            ``len(child_block)`` entries are considered.
        gh, ch: current gift / child happiness totals, forwarded to `entropy`.

    Returns:
        ``(triplets, gifts)`` as aligned arrays giving the new assignment.

    Raises:
        IndexError: if `gift_block` has fewer entries than `child_block`.
    """
    size = int(len(child_block))
    if len(gift_block) < size:
        raise IndexError("gift_block is shorter than child_block")
    # Pull the whole (triplet, gift) sub-table in one indexed lookup instead
    # of filling the matrix element by element in Python.
    grid = np.ix_(np.asarray(child_block), np.asarray(gift_block)[:size])
    # The solver minimizes, so the gain is negated.
    C = -1. * entropy(gh, ch, TRIPLET_GIFT_HAPPINESS[grid], TRIPLET_CHILD_HAPPINESS[grid])
    row_ind, col_ind = linear_sum_assignment(C)
    return (child_block[row_ind], gift_block[col_ind])

In [81]:
BLOCK_SIZE = 261
# Ceiling division. np.split below also needs the singles to divide evenly
# into N_BLOCKS parts, which holds for BLOCK_SIZE = 261.
N_BLOCKS = (N_CHILDREN - TWINS + BLOCK_SIZE - 1) // BLOCK_SIZE
## optimize single
print("%s %s" % (gh, ch))
# Work on a private copy so the block updates never write through into `subm`;
# the result is stored back explicitly after the loop.
single_ids = subm['GiftId'].values.copy()
child_blocks = np.split(np.random.permutation(range(TWINS, N_CHILDREN)), N_BLOCKS)
# Only a sample of the random blocks is optimized per run; the cap keeps the
# loop from indexing past the last block.
n_rounds = min(200, N_BLOCKS)
pbar.setBar(n_rounds)
for j in range(n_rounds):
    pbar.show(j)
    child_block = child_blocks[j]
    # Child ids double as positions in single_ids.
    gift_block = single_ids[child_block]
    cids, gids = optimize_single_block(child_block, gift_block, gh=gh, ch=ch)
    single_ids[cids] = gids
subm['GiftId'] = single_ids
si_gh, si_ch = single_happiness(subm[['ChildId', 'GiftId']][TWINS:].values.tolist())
gh = si_gh + tw_gh + tr_gh
ch = si_ch + tw_ch + tr_ch
score = (gh/N_CHILDREN)**3. + (ch/N_CHILDREN)**3.
print(score, gh, ch, si_gh, si_ch)
print("\n\n")

690.281500009 970360.689997
COMPUTE SINGLE HAPPINESS...
('single child happiness', 943906.6249968589)
('single gift happiness', 668.7910000092245)
(0.9136943516636109, 690.2815000092221, 970361.69999685988, 668.7910000092245, 943906.6249968589)





In [82]:
# Work on a private copy so the block updates never write through into
# `tri_df`; the result is stored back explicitly after the loop.
triplet_idx = tri_df['GiftId'].values.copy()
for j in range(1):
    # Random sample of 200 triplets whose gifts are redistributed among them.
    triplet_block = np.random.permutation(range(0, N_TRIPLET))[:200]
    tr_g_block = triplet_idx[triplet_block]
    print("%s %s" % (tr_g_block.shape, triplet_block.shape))
    cids, gids = optimize_triplet_block(triplet_block, tr_g_block, gh=gh, ch=ch)
    triplet_idx[cids] = gids
tri_df['GiftId'] = triplet_idx
tr_gh, tr_ch = tri_happiness(tri_df[['ChildId', 'GiftId']].values.tolist())
gh = si_gh + tw_gh + tr_gh
ch = si_ch + tw_ch + tr_ch
score = (gh/N_CHILDREN)**3. + (ch/N_CHILDREN)**3.
print(score, gh, ch, si_gh, si_ch)
print("\n\n")

(200,) (200,)
('triplet child happiness', 2007.4350000000009)
('triplet gift happiness', 1.9699999999999762)
(0.9136943516636109, 690.2815000092221, 970361.69999685988, 668.7910000092245, 943906.6249968589)





In [83]:
# Work on a private copy so the block updates never write through into
# `twin_df`; the result is stored back explicitly after the loop.
twin_idx = twin_df['GiftId'].values.copy()
for j in range(5):
    # Random sample of 200 twin pairs whose gifts are redistributed among them.
    twin_block = np.random.permutation(range(0, N_TWIN))[:200]
    tw_g_block = twin_idx[twin_block]
    cids, gids = optimize_twin_block(twin_block, tw_g_block, gh=gh, ch=ch)
    twin_idx[cids] = gids
twin_df['GiftId'] = twin_idx
tw_gh, tw_ch = twin_happiness(twin_df[['ChildId', 'GiftId']].values.tolist())
gh = si_gh + tw_gh + tr_gh
ch = si_ch + tw_ch + tr_ch
score = (gh/N_CHILDREN)**3. + (ch/N_CHILDREN)**3.
print(score, gh, ch, si_gh, si_ch)
print("\n\n")

('twin child happiness', 24449.530000000959)
('twin gift happiness', 19.520499999997554)
(0.91369969055637912, 690.2815000092221, 970363.5899968599, 668.7910000092245, 943906.6249968589)





In [84]:
# Expand one gift per triplet back into one gift per child (three
# consecutive entries per triplet).
tri_list = tri_df.GiftId.tolist()
tl = []
for g in tri_list:
    tl.extend([g] * 3)
print(len(tl))
# Preview only; the full list holds one entry per triplet child.
print(tl[:30])

5001
[200, 200, 200, 245, 245, 245, 791, 791, 791, 240, 240, 240, 824, 824, 824, 206, 206, 206, 472, 472, 472, 394, 394, 394, 590, 590, 590, 396, 396, 396, 196, 196, 196, 494, 494, 494, 873, 873, 873, 965, 965, 965, 130, 130, 130, 194, 194, 194, 77, 77, 77, 692, 692, 692, 799, 799, 799, 671, 671, 671, 201, 201, 201, 302, 302, 302, 756, 756, 756, 46, 46, 46, 247, 247, 247, 690, 690, 690, 629, 629, 629, 455, 455, 455, 389, 389, 389, 782, 782, 782, 951, 951, 951, 671, 671, 671, 475, 475, 475, 186, 186, 186, 320, 320, 320, 701, 701, 701, 998, 998, 998, 409, 409, 409, 356, 356, 356, 118, 118, 118, 225, 225, 225, 176, 176, 176, 397, 397, 397, 950, 950, 950, 270, 270, 270, 777, 777, 777, 411, 411, 411, 155, 155, 155, 671, 671, 671, 57, 57, 57, 944, 944, 944, 310, 310, 310, 998, 998, 998, 429, 429, 429, 884, 884, 884, 982, 982, 982, 220, 220, 220, 494, 494, 494, 799, 799, 799, 437, 437, 437, 766, 766, 766, 921, 921, 921, 569, 569, 569, 171, 171, 171, 361, 361, 361, 309, 309, 309, 943, 943, 943

In [85]:
# Expand one gift per twin pair back into one gift per child (two
# consecutive entries per pair).
twin_list = twin_df.GiftId.tolist()
tw = []
for g in twin_list:
    tw.extend([g] * 2)
print(len(tw))
# Preview only; the full list holds one entry per twin child.
print(tw[:30])

40000
[768, 768, 817, 817, 636, 636, 181, 181, 530, 530, 507, 507, 215, 215, 410, 410, 705, 705, 998, 998, 568, 568, 619, 619, 869, 869, 163, 163, 856, 856, 998, 998, 950, 950, 621, 621, 396, 396, 972, 972, 15, 15, 571, 571, 980, 980, 686, 686, 449, 449, 692, 692, 118, 118, 232, 232, 154, 154, 58, 58, 344, 344, 139, 139, 951, 951, 633, 633, 992, 992, 812, 812, 36, 36, 527, 527, 887, 887, 264, 264, 759, 759, 489, 489, 565, 565, 457, 457, 409, 409, 770, 770, 517, 517, 494, 494, 163, 163, 699, 699, 933, 933, 338, 338, 671, 671, 109, 109, 907, 907, 837, 837, 592, 592, 249, 249, 112, 112, 788, 788, 846, 846, 856, 856, 359, 359, 290, 290, 300, 300, 785, 785, 592, 592, 863, 863, 525, 525, 917, 917, 23, 23, 808, 808, 599, 599, 240, 240, 573, 573, 494, 494, 304, 304, 922, 922, 902, 902, 176, 176, 220, 220, 166, 166, 107, 107, 112, 112, 500, 500, 904, 904, 433, 433, 913, 913, 150, 150, 784, 784, 479, 479, 494, 494, 123, 123, 250, 250, 836, 836, 118, 118, 279, 279, 494, 494, 494, 494, 985, 985, 9

In [86]:
# Stitch the three groups back together in child-id order:
# triplet children, then twin children, then the individually handled rest.
single_list = subm.GiftId.tolist()[TWINS:]
dic = {
    'ChildId': list(range(N_CHILDREN)),
    'GiftId': tl + tw + single_list,
}
output = pd.DataFrame(dic)
display(output.head(10))

Unnamed: 0,ChildId,GiftId
0,0,200
1,1,200
2,2,200
3,3,245
4,4,245
5,5,245
6,6,791
7,7,791
8,8,791
9,9,240


In [87]:
## output.to_csv('../src/twtr.csv', index=False)

In [88]:
## 3 = 1+1+1
# One independent list per gift type. `[[]] * N_GIFT_TYPE` would repeat a
# single shared list, so an append to one slot would appear in every slot.
GIFT_POOL = [[] for _ in range(N_GIFT_TYPE)]
display(tri_df[:5])
display(twin_df[: 5])
display(output[45001:45005])

Unnamed: 0,ChildId,GiftId
0,0,200
1,1,245
2,2,791
3,3,240
4,4,824


Unnamed: 0,ChildId,GiftId
0,0,768
1,1,817
2,2,636
3,3,181
4,4,530


Unnamed: 0,ChildId,GiftId
45001,45001,315
45002,45002,183
45003,45003,803
45004,45004,927


In [89]:
# Random sample of 1000 triplet indices and the gift each one currently holds.
shuffled_triplets = np.random.permutation(range(0, N_TRIPLET))
tr_shuffle = shuffled_triplets[:1000]
triplet_idx = tri_df['GiftId'].values
tr_sp = triplet_idx[tr_shuffle]
print(tr_sp[:5])

[417 513 671 998 995]


In [90]:
# TWIN_POOL[gift] lists the twin-pair indices currently holding that gift.
TWIN_POOL = [[] for _ in range(N_GIFT_TYPE)]
for i, g in enumerate(twin_df['GiftId'].values):
    TWIN_POOL[g].append(i)
# Report the outer length directly; np.shape would first have to convert the
# ragged list of lists into an array, which recent NumPy versions refuse.
print((len(TWIN_POOL),))

(1000,)


In [91]:
# SINGLE_POOL[gift] lists the individually handled children (rows from TWINS
# on) currently holding that gift. `.values` replaces `as_matrix()`, which
# newer pandas no longer provides.
SINGLE_POOL = [[] for _ in range(N_GIFT_TYPE)]
for c, g in output[TWINS:].values:
    SINGLE_POOL[g].append(c)
print(len(SINGLE_POOL[0]))

984


In [92]:
# pool31[k] is the gift held by group k; map_31[k] lists the three child ids
# in that group. The first entries are the sampled triplets.
pool31 = []
map_31 = []
for tr, gift in zip(tr_shuffle, tr_sp):
    pool31.append(gift)
    map_31.append([tr*3, tr*3+1, tr*3+2])
print(len(pool31))
print(pool31[0])
print(map_31[0])
# Spot check: floor division keeps the index an integer under true division.
print(triplet_idx[3054 // 3])
# Private working copy of the full assignment; later cells edit it in place
# and store it back into `output` before saving.
pick = output.GiftId.values.copy()
print("%s %s %s" % (pick[3054], pick[3055], pick[3056]))

1000
417
[3687, 3688, 3689]
282
282 282 282


In [93]:
# For every gift type, add up to four groups of three individually handled
# children that currently hold that gift, picked in random order.
for i in range(N_GIFT_TYPE):
    holders = SINGLE_POOL[i]
    n_p = int(min(4, len(holders) // 3))
    idx = np.random.permutation(range(0, len(holders)))
    if n_p < 4:
        print("shaocong %s" % i)
    for j in range(n_p):
        k = int(len(pool31))
        pool31.append(i)
        first, second, third = idx[j*3], idx[j*3+1], idx[j*3+2]
        map_31.append([holders[first], holders[second], holders[third]])
print(len(pool31))
print(pool31[1000])
print(map_31[1000])
current_gifts = output.GiftId.values
print("%s %s %s" % (current_gifts[map_31[1000][0]],
                    current_gifts[map_31[1000][1]],
                    current_gifts[map_31[1000][2]]))

shaocong 494
4996
0
[261808, 662909, 209376]
0 0 0


In [94]:
# Group indices 0 .. len(pool31) - 1 and the gift each group currently holds.
child_idx_31 = np.array(list(range(len(pool31))))
gift_idx_31 = np.array(pool31)

In [95]:
# Block size and a random ordering of the group indices.
bk_size = 300
n_groups_31 = len(pool31)
blocks_31 = np.random.permutation(range(0, n_groups_31))

In [96]:
def get_total_happiness(vec):
    """Total gift-side and child-side happiness of a full assignment.

    `vec` is a sized sequence whose position is the child id and whose value
    is that child's gift id. Returns (gift_total, child_total).
    """
    print("COMPUTE TOTAL HAPPINESS...")
    pbar.setBar(len(vec))
    gift_total, child_total = 0., 0.
    for child, gift in enumerate(vec):
        pbar.show(child)
        gift_total += GIFT_HAPPINESS[gift][child]
        child_total += CHILD_HAPPINESS[child][gift]
    print('single child happiness', child_total)
    print('single gift happiness', gift_total)
    return gift_total, child_total
# Recompute both totals from the full per-child assignment and rescore.
gh, ch = get_total_happiness(pick)
gift_share = gh / N_CHILDREN
child_share = ch / N_CHILDREN
score = gift_share ** 3. + child_share ** 3.
print(score)

COMPUTE TOTAL HAPPINESS...
('single child happiness', 970363.5899967295)
('single gift happiness', 690.2815000095043)
0.913699690556


In [97]:
def optimize_31_block(child_block, gift_block, gh, ch):
    """Redistribute the gifts in `gift_block` among groups of children.

    `child_block` holds group indices into `map_31`; each group's value for a
    gift is the summed gift-side and child-side happiness of its members.
    Returns (groups, gifts) as aligned arrays.
    """
    n = int(len(child_block))
    cost = np.zeros((n, n))
    for row in range(n):
        members = map_31[child_block[row]]
        for col in range(n):
            gift = gift_block[col]
            group_gift_score = sum((GIFT_HAPPINESS[gift][kid] for kid in members), 0.)
            group_child_score = sum((CHILD_HAPPINESS[kid][gift] for kid in members), 0.)
            cost[row, col] = -1. * entropy(gh, ch, group_gift_score, group_child_score)
    picked_rows, picked_cols = linear_sum_assignment(cost)
    return (child_block[picked_rows], gift_block[picked_cols])

In [108]:
## Optimize 31
# Reshuffle the groups, optimize the first 20 blocks of 200, then write each
# group's gift back to all of its children in `pick`.
bk_size = 200
blocks_31 = np.random.permutation(range(0, len(pool31)))
#blocks_31 = np.arange(0, len(pool31))
for j in range(20):
    start = j * bk_size
    child_block = blocks_31[start: start + bk_size]
    gift_block = gift_idx_31[child_block]
    cids, gids = optimize_31_block(child_block, gift_block, gh=gh, ch=ch)
    gift_idx_31[cids] = gids
for k, g in enumerate(gift_idx_31):
    members = map_31[k]
    for c in members:
        pick[c] = g
gh, ch = get_total_happiness(pick)
score = (gh/N_CHILDREN)**3. + (ch/N_CHILDREN)**3.
print(score)
print(cids)

COMPUTE TOTAL HAPPINESS...
('single child happiness', 970363.5899967295)
('single gift happiness', 690.2815000095043)
0.913699690556
[ 265 1159 3513 2652 1687 4479  757   50 3658 2511 2202  932 3227 1407 4785
 1793  321 3656 2463 2189 3238  955 2001 2660 3984 2264 2355 3794 1777 4548
  186 2574  453  544 1140 3011  143 1570  574 2173  628 3999 4625 4862 3937
 4860 1595 4755 3816 4472 1313 2757 3215 4356  930 1328 4354 1830 1192 4174
 2619 1334  853 4126  134 3148 3158 2700 2380  269 4137 2383 4002 3991 2514
  466   14 4183  235 2121 2460 1653 2978 2810 1495 1204  274 4246 4468 4783
 3405 2929 4538  801 1385 1637 1542  641 1138 4040 4352 4290 3456 2842 2790
  961 1742 2701 4343 1783 2195 1994  826 1241  333 1203 2827 1858  350 4590
 1983  862 3889 2358 2445 1505 2687  389 4993 2579 4302 4730 1948 2296 3676
 4082 2940 1335  345 1599  336 2617  368 1282 1226 3240 4987 2775 4150 4539
 4554  722 3880 4457 4714 1594 1790 2218 1914 3320 1300 4957 2491 4711   22
 1730 4909 3933 1501 4745 2178 

In [109]:
# Every triplet must still share a single gift.
for i in range(N_TRIPLET):
    first, second, third = pick[3*i], pick[3*i+1], pick[3*i+2]
    assert first == second == third

In [110]:
# Every twin pair must still share a single gift.
for j in range(TRIPLETS, TWINS, 2):
    first, second = pick[j], pick[j+1]
    assert first == second

In [111]:
# Every gift type must be handed out exactly 1000 times.
cnt = np.zeros(N_GIFT_TYPE)
for g in pick:
    cnt[g] += 1
for cn in cnt:
    assert cn == 1000

In [112]:
# Store the optimized assignment and save it. The target is the same path as
# INITIAL_SUBMISSION, so the starting file is overwritten.
output['GiftId'] = pick
output.to_csv('../src/twtr.csv', index=False)

In [113]:
# Leftover scratch expression; it assigns no variable.
min(1,2,3)

1