### Import important libraries

In [1]:
import pandas as pd
import numpy as np

### Merge data and output

In [2]:
full=pd.merge(pd.read_csv("family_data.csv"),pd.read_csv("sample_submission.csv"))

### Reusable Functions

In [4]:
def prefCost(people,choice=-1):
    consolation=[[0,0],[50,0],[50,9],[100,9],[200,9],[200,18],[300,18],[300,36],[400,36],[500,36],[500,36+398]]
    return (consolation[choice][0]+consolation[choice][1]*people*1.0) 

def accCost(full):
    segregated=(full.groupby(by=['assigned_day']).sum())[['n_people']]
    segregated.loc[101]=segregated.loc[100]
    segregated['Cost']=0.0
    segregated=segregated.reset_index('assigned_day')
    if((segregated['n_people']<125).any()):
        return 9999999999.0 
    if((segregated['n_people']>300).any()):
        return 9999999999.0 
    accounting_cost=0
    for day in range(100):
        diff=abs(segregated['n_people'][day]-segregated['n_people'][day+1])
        expPart=0.5+(diff/50.0)
        leftPart=(segregated['n_people'][day]-125.0)/400.0
        rightPart=segregated['n_people'][day] ** expPart
        segregated['Cost'][day]=(leftPart*rightPart)
    return segregated['Cost'].sum()

### Initial Assignment

In [5]:
#Assign -1 to all in full
full['assigned_choice']=-1
full['assigned_day']=-1
full['pref_cost']=-1

#Make a table for day-wise grouping
day_table=pd.DataFrame({'Day': range(1,101),'Count':0})
day_table.set_index('Day')


for day in range(0,100):
    print("Working for day "+str(day+1))
    #print("=========================")
    day_table.loc[day]['Count']=0
    for family in range(5000):
        #rint("Working on "+str(family))
        if(day_table.loc[day]['Count']>=208):
            print("Total for day "+str(day+1)+"="+str(day_table.loc[day]['Count']))
            break
        if(full['assigned_day'][family]>-1):
            continue
        full['assigned_day'][family]=day+1
        day_table['Count'][day]+=full['n_people'][family]
        #print("Adding family_id "+str(family))
        for choice in range(0,11):
            if(choice==10):
                full['assigned_choice'][family]=choice
                break
            if(full['choice_'+str(choice)][family]==day):
                full['assigned_choice'][family]=choice
                break
        
        full['pref_cost'][family]=prefCost(full['n_people'][family],full['assigned_choice'][family])

Working for day 1
Total for day 1=209
Working for day 2
Total for day 2=210
Working for day 3
Total for day 3=210
Working for day 4
Total for day 4=209
Working for day 5
Total for day 5=210
Working for day 6
Total for day 6=208
Working for day 7
Total for day 7=208
Working for day 8
Total for day 8=213
Working for day 9
Total for day 9=211
Working for day 10
Total for day 10=214
Working for day 11
Total for day 11=213
Working for day 12
Total for day 12=210
Working for day 13
Total for day 13=212
Working for day 14
Total for day 14=210
Working for day 15
Total for day 15=209
Working for day 16
Total for day 16=208
Working for day 17
Total for day 17=211
Working for day 18
Total for day 18=208
Working for day 19
Total for day 19=208
Working for day 20
Total for day 20=209
Working for day 21
Total for day 21=209
Working for day 22
Total for day 22=211
Working for day 23
Total for day 23=209
Working for day 24
Total for day 24=208
Working for day 25
Total for day 25=209
Working for day 26

### Verify Total Count should be correct in assignment

In [6]:
day_table['Count'].sum()

21003

In [7]:
full['n_people'].sum()

21003

### Check initial cost

In [8]:
print("Acc Cost="+str(accCost(full)))
print("Pref Cost="+str(full['pref_cost'].sum()))
print("score="+str(accCost(full)+full['pref_cost'].sum()))

Acc Cost=378.6761161291622
Pref Cost=10574644
score=10575022.67611613


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### Start Optimization by looping over choices now

In [9]:
full_bk=full.copy()

In [26]:
full=full_bk.copy()

In [29]:
def optimize(full, targetted_ids_list):
    #print("Input Cost="+str(accCost(full)+full['pref_cost'].sum()))
    print("score="+str(accCost(full)+full['pref_cost'].sum()))
    for family in targetted_ids_list:
        curr_acc_cost=accCost(full)
        curr_pref_cost=full['pref_cost'].sum()
        curr_cost=curr_acc_cost+curr_pref_cost
        curr_choice=full['assigned_choice'][family]
        curr_day=full['assigned_day'][family]
    
        #print("Family Id:"+str(family)+" Cost="+str(curr_cost)+"(Pref="+str(full['pref_cost'][family])+")"+" Day="+str(full['assigned_day'][family])+" Choice="+str(full['assigned_choice'][family]))
        
        #Assign optimized choice and cost same as current one
        allCosts=[]
        for choice in range(10):
            full['assigned_day'][family]=full['choice_'+str(choice)][family]
            full['assigned_choice'][family]=choice
            full['pref_cost'][family]=prefCost(full['n_people'][family],choice)
            allCosts.append(accCost(full)+full['pref_cost'].sum())
            
        
        optimized_choice=allCosts.index(min(allCosts))
        optimized_cost=min(allCosts)
        if(optimized_cost<curr_cost):
            full['assigned_day'][family]=full['choice_'+str(optimized_choice)][family]
            full['assigned_choice'][family]=optimized_choice
            full['pref_cost'][family]=prefCost(full['n_people'][family],full['assigned_choice'][family])
        else:    
            full['assigned_day'][family]=curr_day
            full['assigned_choice'][family]=curr_choice
            full['pref_cost'][family]=prefCost(full['n_people'][family],full['assigned_choice'][family])
        
        reduction=curr_cost-optimized_cost
        #print("NEW: Family Id:"+str(family)+" Cost="+str(optimized_cost)+"(Pref="+str(full['pref_cost'][family])+")"+" Day="+str(full['assigned_day'][family])+" Choice="+str(full['assigned_choice'][family])+" Reduction="+str(reduction))
        #print("=========================================================================")
    #print("Output Cost="+str(accCost(full)+full['pref_cost'].sum()))
    print("score="+str(accCost(full)+full['pref_cost'].sum()))

### Targetting all records with Choice=10

In [32]:
pd.options.display.max_rows=100
pd.options.display.max_columns=100
full.groupby(by=['assigned_choice','n_people']).describe()[['pref_cost']]

Unnamed: 0_level_0,Unnamed: 1_level_0,pref_cost,pref_cost,pref_cost,pref_cost,pref_cost,pref_cost,pref_cost,pref_cost
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
assigned_choice,n_people,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
0,2,503.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,3,668.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,4,1004.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,5,648.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,6,366.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,7,242.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,8,123.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,82.0,50.0,0.0,50.0,50.0,50.0,50.0,50.0
1,3,134.0,50.0,0.0,50.0,50.0,50.0,50.0,50.0
1,4,242.0,50.0,0.0,50.0,50.0,50.0,50.0,50.0


In [34]:
%%time
#big_families=list(full[(full['assigned_choice']==1)]['family_id'])
list_to_workupon=[]
list_to_revisit=[]
for choiceFactor in range(10,-1,-1):
    for sizeFactor in range(8,1,-1):
        list_to_workupon=list(full[(full['assigned_choice']==choiceFactor)&(full['n_people']==sizeFactor)]['family_id'])
        print("Working upon a fresh list for choice "+str(choiceFactor)+" and family-size "+str(sizeFactor)+". Length="+str(len(list_to_workupon)))
        optimize(full,list_to_workupon)
        list_to_revisit=list(full[(full['assigned_choice']>=choiceFactor)&(full['n_people']>=sizeFactor)]['family_id'])
        print("Working upon revisit list. Length="+str(len(list_to_revisit)))
        optimize(full,list_to_revisit)

Working upon a fresh list for choice 10 and family-size 8. Length=0
score=558470.8397595072
score=558470.8397595072
Working upon revisit list. Length=0
score=558470.8397595072
score=558470.8397595072

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Working upon a fresh list for choice 10 and family-size 7. Length=0
score=558470.8397595072
score=558470.8397595072
Working upon revisit list. Length=0
score=558470.8397595072
score=558470.8397595072
Working upon a fresh list for choice 10 and family-size 6. Length=0
score=558470.8397595072
score=558470.8397595072
Working upon revisit list. Length=0
score=558470.8397595072
score=558470.8397595072
Working upon a fresh list for choice 10 and family-size 5. Length=6
score=558470.8397595072
score=558470.8397595072
Working upon revisit list. Length=6
score=558470.8397595072
score=558470.8397595072
Working upon a fresh list for choice 10 and family-size 4. Length=65
score=558470.8397595072
score=558470.8397595072
Working upon revisit list. Length=71
score=558470.8397595072
score=558470.8397595072
Working upon a fresh list for choice 10 and family-size 3. Length=103
score=558470.8397595072
score=558470.8397595072
Working upon revisit list. Length=174
score=558470.8397595072
score=558470.8397

In [None]:

optimize()

In [35]:
a=full.groupby(by=['assigned_day']).sum()[['n_people']]
full.to_csv()

Unnamed: 0_level_0,n_people
assigned_day,Unnamed: 1_level_1
1,299
2,294
3,300
4,299
5,281
6,254
7,246
8,248
9,269
10,292


In [23]:
print(list_to_revisit)

[[41, 42, 134, 295, 350, 366, 370, 386, 413, 486, 504, 580, 636, 660, 751, 785, 799, 806, 819, 870, 873, 874, 878, 896, 939, 958, 1081, 1099, 1110, 1120, 1137, 1158, 1176, 1189, 1192, 1268, 1273, 1331, 1353, 1359, 1422, 1436, 1454, 1472, 1477, 1488, 1498, 1529, 1559, 1572, 1597, 1621, 1645, 1648, 1679, 1723, 1738, 1765, 1808, 1809, 1812, 1813, 1822, 1840, 1852, 1995, 2021, 2031, 2068, 2115, 2182, 2198, 2256, 2318, 2347, 2361, 2404, 2456, 2469, 2519, 2606, 2642, 2680, 2698, 2762, 2782, 2819, 2830, 2898, 2899, 2909, 3017, 3033, 3063, 3121, 3122, 3186, 3219, 3272, 3279, 3297, 3306, 3310, 3354, 3437, 3468, 3472, 3491, 3519, 3571, 3595, 3609, 3638, 3662, 3664, 3742, 3769, 3778, 3792, 3795, 3796, 3953, 3956, 3989, 4078, 4139, 4194, 4302, 4337, 4383, 4416, 4509, 4518, 4543, 4561, 4598, 4733, 4759, 4866, 4890, 4931, 4977, 4990]]
