In [41]:
import pandas as pd
import numpy as np
import math
# import datetime
from datetime import date, datetime
import subprocess
import bandit.main as bd
import calendar

In [42]:
def set_cohorts(df, score_col):
    """Sort ``df`` by ``score_col`` and tag rows with up to three cohort columns.

    The number of distinct values found in ``score_col`` decides which columns
    are added: ``cohort1`` when there are more than 3 distinct values,
    ``cohort2`` when there are more than 9, and ``cohort3`` when there are more
    than 27.  Each cohort value is the raw score floor-divided by
    ``distinct_count / bucket_count``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``score_col``.  The frame passed in is not modified.
    score_col : str
        Name of the column to sort and bucket on.

    Returns
    -------
    pandas.DataFrame
        A frame sorted by ``score_col`` with a fresh index; ``reset_index()``
        keeps the previous index as a regular column.
    """
    base = 3

    ordered = df.sort_values(score_col).reset_index()

    # Count the distinct scores through a group-by (one output row per score).
    distinct_scores = len(ordered.groupby(score_col)[score_col].agg(['count']))

    # Each level is finer than the previous one: 3, 9, then 27 buckets.
    for level in (1, 2, 3):
        buckets = base ** level
        if distinct_scores > buckets:
            width = distinct_scores / buckets
            ordered['cohort' + str(level)] = ordered[score_col] // width

    return ordered

In [43]:
def df_search(df_in, cohort):
    """Narrow ``df_in`` to the rows of the top-ranked class of ``cohort``.

    The classes of ``cohort`` are ranked with ``df_bandit_class`` and the class
    in the first row of that ranking is selected.  Both the ranking table and
    the selected class are printed.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame with a 'Complete' column and, optionally, the ``cohort`` column.
    cohort : str
        Name of the cohort column to rank and filter on.

    Returns
    -------
    pandas.DataFrame
        ``df_in`` itself when ``cohort`` is not one of its columns or when no
        'Complete' value is missing; otherwise only the rows whose ``cohort``
        value equals the selected class.
    """
    if cohort not in df_in:
        return df_in

    ranking = df_bandit_class(df_in, cohort)
    print(ranking)

    # First row, first column of the ranking is the class value to pursue next.
    top_class = ranking.iloc[0, 0]
    print(top_class)

    has_open_rows = df_in['Complete'].isnull().sum() > 0
    if not has_open_rows:
        return df_in

    return df_in[df_in[cohort] == top_class]

In [44]:
# todo: combine bandit_class and bandit_diff
def df_bandit_class(df, classname):
    """Summarise 'Quality' per ``classname`` group and rank the groups.

    For every group the count, mean and variance of 'Quality' are computed,
    the grand total of the counts is attached as 'total', and a 'donext' score
    is obtained from ``bd.get_tuned_ucb`` (project-local; presumably a
    UCB1-Tuned style bandit score -- confirm in ``bandit.main``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Quality' column and the ``classname`` column.
    classname : str
        Column whose distinct values define the groups.

    Returns
    -------
    pandas.DataFrame
        One row per group with columns ``classname``, 'count', 'mean', 'var',
        'total' and 'donext', sorted by 'donext' in descending order.
    """
    stats = (
        df.groupby([classname])['Quality']
        .agg(['count', 'mean', 'var'])
        .reset_index()
    )
    stats['total'] = stats['count'].sum()

    def _score(row):
        return bd.get_tuned_ucb(row, 'mean', 'var')

    # A missing score is replaced by 999, which sorts ahead of the computed
    # scores in the descending sort below (assuming real scores stay below 999).
    stats['donext'] = stats.apply(_score, axis=1).fillna(999)

    return stats.sort_values('donext', ascending=False)

In [45]:
# Build today's candidate rows: one row per goal value 1..25, all stamped with
# the current timestamp and with 'Complete' left empty (NaN).
currDate = datetime.today()
goal_list = list(range(1, 26))
df = (
    pd.DataFrame({"Goal": goal_list})
    .assign(Date=currDate, Complete=np.nan)
    .loc[:, ['Date', 'Goal', 'Complete']]
)

In [46]:
# Start 'GoalSetter2.xlsx' through the shell (shell=True); Popen returns
# immediately and does not wait for the spawned process to finish.
# NOTE(review): presumably relies on the OS file association to open the
# workbook for manual editing (Windows behaviour) -- confirm on other platforms.
subprocess.Popen('GoalSetter2.xlsx', shell=True)

<subprocess.Popen at 0x136969b0>

In [47]:
# Read the rows stored in 'GoalSetter2.xlsx' and stack them below the rows
# already held in df, then renumber the index from 0.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; the result is the same.
df_excel = pd.read_excel(io='GoalSetter2.xlsx')
df = pd.concat([df, df_excel]).reset_index(drop=True)

In [48]:
# bd.reduce comes from the project-local bandit module and is not visible here.
# NOTE(review): presumably trims or filters the rows keyed on 'Goal' and prints
# a short summary -- confirm against bandit.main.
df = bd.reduce(df,'Goal')

54 0.865 47


In [49]:
# Display the last ten rows of df as a quick check of the combined data.
df.tail(10)

Unnamed: 0,Date,Goal,Complete
69,2018-09-24,8,9.0
70,2018-09-26,9,9.0
71,2018-09-27,13,7.0
72,2018-09-28,15,9.0
73,2018-09-29,8,9.0
74,2018-09-30,9,17.0
75,2018-10-01,9,9.0
76,2018-10-02,7,7.0
77,2018-10-03,8,8.0
78,2018-10-04,8,


In [50]:
# Score every row: Complete ** 1.2 when Complete reached or exceeded Goal,
# Complete / 5 otherwise.  A missing Complete compares False and therefore
# ends up with a NaN score.
goal_met = df['Complete'] >= df['Goal']
met_score = df['Complete'] ** 1.2
missed_score = df['Complete'] / 5
df['Score'] = np.where(goal_met, met_score, missed_score)

In [51]:
# bd.get_quality is defined in the project-local bandit module (not visible here).
# NOTE(review): the result is joined onto df afterwards and appears to supply the
# 'Quality' column -- confirm that its index lines up with df's index.
df_quality = bd.get_quality(df['Score'])

In [52]:
# Attach the quality columns by index, then order the rows by 'Quality',
# highest first.
df_with_quality = df.join(df_quality)
df = df_with_quality.sort_values(by='Quality', ascending=False)

In [53]:
# Add the cohort columns derived from 'Goal'; set_cohorts also re-sorts the frame
# by 'Goal' and resets the index (the previous index becomes an 'index' column).
df = set_cohorts(df, 'Goal')

In [54]:
# Minimum number of completed rows on today's weekday required before the
# analysis is restricted to that weekday.
MIN_WEEKDAY_SAMPLES = 10

# Derive today's weekday name with the same pandas routine used for the column
# below, so both sides of the comparison use identical (English by default)
# names regardless of the process locale.
currDay = pd.Timestamp(currDate).day_name()

# 'weekday' is added to df itself (not only to the filtered frame) so the column
# stays available downstream even when the weekday filter is not applied.
df['weekday'] = df['Date'].dt.day_name()
df_today = df[df['weekday'] == currDay]

# Only narrow to today's weekday when enough completed observations exist for it.
today_sum = df_today['Complete'].notnull().sum()
if today_sum >= MIN_WEEKDAY_SAMPLES:
    df = df_today

In [55]:
# Display the ten rows with the latest 'Date' values (ascending sort, last ten).
df.sort_values(by='Date').tail(10)

Unnamed: 0,index,Date,Goal,Complete,Score,Quality,cohort1,cohort2,weekday
61,17,2018-10-05 17:21:34.354168,18,,,,2.0,6.0,Friday
60,16,2018-10-05 17:21:34.354168,17,,,,2.0,6.0,Friday
36,10,2018-10-05 17:21:34.354168,11,,,,1.0,3.0,Friday
5,5,2018-10-05 17:21:34.354168,6,,,,0.0,2.0,Friday
57,15,2018-10-05 17:21:34.354168,16,,,,1.0,5.0,Friday
55,14,2018-10-05 17:21:34.354168,15,,,,1.0,5.0,Friday
41,11,2018-10-05 17:21:34.354168,12,,,,1.0,4.0,Friday
44,12,2018-10-05 17:21:34.354168,13,,,,1.0,4.0,Friday
70,23,2018-10-05 17:21:34.354168,24,,,,2.0,8.0,Friday
71,24,2018-10-05 17:21:34.354168,25,,,,2.0,9.0,Friday


In [56]:
# Re-order df by 'Quality' (highest first) and preview the first ten rows.
df = df.sort_values(by='Quality', ascending=False)
df.head(n=10)

Unnamed: 0,index,Date,Goal,Complete,Score,Quality,cohort1,cohort2,weekday
22,52,2018-09-07,9,21.0,38.606742,1.0,1.0,3.0,Friday
40,61,2018-09-16,12,17.0,29.959786,0.776025,1.0,4.0,Sunday
25,74,2018-09-30,9,17.0,29.959786,0.776025,1.0,3.0,Sunday
43,41,2018-08-26,13,16.0,27.857618,0.721574,1.0,4.0,Sunday
54,36,2018-08-19,15,15.0,25.781579,0.6678,1.0,5.0,Sunday
46,40,2018-08-25,13,15.0,25.781579,0.6678,1.0,4.0,Saturday
31,31,2018-08-15,10,14.0,23.733055,0.614739,1.0,3.0,Wednesday
51,34,2018-08-24,13,14.0,23.733055,0.614739,1.0,4.0,Friday
7,53,2018-09-08,7,13.0,21.713609,0.562431,0.0,2.0,Saturday
50,44,2018-08-30,13,13.0,21.713609,0.562431,1.0,4.0,Thursday


In [57]:
# Step 1: rank the 'cohort1' classes and keep only the rows of the class that
# df_search selects (it prints the ranking table and the chosen class).
df_step1 = df_search(df,'cohort1')
df_step1

   cohort1  count      mean       var  total    donext
2      2.0      4  0.031083  0.000555     53  0.529222
0      0.0     18  0.269625  0.027000     53  0.504450
1      1.0     31  0.324793  0.094427     53  0.503730
2.0


Unnamed: 0,index,Date,Goal,Complete,Score,Quality,cohort1,cohort2,weekday
64,57,2018-09-12 00:00:00.000000,19,10.0,2.0,0.051804,2.0,6.0,Wednesday
59,38,2018-08-21 00:00:00.000000,17,9.0,1.8,0.046624,2.0,6.0,Tuesday
62,39,2018-08-22 00:00:00.000000,18,5.0,1.0,0.025902,2.0,6.0,Wednesday
65,64,2018-09-19 00:00:00.000000,20,0.0,0.0,0.0,2.0,7.0,Wednesday
60,16,2018-10-05 17:21:34.354168,17,,,,2.0,6.0,Friday
61,17,2018-10-05 17:21:34.354168,18,,,,2.0,6.0,Friday
63,18,2018-10-05 17:21:34.354168,19,,,,2.0,6.0,Friday
66,19,2018-10-05 17:21:34.354168,20,,,,2.0,7.0,Friday
67,20,2018-10-05 17:21:34.354168,21,,,,2.0,7.0,Friday
68,21,2018-10-05 17:21:34.354168,22,,,,2.0,7.0,Friday


In [58]:
# Step 2: repeat the search inside the step-1 subset, now on the 'cohort2' column.
df_step2 = df_search(df_step1,'cohort2')
df_step2

   cohort2  count      mean       var  total      donext
2      8.0      0       NaN       NaN      4  999.000000
3      9.0      0       NaN       NaN      4  999.000000
1      7.0      1  0.000000       NaN      4    0.588705
0      6.0      3  0.041444  0.000188      4    0.381333
8.0


Unnamed: 0,index,Date,Goal,Complete,Score,Quality,cohort1,cohort2,weekday
69,22,2018-10-05 17:21:34.354168,23,,,,2.0,8.0,Friday
70,23,2018-10-05 17:21:34.354168,24,,,,2.0,8.0,Friday


In [59]:
# Step 3: repeat the search on 'cohort3'.  If set_cohorts never created that
# column, df_search returns df_step2 unchanged and prints nothing.
df_step3 = df_search(df_step2,'cohort3')
df_step3

Unnamed: 0,index,Date,Goal,Complete,Score,Quality,cohort1,cohort2,weekday
69,22,2018-10-05 17:21:34.354168,23,,,,2.0,8.0,Friday
70,23,2018-10-05 17:21:34.354168,24,,,,2.0,8.0,Friday


In [60]:
# Rank the individual 'Goal' values left after the cohort search; the first row
# of the displayed table is the goal with the highest 'donext' score.
df_bandit_class(df_step3, 'Goal')

Unnamed: 0,Goal,count,mean,var,total,donext
0,23,0,,,0,999.0
1,24,0,,,0,999.0
