In [22]:
import pandas as pd
import numpy as np
import subprocess
from datetime import date, datetime
import calendar
import bandit.main as bd

In [23]:
def set_cohorts(source_df, score_col):
    cohort_size = 3
    cohort_size_2 = cohort_size**2
    cohort_size_3 = cohort_size**3

    # break out distinct scores
    uniq_list = source_df[score_col].unique()
    df = pd.DataFrame({score_col : uniq_list}).sort_values(score_col, ascending=True)
    df = df.sort_values (score_col)
    df = df.reset_index (drop=True)
    
    if len (df) > cohort_size:
        grp_size = len (df) / cohort_size
        df['cohort1'] = df.index // grp_size

    if len (df) > cohort_size_2:
        grp_size = len (df) / cohort_size_2
        df['cohort2'] = df.index // grp_size
        
    if len (df) > cohort_size_3:
        grp_size = len (df) / cohort_size_3
        df['cohort3'] = df.index // grp_size

    return df

In [24]:
def df_bandit_class(df, classname):
    dfgb = df.groupby([classname])
    dfgb = dfgb['Quality'].agg(['count','mean', 'var']).reset_index()
    dfgb['total'] = dfgb['count'].sum()
    dfgb['donext'] = dfgb.apply(lambda x: bd.get_tuned_ucb(x, 'mean', 'var'), axis=1).fillna(999)
    foo = dfgb.sort_values('donext', ascending=False)
    return foo

In [25]:
def df_search(df_in, cohort):
    if not cohort in df_in:
        return df_in

    df_class = df_bandit_class(df_in, cohort)
    print(df_class)
    next_class = df_class.iloc[0, 0]
    if df_in['Complete'].isnull().sum() > 0:
        df_filtered = df_in[df_in[cohort] == next_class]
    else:
        df_filtered = df_in

    return df_filtered

In [26]:
currDate = datetime.today()
goal_list = [y for y in range(1,26)]
df = pd.DataFrame({"Goal":goal_list})
df["Date"] = currDate
df["Complete"] = np.nan
df = df[['Date', 'Goal', 'Complete']]

In [27]:
subprocess.Popen('GoalSetter2.xlsx', shell=True)

<subprocess.Popen at 0x1369aac8>

In [28]:
df_excel = pd.read_excel(io='GoalSetter2.xlsx')
foo = df_excel.groupby('Goal').count()
foo['remove'] = np.where(foo['Complete'] > 0, True, False)
foo = foo[['remove']]
df = df.merge(foo,on='Goal')
df = df[df['remove'] == False].drop(['remove'],axis=1)
df

Unnamed: 0,Date,Goal,Complete
23,2018-10-09 17:06:01.962887,25,


In [29]:
df = df.append(df_excel).reset_index(drop=True)
df['Class'] = df['Goal']

In [30]:
df

Unnamed: 0,Date,Goal,Complete,Class
0,2018-10-09 17:06:01.962887,25,,25
1,2018-08-10 00:00:00.000000,1,1.0,1
2,2018-08-18 00:00:00.000000,2,2.0,2
3,2018-08-01 00:00:00.000000,3,3.0,3
4,2018-08-01 00:00:00.000000,7,7.0,7
5,2018-08-14 00:00:00.000000,8,7.0,8
6,2018-08-14 00:00:00.000000,9,10.0,9
7,2018-08-15 00:00:00.000000,10,14.0,10
8,2018-08-16 00:00:00.000000,11,6.0,11
9,2018-08-01 00:00:00.000000,12,1.0,12


In [31]:
df = bd.reduce(df,'Goal')

36 0.91 33


In [32]:
df.head(10)

Unnamed: 0,Date,Goal,Complete,Class
1,2018-08-10,1,1.0,1
2,2018-08-18,2,2.0,2
3,2018-08-01,3,3.0,3
6,2018-08-14,9,10.0,9
7,2018-08-15,10,14.0,10
8,2018-08-16,11,6.0,11
9,2018-08-01,12,1.0,12
10,2018-08-24,13,14.0,13
11,2018-08-01,14,4.0,14
12,2018-08-19,15,15.0,15


In [33]:
df['Score'] = np.where(df['Complete'] >= df['Goal'], df['Complete']**1.1, df['Complete'] / 5)

In [34]:
df_quality = bd.get_quality(df['Score'])

In [35]:
val_df = set_cohorts(df, 'Goal')
df = df.merge(val_df, on='Goal')

In [36]:
currDay = calendar.day_name[currDate.weekday()]
df_today = df
df_today['weekday'] = df_today['Date'].dt.day_name()
df_today = df_today[df_today['weekday'] == currDay]
today_sum = df_today['Complete'].notnull().sum()
if today_sum >= 10:
    df = df_today

In [37]:
df.sort_values('Date', ascending=True).tail(10)

Unnamed: 0,Date,Goal,Complete,Class,Score,cohort1,cohort2,weekday
9,2018-09-30,9,17.0,9,22.568038,0.0,2.0,Sunday
10,2018-10-01,9,9.0,9,11.211578,0.0,2.0,Monday
41,2018-10-02,7,7.0,7,8.503698,0.0,1.0,Tuesday
48,2018-10-03,8,8.0,8,9.849155,0.0,2.0,Wednesday
49,2018-10-04,8,4.0,8,0.8,0.0,2.0,Thursday
52,2018-10-05,21,21.0,21,28.473524,2.0,7.0,Friday
53,2018-10-06,24,15.0,24,3.0,2.0,8.0,Saturday
54,2018-10-07,22,15.0,22,3.0,2.0,7.0,Sunday
55,2018-10-08,23,9.0,23,1.8,2.0,7.0,Monday
56,2018-10-09,25,,25,,2.0,8.0,Tuesday


In [38]:
df_quality = bd.get_quality(df['Score'])
df = df.join(df_quality)

In [39]:
df_step1 = df_search(df,'cohort1')
df_step1

   cohort1  count      mean       var  total    donext
2      2.0      7  0.197043  0.126764     56  0.576203
0     -0.0     25  0.360674  0.056169     56  0.561306
1      1.0     24  0.277491  0.091647     56  0.482261


Unnamed: 0,Date,Goal,Complete,Class,Score,cohort1,cohort2,weekday,Quality
35,2018-08-22,18,5.0,18,1.0,2.0,6.0,Wednesday,0.03512
43,2018-09-12,19,10.0,19,2.0,2.0,6.0,Wednesday,0.070241
50,2018-09-19,20,0.0,20,0.0,2.0,6.0,Wednesday,0.0
52,2018-10-05,21,21.0,21,28.473524,2.0,7.0,Friday,1.0
53,2018-10-06,24,15.0,24,3.0,2.0,8.0,Saturday,0.105361
54,2018-10-07,22,15.0,22,3.0,2.0,7.0,Sunday,0.105361
55,2018-10-08,23,9.0,23,1.8,2.0,7.0,Monday,0.063217
56,2018-10-09,25,,25,,2.0,8.0,Tuesday,


In [40]:
df_step2 = df_search(df_step1,'cohort2')
df_step2

   cohort2  count      mean       var  total    donext
2      8.0      1  0.105361       NaN      7  0.802840
1      7.0      3  0.389526  0.279953      7  0.792216
0      6.0      3  0.035120  0.001233      7  0.437810


Unnamed: 0,Date,Goal,Complete,Class,Score,cohort1,cohort2,weekday,Quality
53,2018-10-06,24,15.0,24,3.0,2.0,8.0,Saturday,0.105361
56,2018-10-09,25,,25,,2.0,8.0,Tuesday,


In [41]:
df_step3 = df_search(df_step2,'cohort3')
df_step3

Unnamed: 0,Date,Goal,Complete,Class,Score,cohort1,cohort2,weekday,Quality
53,2018-10-06,24,15.0,24,3.0,2.0,8.0,Saturday,0.105361
56,2018-10-09,25,,25,,2.0,8.0,Tuesday,


In [42]:
df_bandit_class(df_step3, 'Class')

Unnamed: 0,Class,count,mean,var,total,donext
1,25,0,,,1,999.0
0,24,1,0.105361,,1,0.105361
