In [27]:
import pandas as pd
import numpy as np
import subprocess
from datetime import date
import calendar
import bandit.main as bd

In [28]:
def set_cohorts(df, score_col, cohort_size=3, levels=3):
    """Rank rows by ``score_col`` and label them with nested cohort columns.

    The frame is sorted ascending by ``score_col`` and re-indexed 0..n-1
    (the previous index is kept as a column by ``reset_index``).  For each
    level ``k`` in ``1..levels`` a column ``cohort<k>`` is added that splits
    the ranked rows into ``cohort_size ** k`` roughly equal, contiguous
    groups, so each level subdivides the one above it.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows.  Not modified; a new frame is returned.
    score_col : str
        Column to rank by.
    cohort_size : int, default 3
        Branching factor: number of groups at level 1.
    levels : int, default 3
        Number of cohort columns to attempt (``cohort1`` .. ``cohort<levels>``).

    Returns
    -------
    pandas.DataFrame
        Sorted, re-indexed copy with the cohort columns added.  A level is
        only added when the frame has strictly more rows than that level has
        groups, so small frames may be missing the deeper (or all) cohort
        columns.  Labels are floats because they come from a floor division
        by a float group size.

    Raises
    ------
    ValueError
        If ``cohort_size`` is less than 1 or ``levels`` is negative.
    """
    if cohort_size < 1:
        raise ValueError("cohort_size must be a positive integer")
    if levels < 0:
        raise ValueError("levels must be non-negative")

    # Both calls return new frames, so the caller's frame is left untouched.
    df = df.sort_values(score_col).reset_index()

    n_rows = len(df)
    for level in range(1, levels + 1):
        n_groups = cohort_size ** level
        if n_rows > n_groups:
            # Fractional group size keeps the groups as even as possible when
            # n_rows is not a multiple of n_groups.
            grp_size = n_rows / n_groups
            df['cohort{}'.format(level)] = df.index // grp_size

    return df

In [29]:
def df_bandit_class(df, classname):
    """Summarise ``Quality`` per group of ``classname`` and rank the groups.

    Returns one row per distinct ``classname`` value with the columns
    ``count``, ``mean`` and ``var`` of ``Quality``, ``total`` (the sum of all
    counts, repeated on every row) and ``donext`` (the value returned by
    ``bd.get_tuned_ucb`` for that row).  Rows are ordered by ``donext``,
    highest first.
    """
    stats = (
        df.groupby([classname])['Quality']
        .agg(['count', 'mean', 'var'])
        .reset_index()
    )
    stats['total'] = stats['count'].sum()

    # NOTE(review): get_tuned_ucb lives in the external bandit module; it is
    # presumably a UCB1-Tuned style priority score - confirm there.
    priority = stats.apply(lambda row: bd.get_tuned_ucb(row, 'mean', 'var'), axis=1)

    # A missing priority is replaced by 999 so those groups sort to the top.
    stats['donext'] = priority.fillna(999)

    return stats.sort_values('donext', ascending=False)

In [30]:
def df_search(df_in, cohort):
    """Keep only the rows of the top-ranked ``cohort`` group.

    The groups of the ``cohort`` column are ranked with ``df_bandit_class``
    and the ranking table is printed.  If at least one row of ``df_in`` has
    a null ``Complete`` value, the rows belonging to the first-ranked group
    are returned; otherwise ``df_in`` is returned unfiltered.
    """
    ranking = df_bandit_class(df_in, cohort)
    print(ranking)

    # First column of the first row is the label of the best-ranked group.
    best_label = ranking.iloc[0, 0]

    if not df_in['Complete'].isnull().any():
        return df_in

    return df_in[df_in[cohort] == best_label]

In [31]:
# Date stamped onto every candidate row generated for this run.
currDate = date.today()
# Every [Goal, seconds] combination: goals 1..30 crossed with each interval length.
# NOTE(review): `list` shadows the builtin; the name is kept because the next
# cell reads it, but it should be renamed in both places together.
list = [[goal, secs] for goal in range(1, 31) for secs in (30, 45, 60, 90, 120)]

In [32]:
# Blank candidate rows for today: one per [Goal, seconds] pair, with no result yet.
df = pd.DataFrame(list, columns=["Goal", "seconds"])
df['Date'] = currDate
df['Complete'] = np.nan
df = df[['Date', 'Goal', 'seconds', 'Complete']]
# df.to_excel('Outputtest.xlsx')
# Rows loaded from the workbook - presumably the log of already attempted goals.
sheet_info = pd.read_excel(io='BikeGoals.xlsx')
# DataFrame.append was removed in pandas 2.0; concat with ignore_index=True
# gives the same stacked frame with a fresh 0..n-1 index.
df = pd.concat([df, sheet_info], ignore_index=True, sort=False)

In [33]:
# Hand the workbook path to the shell without waiting for it to finish.
# NOTE(review): presumably relies on the OS file association (Windows) to open
# the spreadsheet for editing - confirm on other platforms.
subprocess.Popen(args='BikeGoals.xlsx', shell=True)

<subprocess.Popen at 0x1365ff98>

In [34]:
# Value of a goal grows with both the target and the interval length (less 10 s).
df['Value'] = df['Goal'] * (df['seconds'] - 10)
df = set_cohorts(df, 'Value')
# Met goals (Complete >= Goal) score Value**1.2, everything else Value / 5.
df['Score'] = np.where(df['Complete'] >= df['Goal'], df['Value']**1.2, df['Value'] / 5)
# Rows never attempted compare as "not met" above, so blank their score out.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df.loc[df['Complete'].isnull(), 'Score'] = np.nan
# Class key is "<Goal>-<seconds>", e.g. "10-120".
df['Class'] = df['Goal'].map(str) + '-' + df['seconds'].map(str)
# NOTE(review): bd.reduce comes from the external bandit module; its effect on
# the frame is not visible here - confirm there.
df = bd.reduce(df,'Class')

3 0.9925 3


In [35]:
# Derive a quality measure from the scores and attach it to the frame by index.
# NOTE(review): bd.get_quality is external; the output below suggests it adds
# a 'Quality' column - confirm. Re-running this cell joins the column again.
df_quality = bd.get_quality(df['Score'])
df = df.join(other=df_quality)

In [36]:
# Step 1: narrow all candidates to the best-ranked coarse cohort.
df_step1 = df_search(df_in=df, cohort='cohort1')
df_step1

   cohort1  count      mean  var  total    donext
2      2.0      1  1.000000  NaN      3  1.524074
1      1.0      1  0.297342  NaN      3  0.821416
0     -0.0      1  0.000000  NaN      3  0.524074


Unnamed: 0,index,Date,Goal,seconds,Complete,Value,cohort1,cohort2,cohort3,Score,Class,Quality
102,63,2018-10-04,13,90,,1040,2.0,6.0,17.0,,13-90,
103,146,2018-10-04,30,45,,1050,2.0,6.0,18.0,,30-45,
104,102,2018-10-04,21,60,,1050,2.0,6.0,18.0,,21-60,
105,107,2018-10-04,22,60,,1100,2.0,6.0,18.0,,22-60,
106,152,2018-10-04 00:00:00,10,120,3.0,1100,2.0,6.0,18.0,220.0,10-120,1.0
107,49,2018-10-04,10,120,,1100,2.0,6.0,18.0,,10-120,
108,68,2018-10-04,14,90,,1120,2.0,6.0,19.0,,14-90,
109,112,2018-10-04,23,60,,1150,2.0,6.0,19.0,,23-60,
110,73,2018-10-04,15,90,,1200,2.0,6.0,19.0,,15-90,
111,117,2018-10-04,24,60,,1200,2.0,6.0,19.0,,24-60,


In [37]:
# Step 2: within the step-1 rows, narrow to the best-ranked second-level cohort.
df_step2 = df_search(df_in=df_step1, cohort='cohort2')
df_step2

   cohort2  count  mean  var  total  donext
1      7.0      0   NaN  NaN      1   999.0
2      8.0      0   NaN  NaN      1   999.0
0      6.0      1   1.0  NaN      1     1.0


Unnamed: 0,index,Date,Goal,seconds,Complete,Value,cohort1,cohort2,cohort3,Score,Class,Quality
119,137,2018-10-04,28,60,,1400,2.0,7.0,20.0,,28-60,
120,64,2018-10-04,13,120,,1430,2.0,7.0,21.0,,13-120,
121,88,2018-10-04,18,90,,1440,2.0,7.0,21.0,,18-90,
122,142,2018-10-04,29,60,,1450,2.0,7.0,21.0,,29-60,
123,147,2018-10-04,30,60,,1500,2.0,7.0,21.0,,30-60,
124,93,2018-10-04,19,90,,1520,2.0,7.0,21.0,,19-90,
125,69,2018-10-04,14,120,,1540,2.0,7.0,22.0,,14-120,
126,98,2018-10-04,20,90,,1600,2.0,7.0,22.0,,20-90,
127,74,2018-10-04,15,120,,1650,2.0,7.0,22.0,,15-120,
128,103,2018-10-04,21,90,,1680,2.0,7.0,22.0,,21-90,


In [38]:
# Step 3: within the step-2 rows, narrow to the best-ranked third-level cohort.
df_step3 = df_search(df_in=df_step2, cohort='cohort3')
df_step3

   cohort3  count  mean  var  total  donext
0     20.0      0   NaN  NaN      0   999.0
1     21.0      0   NaN  NaN      0   999.0
2     22.0      0   NaN  NaN      0   999.0
3     23.0      0   NaN  NaN      0   999.0


Unnamed: 0,index,Date,Goal,seconds,Complete,Value,cohort1,cohort2,cohort3,Score,Class,Quality
119,137,2018-10-04,28,60,,1400,2.0,7.0,20.0,,28-60,


In [39]:
# Rank the individual Goal-seconds classes left after the three cohort steps.
df_bandit_class(df=df_step3, classname='Class')

Unnamed: 0,Class,count,mean,var,total,donext
0,28-60,0,,,0,999.0
