In [31]:
import pandas as pd
import numpy as np
import subprocess
from datetime import datetime
import calendar
import bandit.main as bd

In [32]:
def set_cohorts(source_df, score_col, cohort_size=3, levels=3):
    """Assign nested, rank-based cohort labels to the distinct values of a column.

    The distinct values of ``source_df[score_col]`` are sorted ascending and
    numbered 0..n-1. For each level ``k`` in ``1..levels`` the ranks are split
    into ``cohort_size ** k`` roughly equal groups and the group number is
    stored in column ``cohort<k>``. A level is only added when there are more
    distinct values than groups at that level.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Frame containing ``score_col``.
    score_col : str
        Name of the column whose distinct values are bucketed.
    cohort_size : int, default 3
        Branching factor: level ``k`` has ``cohort_size ** k`` groups.
    levels : int, default 3
        Number of cohort levels to attempt (``cohort1`` .. ``cohort<levels>``).

    Returns
    -------
    pandas.DataFrame
        One row per distinct score, sorted ascending, with ``score_col`` and
        whichever ``cohort<k>`` columns qualified. Cohort labels are floats
        because the row position is floor-divided by a float group size.

    Raises
    ------
    ValueError
        If ``cohort_size`` is less than 1 or ``levels`` is negative.
    """
    if cohort_size < 1:
        raise ValueError("cohort_size must be a positive integer")
    if levels < 0:
        raise ValueError("levels must be non-negative")

    # One row per distinct score, in ascending order, indexed 0..n-1 so the
    # index doubles as the score's rank.
    df = (
        pd.DataFrame({score_col: source_df[score_col].unique()})
        .sort_values(score_col, ascending=True)
        .reset_index(drop=True)
    )

    n_scores = len(df)
    for level in range(1, levels + 1):
        n_groups = cohort_size ** level
        # Skip levels that would have no more scores than groups.
        if n_scores > n_groups:
            grp_size = n_scores / n_groups
            df['cohort%d' % level] = df.index // grp_size

    return df

In [33]:
def df_bandit_class(df, classname):
    """Rank the groups of ``classname`` by their bandit priority.

    Groups ``df`` by ``classname`` and summarises the 'Quality' column per
    group (count, mean, variance). Every row also carries 'total', the sum of
    all group counts. 'donext' is the value returned by ``bd.get_tuned_ucb``
    for the row, with missing results replaced by 999 so those groups sort
    to the top.

    Returns the summary frame ordered by 'donext', highest first.
    """
    stats = (
        df.groupby([classname])['Quality']
        .agg(['count', 'mean', 'var'])
        .reset_index()
    )
    stats['total'] = stats['count'].sum()

    priority = stats.apply(
        lambda row: bd.get_tuned_ucb(row, 'mean', 'var'),
        axis=1,
    )
    stats['donext'] = priority.fillna(999)

    return stats.sort_values('donext', ascending=False)

In [34]:
def df_search(df_in, cohort):
    """Narrow ``df_in`` to the top-ranked group of the ``cohort`` column.

    If ``cohort`` is not a column of ``df_in`` the frame is returned as is.
    Otherwise the groups are ranked with ``df_bandit_class``, the ranking is
    printed, and - provided at least one row still has a null 'Complete' -
    only the rows belonging to the first-ranked group are returned. When no
    row has a null 'Complete' the input frame is returned unfiltered.
    """
    if cohort not in df_in:
        return df_in

    ranking = df_bandit_class(df_in, cohort)
    print(ranking)
    best_group = ranking.iloc[0, 0]

    has_pending = df_in['Complete'].isnull().sum() > 0
    return df_in[df_in[cohort] == best_group] if has_pending else df_in

In [35]:
def rate_calc(df):
    """Return an integer value score for one 'Goal' / 'seconds' record.

    ``df`` is a single record (for example a row passed by
    ``DataFrame.apply(..., axis=1)``) exposing 'Goal' and 'seconds'.
    Pedalling time is Goal * seconds and rest time is 15 for every Goal
    beyond the first. The score is the pedalling time multiplied by the
    pedalling share of the total time, truncated to an int.
    """
    goal = df['Goal']
    pedalling = goal * df['seconds']
    resting = (goal - 1) * 15
    efficiency = pedalling / (pedalling + resting)
    return int(pedalling * efficiency)

In [36]:
# Candidate grid: every Goal from 1 to 30 paired with each 'seconds' option,
# stamped with the current time and with 'Complete' left empty (NaN).
currDate = datetime.today()
goal_list = [[goal, secs] for goal in range(1, 31) for secs in (30, 45, 60, 90, 120)]
df = (
    pd.DataFrame.from_records(goal_list, columns=["Goal", "seconds"])
    .rename(index=str)  # index labels are converted to strings
    .assign(Date=currDate, Complete=np.nan)
)
df = df[['Date', 'Goal', 'seconds', 'Complete']]

In [37]:
# Load the logged attempts and drop from the candidate grid every
# (Goal, seconds) pair that already has at least one non-null 'Complete'.
df_excel = pd.read_excel(io='BikeGoals.xlsx')
foo = (
    df_excel.groupby(['Goal', 'seconds'])
    .count()
    .assign(remove=lambda counts: counts['Complete'] > 0)
    [['remove']]
)
# Left merge: pairs absent from the log get NaN in 'remove' and are kept.
df = df.merge(foo, how='left', on=['Goal', 'seconds'])
df = df.loc[df['remove'] != True].drop(columns=['remove'])
df

Unnamed: 0,Date,Goal,seconds,Complete
1,2018-10-09 18:21:37.708317,1,45,
2,2018-10-09 18:21:37.708317,1,60,
3,2018-10-09 18:21:37.708317,1,90,
4,2018-10-09 18:21:37.708317,1,120,
5,2018-10-09 18:21:37.708317,2,30,
6,2018-10-09 18:21:37.708317,2,45,
7,2018-10-09 18:21:37.708317,2,60,
8,2018-10-09 18:21:37.708317,2,90,
9,2018-10-09 18:21:37.708317,2,120,
10,2018-10-09 18:21:37.708317,3,30,


In [38]:
# Stack the remaining candidate rows and the logged attempts into one frame
# with a fresh 0..n-1 index. pd.concat is used because DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
df = pd.concat([df, df_excel], ignore_index=True)

In [39]:
# Score every row with rate_calc, bucket the distinct scores into cohorts,
# then attach the cohort columns to each row by matching on 'Value'.
df = df.assign(Value=df.apply(rate_calc, axis=1))
val_df = set_cohorts(df, 'Value')
df = pd.merge(df, val_df, on='Value')

In [40]:
# Launch 'BikeGoals.xlsx' through the system shell. Popen returns immediately
# and does not wait for the launched process to finish.
# NOTE(review): presumably this relies on an OS file association to open the
# workbook for manual editing - confirm on the target platform.
subprocess.Popen('BikeGoals.xlsx', shell=True)

<subprocess.Popen at 0x136102e8>

In [41]:
# todo - score partial runs
# A row whose 'Complete' reaches its 'Goal' scores Value**1.2; any other row scores 0.
df['Score'] = np.where(df['Complete'] >= df['Goal'], df['Value']**1.2, 0)
# Rows with no recorded 'Complete' are left unscored (NaN) instead of 0.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df.loc[df['Complete'].isnull(), 'Score'] = np.nan
# 'Class' labels each Goal/seconds combination, e.g. '2-120'.
df['Class'] = df['Goal'].map(str) + '-' + df['seconds'].map(str)
# NOTE(review): bd.reduce is an external helper keyed on 'Class'; its exact
# effect is not visible here - confirm before relying on row counts.
df = bd.reduce(df,'Class')

0 1.0 0


In [42]:
# Derive a quality measure from 'Score' with the external bandit helper and
# join it onto df by index.
# NOTE(review): df_bandit_class reads a 'Quality' column, so get_quality
# presumably returns data named 'Quality' - confirm against bandit.main.
df_quality = bd.get_quality(df['Score'])
df = df.join(df_quality)

In [43]:
# Step 1: narrow df to the 'cohort1' group that df_search ranks first
# (df_search also prints the ranking table), then display the result.
df_step1 = df_search(df,'cohort1')
df_step1

   cohort1  count      mean       var  total    donext
0      0.0      4  0.397473  0.219653      7  0.746212
1      1.0      1  0.000000       NaN      7  0.697479
2      2.0      2  0.000000  0.000000      7  0.493192


Unnamed: 0,Date,Goal,seconds,Complete,Value,cohort1,cohort2,cohort3,Score,Class,Quality
0,2018-10-09 18:21:37.708317,1,45,,45,0.0,0.0,0.0,,1-45,
1,2018-10-09 18:21:37.708317,1,60,,60,0.0,0.0,0.0,,1-60,
2,2018-10-09 18:21:37.708317,1,90,,90,0.0,0.0,1.0,,1-90,
3,2018-10-09 18:21:37.708317,1,120,,120,0.0,0.0,2.0,,1-120,
4,2018-10-09 18:21:37.708317,2,30,,48,0.0,0.0,0.0,,2-30,
5,2018-10-09 18:21:37.708317,2,45,,77,0.0,0.0,0.0,,2-45,
6,2018-10-09 18:21:37.708317,2,60,,106,0.0,0.0,1.0,,2-60,
7,2018-10-09 18:21:37.708317,2,90,,166,0.0,0.0,2.0,,2-90,
8,2018-10-09 18:21:37.708317,8,30,,166,0.0,0.0,2.0,,8-30,
9,2018-10-09 18:21:37.708317,2,120,,225,0.0,1.0,4.0,,2-120,


In [44]:
# Step 2: within the step-1 rows, narrow to the first-ranked 'cohort2' group
# and display the result.
df_step2 = df_search(df_step1,'cohort2')
df_step2

   cohort2  count      mean  var  total    donext
1      1.0      1  0.536722  NaN      4  1.125427
2      2.0      2  0.500000  0.5      4  0.916277
0      0.0      1  0.053168  NaN      4  0.641873


Unnamed: 0,Date,Goal,seconds,Complete,Value,cohort1,cohort2,cohort3,Score,Class,Quality
9,2018-10-09 18:21:37.708317,2,120,,225,0.0,1.0,4.0,,2-120,
13,2018-10-09 18:21:37.708317,3,90,,243,0.0,1.0,4.0,,3-90,
17,2018-10-09 18:21:37.708317,4,60,,202,0.0,1.0,3.0,,4-60,
21,2018-10-09 18:21:37.708317,5,45,,177,0.0,1.0,3.0,,5-45,
22,2018-10-09 18:21:37.708317,5,60,,250,0.0,1.0,4.0,,5-60,
26,2018-10-09 18:21:37.708317,6,45,,211,0.0,1.0,3.0,,6-45,
27,2018-10-09 18:21:37.708317,6,60,,297,0.0,1.0,5.0,,6-60,
31,2018-10-09 18:21:37.708317,7,45,,245,0.0,1.0,4.0,,7-45,
35,2018-10-09 18:21:37.708317,8,45,,278,0.0,1.0,5.0,,8-45,
39,2018-10-09 18:21:37.708317,9,30,,186,0.0,1.0,3.0,,9-30,


In [45]:
# Step 3: within the step-2 rows, narrow to the first-ranked 'cohort3' group
# and display the result.
df_step3 = df_search(df_step2,'cohort3')
df_step3

   cohort3  count      mean  var  total      donext
1      4.0      0       NaN  NaN      1  999.000000
2      5.0      0       NaN  NaN      1  999.000000
0      3.0      1  0.536722  NaN      1    0.536722


Unnamed: 0,Date,Goal,seconds,Complete,Value,cohort1,cohort2,cohort3,Score,Class,Quality
9,2018-10-09 18:21:37.708317,2,120,,225,0.0,1.0,4.0,,2-120,
13,2018-10-09 18:21:37.708317,3,90,,243,0.0,1.0,4.0,,3-90,
22,2018-10-09 18:21:37.708317,5,60,,250,0.0,1.0,4.0,,5-60,
31,2018-10-09 18:21:37.708317,7,45,,245,0.0,1.0,4.0,,7-45,
46,2018-10-09 18:21:37.708317,11,30,,226,0.0,1.0,4.0,,11-30,
50,2018-10-09 18:21:37.708317,12,30,,246,0.0,1.0,4.0,,12-30,


In [46]:
# Rank the individual 'Class' values (Goal-seconds labels) left after step 3;
# the table is sorted by 'donext', highest first.
df_bandit_class(df_step3, 'Class')

Unnamed: 0,Class,count,mean,var,total,donext
0,11-30,0,,,0,999.0
1,12-30,0,,,0,999.0
2,2-120,0,,,0,999.0
3,3-90,0,,,0,999.0
4,5-60,0,,,0,999.0
5,7-45,0,,,0,999.0
