In [601]:
import pandas as pd
import numpy as np
import math
from datetime import date, time, datetime, timedelta
import subprocess
import bandit.main as bd
import calendar

In [602]:
def set_cohorts(source_df, score_col):
    cohort_size = 3
    cohort_size_2 = cohort_size**2
    cohort_size_3 = cohort_size**3

    # break out distinct scores
    uniq_list = source_df[score_col].unique()
    df = pd.DataFrame({score_col : uniq_list}).sort_values(score_col, ascending=True)
    df = df.sort_values (score_col)
    df = df.reset_index (drop=True)
    
    if len (df) > cohort_size:
        grp_size = len (df) / cohort_size
        df['cohort1'] = df.index // grp_size

    if len (df) > cohort_size_2:
        grp_size = len (df) / cohort_size_2
        df['cohort2'] = df.index // grp_size
        
    if len (df) > cohort_size_3:
        grp_size = len (df) / cohort_size_3
        df['cohort3'] = df.index // grp_size

    return df

In [603]:
def df_search(df_in, cohort):
    if not cohort in df_in:
        return df_in

    df_class = df_bandit_class(df_in, cohort)
    print(df_class)
    next_class = df_class.iloc[0, 0]
    if df_in['Complete'].isnull().sum() > 0:
        df_filtered = df_in[df_in[cohort] == next_class]
    else:
        df_filtered = df_in
        
    return df_filtered

In [604]:
def set_start(df):
    goal_minutes = df['Goal']
    final = datetime.combine(date.today(), time(0, 0)) + timedelta(minutes=goal_minutes)
    return final

In [605]:
# todo: combine bandit_class and bandit_diff
def df_bandit_class(df, classname):
    dfgb = df.groupby([classname])
    dfgb = dfgb['Quality'].agg(['count','mean', 'var']).reset_index()
    dfgb['total'] = dfgb['count'].sum()
    dfgb['donext'] = dfgb.apply(lambda x: bd.get_tuned_ucb(x, 'mean', 'var'), axis=1).fillna(999)
    
    # set up the time
    # dfgb['CanStart'] = dfgb.apply(set_start, axis=1)
    
    foo = dfgb.sort_values('donext', ascending=False)
    return foo

In [606]:
# timelist = [datetime.combine(date.today(), time(0, 0)) + timedelta(minutes=i) for i in  range(0,1440,5)]
minute_list = [i for i in  range(5,1440,5)]
df = pd.DataFrame({"Start":minute_list})

In [607]:
currDate = datetime.today()
minute_list = [i for i in  range(5,1440,5)]
df = pd.DataFrame({"Goal":minute_list})
df["Date"] = currDate
df["Complete"] = np.nan
df = df[['Date', 'Goal', 'Complete']]

In [608]:
subprocess.Popen('StopSmoking.xlsx', shell=True)

<subprocess.Popen at 0x149b5d30>

In [609]:
df_excel = pd.read_excel(io='StopSmoking.xlsx')
df = df.append(df_excel).reset_index(drop=True)

In [610]:
df = bd.reduce(df,'Goal')

1 0.9975 1


In [611]:
df.tail(10)

Unnamed: 0,Date,Goal,Complete
278,2018-10-06 02:13:33.213988,1395,
279,2018-10-06 02:13:33.213988,1400,
280,2018-10-06 02:13:33.213988,1405,
281,2018-10-06 02:13:33.213988,1410,
282,2018-10-06 02:13:33.213988,1415,
283,2018-10-06 02:13:33.213988,1420,
284,2018-10-06 02:13:33.213988,1425,
285,2018-10-06 02:13:33.213988,1430,
286,2018-10-06 02:13:33.213988,1435,
287,2018-10-06 00:00:00.000000,5,1.0


In [612]:
df['Score'] = np.where(df['Complete'] >= 1, df['Goal']**1.2, 0)
df.loc[df['Complete'].isnull(), 'Score'] = np.NaN

In [613]:
df_quality = bd.get_quality(df['Score'])

In [614]:
df = df.join(df_quality).sort_values('Quality', ascending=False)

In [615]:
val_df = set_cohorts(df, 'Goal')
df = df.merge(val_df, on='Goal')

In [616]:
currDay = calendar.day_name[currDate.weekday()]
df_today = df
df_today['weekday'] = df_today['Date'].dt.day_name()
df_today = df_today[df_today['weekday'] == currDay]
today_sum = df_today['Complete'].notnull().sum()
if today_sum >= 10:
    df = df_today

In [617]:
df.sort_values('Date', ascending=True).tail(10)

Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
98,2018-10-06 02:13:33.213988,490,,,,1.0,3.0,9.0,Saturday
97,2018-10-06 02:13:33.213988,485,,,,1.0,3.0,9.0,Saturday
96,2018-10-06 02:13:33.213988,480,,,,0.0,2.0,8.0,Saturday
95,2018-10-06 02:13:33.213988,475,,,,0.0,2.0,8.0,Saturday
94,2018-10-06 02:13:33.213988,470,,,,0.0,2.0,8.0,Saturday
93,2018-10-06 02:13:33.213988,465,,,,0.0,2.0,8.0,Saturday
92,2018-10-06 02:13:33.213988,460,,,,0.0,2.0,8.0,Saturday
91,2018-10-06 02:13:33.213988,455,,,,0.0,2.0,8.0,Saturday
286,2018-10-06 02:13:33.213988,1430,,,,2.0,8.0,26.0,Saturday
287,2018-10-06 02:13:33.213988,1435,,,,2.0,8.0,26.0,Saturday


In [618]:
df = df.sort_values('Quality', ascending=False)
df.head(10)

Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
0,2018-10-06 00:00:00.000000,5,1.0,6.898648,0.5,-0.0,-0.0,-0.0,Saturday
1,2018-10-06 02:13:33.213988,5,,,,-0.0,-0.0,-0.0,Saturday
2,2018-10-06 02:13:33.213988,10,,,,0.0,0.0,0.0,Saturday
3,2018-10-06 02:13:33.213988,15,,,,0.0,0.0,0.0,Saturday
4,2018-10-06 02:13:33.213988,20,,,,0.0,0.0,0.0,Saturday
5,2018-10-06 02:13:33.213988,25,,,,0.0,0.0,0.0,Saturday
6,2018-10-06 02:13:33.213988,30,,,,0.0,0.0,0.0,Saturday
7,2018-10-06 02:13:33.213988,35,,,,0.0,0.0,0.0,Saturday
8,2018-10-06 02:13:33.213988,40,,,,0.0,0.0,0.0,Saturday
9,2018-10-06 02:13:33.213988,45,,,,0.0,0.0,0.0,Saturday


In [619]:
df_step1 = df_search(df,'cohort1')
df_step1

   cohort1  count  mean  var  total  donext
1      1.0      0   NaN  NaN      1   999.0
2      2.0      0   NaN  NaN      1   999.0
0     -0.0      1   0.5  NaN      1     0.5


Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
97,2018-10-06 02:13:33.213988,485,,,,1.0,3.0,9.0,Saturday
98,2018-10-06 02:13:33.213988,490,,,,1.0,3.0,9.0,Saturday
99,2018-10-06 02:13:33.213988,495,,,,1.0,3.0,9.0,Saturday
100,2018-10-06 02:13:33.213988,500,,,,1.0,3.0,9.0,Saturday
101,2018-10-06 02:13:33.213988,505,,,,1.0,3.0,9.0,Saturday
102,2018-10-06 02:13:33.213988,510,,,,1.0,3.0,9.0,Saturday
103,2018-10-06 02:13:33.213988,515,,,,1.0,3.0,9.0,Saturday
104,2018-10-06 02:13:33.213988,520,,,,1.0,3.0,9.0,Saturday
105,2018-10-06 02:13:33.213988,525,,,,1.0,3.0,9.0,Saturday
106,2018-10-06 02:13:33.213988,530,,,,1.0,3.0,9.0,Saturday


In [620]:
df_step2 = df_search(df_step1,'cohort2')
df_step2

   cohort2  count  mean  var  total  donext
0      3.0      0   NaN  NaN      0   999.0
1      4.0      0   NaN  NaN      0   999.0
2      5.0      0   NaN  NaN      0   999.0


Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
97,2018-10-06 02:13:33.213988,485,,,,1.0,3.0,9.0,Saturday
98,2018-10-06 02:13:33.213988,490,,,,1.0,3.0,9.0,Saturday
99,2018-10-06 02:13:33.213988,495,,,,1.0,3.0,9.0,Saturday
100,2018-10-06 02:13:33.213988,500,,,,1.0,3.0,9.0,Saturday
101,2018-10-06 02:13:33.213988,505,,,,1.0,3.0,9.0,Saturday
102,2018-10-06 02:13:33.213988,510,,,,1.0,3.0,9.0,Saturday
103,2018-10-06 02:13:33.213988,515,,,,1.0,3.0,9.0,Saturday
104,2018-10-06 02:13:33.213988,520,,,,1.0,3.0,9.0,Saturday
105,2018-10-06 02:13:33.213988,525,,,,1.0,3.0,9.0,Saturday
106,2018-10-06 02:13:33.213988,530,,,,1.0,3.0,9.0,Saturday


In [621]:
df_step3 = df_search(df_step2,'cohort3')
df_step3

   cohort3  count  mean  var  total  donext
0      9.0      0   NaN  NaN      0   999.0
1     10.0      0   NaN  NaN      0   999.0
2     11.0      0   NaN  NaN      0   999.0


Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
97,2018-10-06 02:13:33.213988,485,,,,1.0,3.0,9.0,Saturday
98,2018-10-06 02:13:33.213988,490,,,,1.0,3.0,9.0,Saturday
99,2018-10-06 02:13:33.213988,495,,,,1.0,3.0,9.0,Saturday
100,2018-10-06 02:13:33.213988,500,,,,1.0,3.0,9.0,Saturday
101,2018-10-06 02:13:33.213988,505,,,,1.0,3.0,9.0,Saturday
102,2018-10-06 02:13:33.213988,510,,,,1.0,3.0,9.0,Saturday
103,2018-10-06 02:13:33.213988,515,,,,1.0,3.0,9.0,Saturday
104,2018-10-06 02:13:33.213988,520,,,,1.0,3.0,9.0,Saturday
105,2018-10-06 02:13:33.213988,525,,,,1.0,3.0,9.0,Saturday
106,2018-10-06 02:13:33.213988,530,,,,1.0,3.0,9.0,Saturday


In [622]:
df = df_bandit_class(df_step3, 'Goal')
df['CanStart'] = df.apply(set_start, axis=1).dt.strftime('%I:%M %p')
df

Unnamed: 0,Goal,count,mean,var,total,donext,CanStart
0,485,0,,,0,999.0,08:05 AM
1,490,0,,,0,999.0,08:10 AM
2,495,0,,,0,999.0,08:15 AM
3,500,0,,,0,999.0,08:20 AM
4,505,0,,,0,999.0,08:25 AM
5,510,0,,,0,999.0,08:30 AM
6,515,0,,,0,999.0,08:35 AM
7,520,0,,,0,999.0,08:40 AM
8,525,0,,,0,999.0,08:45 AM
9,530,0,,,0,999.0,08:50 AM
