In [1]:
import pandas as pd
import numpy as np
import math
from datetime import date, time, datetime, timedelta
import subprocess
import bandit.main as bd
import calendar

In [2]:
def set_cohorts(source_df, score_col, cohort_size=3):
    """Assign every distinct score to up to three levels of rank-based cohorts.

    The distinct values of ``score_col`` are sorted ascending and split by
    rank into ``cohort_size`` groups (``cohort1``), ``cohort_size**2`` groups
    (``cohort2``) and ``cohort_size**3`` groups (``cohort3``).  A level is
    only added when there are strictly more distinct scores than groups at
    that level, so small inputs get fewer (or no) cohort columns.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Frame containing ``score_col``.
    score_col : str
        Name of the column whose distinct values are ranked.
    cohort_size : int, default 3
        Number of groups at the first level; deeper levels use its square
        and cube.

    Returns
    -------
    pandas.DataFrame
        One row per distinct score (ascending, fresh 0..n-1 index) with the
        score column plus whichever ``cohortN`` columns apply.  Labels are
        floats running from ``0.0`` to ``groups - 1``.
    """
    scores = pd.DataFrame({score_col: source_df[score_col].unique()})
    scores = scores.sort_values(score_col, ascending=True).reset_index(drop=True)

    n_scores = len(scores)
    ranks = np.arange(n_scores)

    for level in (1, 2, 3):
        n_groups = cohort_size ** level
        if n_scores > n_groups:
            # Exact integer arithmetic: floor(rank * groups / n).  Dividing by
            # a float group size instead can drop a boundary rank into the
            # previous group and can produce -0.0 labels.  The result is cast
            # back to float so the column dtype callers see is unchanged.
            labels = (ranks * n_groups) // n_scores
            scores['cohort%d' % level] = labels.astype(float)

    return scores

In [3]:
def df_search(df_in, cohort):
    """Narrow ``df_in`` to the rows of the top-ranked group of ``cohort``.

    If ``cohort`` is not a column of ``df_in`` the frame is returned as is.
    Otherwise the groups are ranked with ``df_bandit_class``, the ranking
    table is printed, and the first group's label is taken as the pick.
    The frame is filtered to that group only while ``Complete`` still holds
    at least one null; with no nulls the input frame is returned unfiltered.
    """
    if cohort not in df_in:
        return df_in

    ranked_classes = df_bandit_class(df_in, cohort)
    print(ranked_classes)
    best_class = ranked_classes.iloc[0, 0]

    has_open_rows = df_in['Complete'].isnull().sum() > 0
    if not has_open_rows:
        return df_in

    return df_in[df_in[cohort] == best_class]

In [4]:
def set_start(df):
    """Turn a row's ``Goal`` (minutes past midnight) into a datetime for today.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row exposing a numeric ``'Goal'`` entry.  Despite the
        parameter name, this is called once per row via ``apply(axis=1)``.

    Returns
    -------
    datetime.datetime
        Today's date at 00:00 plus ``Goal`` minutes.
    """
    # timedelta() rejects numpy integer scalars (what an all-integer row
    # yields), so coerce to a plain Python float first.
    goal_minutes = float(df['Goal'])
    midnight = datetime.combine(date.today(), time(0, 0))
    return midnight + timedelta(minutes=goal_minutes)

In [5]:
# todo: combine bandit_class and bandit_diff
def df_bandit_class(df, classname):
    """Rank the groups of ``classname`` by their bandit score, best first.

    Groups ``df`` by ``classname`` and summarises ``Quality`` per group
    (``count``, ``mean``, ``var``).  ``total`` repeats the sum of all counts
    on every row.  ``donext`` is ``bd.get_tuned_ucb`` applied row by row,
    with missing results replaced by 999.  The table is returned sorted by
    ``donext`` in descending order.
    """
    stats = (
        df.groupby([classname])['Quality']
        .agg(['count', 'mean', 'var'])
        .reset_index()
    )
    stats['total'] = stats['count'].sum()

    ucb = stats.apply(lambda row: bd.get_tuned_ucb(row, 'mean', 'var'), axis=1)
    stats['donext'] = ucb.fillna(999)

    return stats.sort_values('donext', ascending=False)

In [6]:
# Candidate offsets in minutes: every 5 minutes from 5 up to 1435.
# (An earlier draft built absolute datetimes here instead of minute offsets.)
minute_list = list(range(5, 1440, 5))
df = pd.DataFrame(data={"Start": minute_list})

In [7]:
# One candidate row per 5-minute goal (5..1435), stamped with the current
# time and with no outcome recorded yet.
currDate = datetime.today()
minute_list = list(range(5, 1440, 5))
df = (
    pd.DataFrame({"Goal": minute_list})
    .assign(Date=currDate, Complete=np.nan)
    .loc[:, ['Date', 'Goal', 'Complete']]
)

In [8]:
# Run the workbook's file name as a shell command without waiting for it to
# finish; the returned Popen handle is not kept.
# NOTE(review): presumably this relies on the OS shell opening .xlsx files with
# their associated application (Windows behaviour) - confirm on other platforms.
subprocess.Popen('StopSmoking.xlsx', shell=True)

<subprocess.Popen at 0x134b4940>

In [9]:
# Read the rows stored in StopSmoking.xlsx and stack them under the generated
# candidate rows, then renumber the index 0..n-1.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and gives the same row order and column union.
df_excel = pd.read_excel(io='StopSmoking.xlsx')
df = pd.concat([df, df_excel]).reset_index(drop=True)

In [10]:
# NOTE(review): bd.reduce lives in bandit.main, not in this notebook. It is
# given the frame and the 'Goal' column name and its result replaces df -
# confirm what it removes or collapses.
df = bd.reduce(df,'Goal')

4 0.99 4


In [11]:
# Show the last 10 rows of the frame as a quick check.
df.tail(10)

Unnamed: 0,Date,Goal,Complete
281,2018-10-09 06:04:27.898516,1410,
282,2018-10-09 06:04:27.898516,1415,
283,2018-10-09 06:04:27.898516,1420,
284,2018-10-09 06:04:27.898516,1425,
285,2018-10-09 06:04:27.898516,1430,
286,2018-10-09 06:04:27.898516,1435,
287,2018-10-06 00:00:00.000000,5,1.0
288,2018-10-07 00:00:00.000000,485,1.0
289,2018-10-08 00:00:00.000000,965,1.0
290,2018-10-08 00:00:00.000000,1125,


In [12]:
# Score rows with Complete >= 1 as Goal ** 1.2 and every other row as 0, then
# blank the score where Complete is missing so those rows stay unscored.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df['Score'] = np.where(df['Complete'] >= 1, df['Goal']**1.2, 0)
df.loc[df['Complete'].isnull(), 'Score'] = np.nan

In [13]:
# NOTE(review): bd.get_quality lives in bandit.main, not in this notebook.
# It receives the Score column; the next cell joins its result onto df and
# sorts by a 'Quality' column - confirm the exact scaling it applies.
df_quality = bd.get_quality(df['Score'])

In [14]:
# Attach df_quality to df by index and order rows by 'Quality', highest first.
# Re-running this cell joins df_quality a second time, so run it once per
# fresh df.
df = df.join(df_quality).sort_values('Quality', ascending=False)

In [15]:
# Build the cohort lookup (one row per distinct Goal) and attach its cohort
# columns to every row that shares that Goal.
val_df = set_cohorts(df, 'Goal')
df = pd.merge(df, val_df, on='Goal')

In [16]:
# Name today's weekday with the same pandas routine used for the column
# below, so both sides of the comparison use the same (English) spelling.
# calendar.day_name follows the process locale and could fail to match.
currDay = pd.Timestamp(currDate).day_name()

# The weekday column is added to df itself; later cells display it.
df['weekday'] = df['Date'].dt.day_name()
df_today = df[df['weekday'] == currDay]

# Narrow to same-weekday rows only once at least 10 of them have an outcome.
today_sum = df_today['Complete'].notnull().sum()
if today_sum >= 10:
    df = df_today

In [17]:
# Show the 10 rows with the latest Date values (display only; df is unchanged).
df.sort_values('Date', ascending=True).tail(10)

Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
100,2018-10-09 06:04:27.898516,480,,,,0.0,2.0,8.0,Tuesday
99,2018-10-09 06:04:27.898516,475,,,,0.0,2.0,8.0,Tuesday
98,2018-10-09 06:04:27.898516,470,,,,0.0,2.0,8.0,Tuesday
97,2018-10-09 06:04:27.898516,465,,,,0.0,2.0,8.0,Tuesday
96,2018-10-09 06:04:27.898516,460,,,,0.0,2.0,8.0,Tuesday
95,2018-10-09 06:04:27.898516,455,,,,0.0,2.0,8.0,Tuesday
94,2018-10-09 06:04:27.898516,450,,,,0.0,2.0,8.0,Tuesday
93,2018-10-09 06:04:27.898516,445,,,,0.0,2.0,8.0,Tuesday
289,2018-10-09 06:04:27.898516,1430,,,,2.0,8.0,26.0,Tuesday
290,2018-10-09 06:04:27.898516,1435,,,,2.0,8.0,26.0,Tuesday


In [18]:
# Re-rank df by Quality, highest first, and show the top 10 rows.
df = df.sort_values('Quality', ascending=False)
df.head(10)

Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
0,2018-10-08 00:00:00.000000,965,1.0,3814.457461,1.0,2.0,6.0,18.0,Monday
2,2018-10-07 00:00:00.000000,485,1.0,1670.667739,0.436965,1.0,3.0,9.0,Sunday
4,2018-10-06 00:00:00.000000,5,1.0,6.898648,0.0,-0.0,-0.0,-0.0,Saturday
1,2018-10-09 06:04:27.898516,965,,,,2.0,6.0,18.0,Tuesday
3,2018-10-09 06:04:27.898516,485,,,,1.0,3.0,9.0,Tuesday
5,2018-10-09 06:04:27.898516,5,,,,-0.0,-0.0,-0.0,Tuesday
6,2018-10-09 06:04:27.898516,10,,,,0.0,0.0,0.0,Tuesday
7,2018-10-09 06:04:27.898516,15,,,,0.0,0.0,0.0,Tuesday
8,2018-10-09 06:04:27.898516,20,,,,0.0,0.0,0.0,Tuesday
9,2018-10-09 06:04:27.898516,25,,,,0.0,0.0,0.0,Tuesday


In [19]:
# Search level 1: df_search prints the per-group ranking for cohort1 and
# returns the rows of the group ranked first.
df_step1 = df_search(df,'cohort1')
df_step1

   cohort1  count      mean  var  total    donext
2      2.0      1  1.000000  NaN      3  1.524074
1      1.0      1  0.436965  NaN      3  0.961038
0     -0.0      1  0.000000  NaN      3  0.524074


Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
0,2018-10-08 00:00:00.000000,965,1.0,3814.457461,1.0,2.0,6.0,18.0,Monday
1,2018-10-09 06:04:27.898516,965,,,,2.0,6.0,18.0,Tuesday
196,2018-10-09 06:04:27.898516,970,,,,2.0,6.0,18.0,Tuesday
197,2018-10-09 06:04:27.898516,975,,,,2.0,6.0,18.0,Tuesday
198,2018-10-09 06:04:27.898516,980,,,,2.0,6.0,18.0,Tuesday
199,2018-10-09 06:04:27.898516,985,,,,2.0,6.0,18.0,Tuesday
200,2018-10-09 06:04:27.898516,990,,,,2.0,6.0,18.0,Tuesday
201,2018-10-09 06:04:27.898516,995,,,,2.0,6.0,18.0,Tuesday
202,2018-10-09 06:04:27.898516,1000,,,,2.0,6.0,18.0,Tuesday
203,2018-10-09 06:04:27.898516,1005,,,,2.0,6.0,18.0,Tuesday


In [20]:
# Search level 2: repeat the narrowing inside the level-1 result using cohort2.
df_step2 = df_search(df_step1,'cohort2')
df_step2

   cohort2  count  mean  var  total  donext
1      7.0      0   NaN  NaN      1   999.0
2      8.0      0   NaN  NaN      1   999.0
0      6.0      1   1.0  NaN      1     1.0


Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
227,2018-10-09 06:04:27.898516,1125,,,,2.0,7.0,21.0,Tuesday
228,2018-10-08 00:00:00.000000,1125,,,,2.0,7.0,21.0,Monday
229,2018-10-09 06:04:27.898516,1130,,,,2.0,7.0,21.0,Tuesday
230,2018-10-09 06:04:27.898516,1135,,,,2.0,7.0,21.0,Tuesday
231,2018-10-09 06:04:27.898516,1140,,,,2.0,7.0,21.0,Tuesday
232,2018-10-09 06:04:27.898516,1145,,,,2.0,7.0,21.0,Tuesday
233,2018-10-09 06:04:27.898516,1150,,,,2.0,7.0,21.0,Tuesday
234,2018-10-09 06:04:27.898516,1155,,,,2.0,7.0,21.0,Tuesday
235,2018-10-09 06:04:27.898516,1160,,,,2.0,7.0,21.0,Tuesday
236,2018-10-09 06:04:27.898516,1165,,,,2.0,7.0,21.0,Tuesday


In [21]:
# Search level 3: repeat the narrowing inside the level-2 result using cohort3.
df_step3 = df_search(df_step2,'cohort3')
df_step3

   cohort3  count  mean  var  total  donext
0     21.0      0   NaN  NaN      0   999.0
1     22.0      0   NaN  NaN      0   999.0
2     23.0      0   NaN  NaN      0   999.0


Unnamed: 0,Date,Goal,Complete,Score,Quality,cohort1,cohort2,cohort3,weekday
227,2018-10-09 06:04:27.898516,1125,,,,2.0,7.0,21.0,Tuesday
228,2018-10-08 00:00:00.000000,1125,,,,2.0,7.0,21.0,Monday
229,2018-10-09 06:04:27.898516,1130,,,,2.0,7.0,21.0,Tuesday
230,2018-10-09 06:04:27.898516,1135,,,,2.0,7.0,21.0,Tuesday
231,2018-10-09 06:04:27.898516,1140,,,,2.0,7.0,21.0,Tuesday
232,2018-10-09 06:04:27.898516,1145,,,,2.0,7.0,21.0,Tuesday
233,2018-10-09 06:04:27.898516,1150,,,,2.0,7.0,21.0,Tuesday
234,2018-10-09 06:04:27.898516,1155,,,,2.0,7.0,21.0,Tuesday
235,2018-10-09 06:04:27.898516,1160,,,,2.0,7.0,21.0,Tuesday
236,2018-10-09 06:04:27.898516,1165,,,,2.0,7.0,21.0,Tuesday


In [22]:
# Rank the individual Goal values left after the three search levels, then
# add each goal's clock time today formatted as 12-hour 'HH:MM AM/PM'.
# Note: this rebinds df from the row-level frame to the per-Goal ranking
# table, so earlier cells cannot be re-run without rebuilding df first.
df = df_bandit_class(df_step3, 'Goal')
df['CanStart'] = df.apply(set_start, axis=1).dt.strftime('%I:%M %p')
df

Unnamed: 0,Goal,count,mean,var,total,donext,CanStart
0,1125,0,,,0,999.0,06:45 PM
1,1130,0,,,0,999.0,06:50 PM
2,1135,0,,,0,999.0,06:55 PM
3,1140,0,,,0,999.0,07:00 PM
4,1145,0,,,0,999.0,07:05 PM
5,1150,0,,,0,999.0,07:10 PM
6,1155,0,,,0,999.0,07:15 PM
7,1160,0,,,0,999.0,07:20 PM
8,1165,0,,,0,999.0,07:25 PM
9,1170,0,,,0,999.0,07:30 PM
