In [None]:
import pandas as pd
import numpy as np
import math
import subprocess
from datetime import date, time, datetime, timedelta
import calendar
import bandit.main as bd

In [None]:
def set_cohorts(source_df, score_col, cohort_size=3):
    """Assign every distinct score to nested, rank-based cohorts.

    The distinct values of ``source_df[score_col]`` are sorted ascending and
    split by rank into ``cohort_size`` groups (``cohort1``),
    ``cohort_size**2`` groups (``cohort2``) and ``cohort_size**3`` groups
    (``cohort3``). A level is only added when there are strictly more
    distinct scores than groups at that level, so small inputs come back
    with fewer cohort columns, or none at all.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Frame that contains ``score_col``.
    score_col : str
        Name of the column whose distinct values are ranked.
    cohort_size : int, default 3
        Number of groups at the coarsest level; each further level multiplies
        the group count by this value.

    Returns
    -------
    pandas.DataFrame
        One row per distinct score (ascending), with ``score_col`` plus the
        ``cohort1`` / ``cohort2`` / ``cohort3`` columns that qualified.
        Cohort labels are floats (0.0, 1.0, ...) because the group width is
        computed with true division.

    Raises
    ------
    ValueError
        If ``cohort_size`` is smaller than 1.
    """
    if cohort_size < 1:
        raise ValueError("cohort_size must be a positive integer")

    # break out distinct scores, ordered so the positional index is the rank
    distinct = pd.DataFrame({score_col: source_df[score_col].unique()})
    df = distinct.sort_values(score_col, ascending=True).reset_index(drop=True)

    n_scores = len(df)
    for level in (1, 2, 3):
        n_groups = cohort_size ** level
        if n_scores > n_groups:
            # True division keeps the groups as even as possible when the
            # number of scores is not a multiple of the group count.
            grp_size = n_scores / n_groups
            df['cohort%d' % level] = df.index // grp_size

    return df

In [None]:
def df_bandit_class(df, classname):
    """Summarise ``Quality`` per class and rank the classes by ``donext``.

    Groups ``df`` by ``classname`` and computes the count, mean and variance
    of the ``Quality`` column for each group. ``total`` holds the sum of all
    group counts. ``donext`` is whatever ``bd.get_tuned_ucb`` returns for the
    row; missing results are replaced by 999 so those classes sort first.
    NOTE(review): ``get_tuned_ucb`` lives in the external ``bandit.main``
    module -- presumably a tuned upper-confidence-bound score; confirm there.

    Returns the summary frame sorted by ``donext`` in descending order.
    """
    stats = (
        df.groupby([classname])['Quality']
        .agg(['count', 'mean', 'var'])
        .reset_index()
    )
    stats['total'] = stats['count'].sum()

    ucb_scores = stats.apply(
        lambda row: bd.get_tuned_ucb(row, 'mean', 'var'),
        axis=1,
    )
    stats['donext'] = ucb_scores.fillna(999)

    return stats.sort_values('donext', ascending=False)

In [None]:
def df_search(df_in, cohort):
    """Narrow ``df_in`` to the top-ranked class of the ``cohort`` column.

    If ``cohort`` is not a column of ``df_in`` the frame is returned as-is.
    Otherwise the classes are ranked with ``df_bandit_class`` (the ranking is
    printed) and, as long as at least one row has a null ``Complete`` value,
    only the rows belonging to the first-ranked class are returned. When no
    row has a null ``Complete`` value the input frame is returned unfiltered.
    """
    if cohort not in df_in:
        return df_in

    ranking = df_bandit_class(df_in, cohort)
    print(ranking)
    # First row / first column of the ranking is the class label to pursue.
    best_class = ranking.iloc[0, 0]

    has_open_rows = df_in['Complete'].isnull().any()
    if not has_open_rows:
        return df_in

    return df_in[df_in[cohort] == best_class]

In [None]:
def set_start(df):
    """Turn a row's ``Goal`` (minutes after midnight) into today's datetime.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row exposing a numeric ``'Goal'`` entry (despite the name,
        this is one row, e.g. as passed by ``DataFrame.apply(axis=1)``).

    Returns
    -------
    datetime.datetime
        Today's date at 00:00 plus ``Goal`` minutes.
    """
    # timedelta rejects NumPy integer scalars such as numpy.int64, which is
    # what an all-integer row yields; casting makes the helper independent of
    # the row's dtype.
    goal_minutes = float(df['Goal'])
    midnight = datetime.combine(date.today(), time(0, 0))
    return midnight + timedelta(minutes=goal_minutes)

In [None]:
# Minutes after midnight in 5-minute steps (5, 10, ..., 1435).
# NOTE(review): both `minute_list` and `df` are rebuilt by the next cell, so
# this "Start" frame looks like a leftover -- confirm before removing it.
minute_list = list(range(5, 1440, 5))
df = pd.DataFrame({"Start": minute_list})

In [None]:
# Candidate goals for today: one row per 5-minute mark (5 .. 1435 minutes),
# stamped with the current timestamp and with no outcome recorded yet.
currDate = datetime.today()
minute_list = list(range(5, 1440, 5))
df = (
    pd.DataFrame({"Goal": minute_list})
    .assign(Date=currDate, Complete=np.nan)
    [['Date', 'Goal', 'Complete']]
)

In [None]:
# Hand the workbook name to the system shell without waiting for it to exit.
# NOTE(review): presumably relies on the OS file association (Windows) to open
# the spreadsheet for manual editing -- confirm on other platforms.
subprocess.Popen(args='StopSmoking.xlsx', shell=True)

In [None]:
# Read the workbook and stack its rows underneath the generated goal rows.
# NOTE(review): assumes the sheet uses the same Date / Goal / Complete columns
# as `df` -- confirm against the workbook.
df_excel = pd.read_excel(io='StopSmoking.xlsx')
# DataFrame.append was removed in pandas 2.0; pd.concat is the drop-in
# equivalent and also works on older pandas versions.
df = pd.concat([df, df_excel]).reset_index(drop=True)

In [None]:
# Pass the combined frame through bandit.main.reduce, keyed on 'Goal'.
# NOTE(review): the helper is defined outside this notebook; presumably it
# collapses rows per goal -- confirm in bandit.main before relying on it.
df = bd.reduce(df,'Goal')

In [None]:
# Preview the last ten rows of the reduced frame.
df.tail(10)

In [None]:
# Score a goal as Goal**1.2 when 'Complete' is at least 1, otherwise 0 ...
df['Score'] = np.where(df['Complete'] >= 1, df['Goal']**1.2, 0)
# ... but rows with no recorded outcome must stay missing rather than 0.
# (np.nan instead of the np.NaN alias, which NumPy 2.0 removed.)
df.loc[df['Complete'].isnull(), 'Score'] = np.nan

In [None]:
# Derive a quality measure from the Score column via bandit.main.get_quality.
# NOTE(review): the later join and sort imply the result carries a 'Quality'
# column aligned on df's index -- confirm in bandit.main.
df_quality = bd.get_quality(df['Score'])

In [None]:
# Attach the quality columns (index-aligned join) and put the highest
# 'Quality' rows first.
df = (
    df.join(df_quality)
      .sort_values('Quality', ascending=False)
)

In [None]:
# Rank the distinct goals into cohorts, then copy the resulting cohort
# columns onto every row that shares the same 'Goal'.
val_df = set_cohorts(df, 'Goal')
df = pd.merge(df, val_df, on='Goal')

In [None]:
# Restrict the history to rows dated on today's weekday, but only when that
# slice already holds at least 10 rows with a recorded 'Complete' value;
# otherwise keep working with the full frame.
currDay = calendar.day_name[currDate.weekday()]
# The 'weekday' column is written onto df itself, so it stays available
# whichever frame is kept below.
df['weekday'] = df['Date'].dt.day_name()
df_today = df.loc[df['weekday'] == currDay]
today_sum = df_today['Complete'].notnull().sum()
df = df_today if today_sum >= 10 else df

In [None]:
# Show the last ten rows when ordered by 'Date' ascending; df is not modified.
df.sort_values('Date', ascending=True).tail(10)

In [None]:
# Re-order the working frame by 'Quality', best first, and preview the top ten.
df = df.sort_values('Quality', ascending=False)
df.head(10)

In [None]:
# First narrowing pass, on the coarsest cohort column. df_search returns the
# frame unchanged when 'cohort1' is absent.
df_step1 = df_search(df,'cohort1')
df_step1

In [None]:
# Second narrowing pass: search within the previous result using 'cohort2'.
df_step2 = df_search(df_step1,'cohort2')
df_step2

In [None]:
# Third narrowing pass: search within the previous result using 'cohort3'.
df_step3 = df_search(df_step2,'cohort3')
df_step3

In [None]:
# Rank the individual goals that survived the cohort search and add a
# 12-hour clock label ('CanStart') for each goal's minutes-after-midnight.
df = df_bandit_class(df_step3, 'Goal').assign(
    CanStart=lambda ranked: (
        ranked.apply(set_start, axis=1).dt.strftime('%I:%M %p')
    )
)
df