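# Check confidence intervals

Fit an OLS model of impressions on weekday and hour, then compare the width of the 95% confidence intervals of its coefficients on the full New York data and on a short date window.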

In [5]:
from datetime import datetime, timedelta
import pandas as pd
from statsmodels.formula.api import ols

In [6]:
def gen_prop_lr(br_object):
    """Estimate the hourly share of impressions for each weekday via an OLS fit.

    Impressions are summed per (date, weekday, hour) bucket and regressed on
    weekday and hour as categorical terms. The fitted model is then evaluated
    on every weekday/hour combination and each weekday's 24 fitted values are
    rescaled so that they sum to 100.

    Parameters
    ----------
    br_object : object with an ``imps`` column and an ``index`` exposing
        ``.date``, ``.weekday`` and ``.hour``
        (presumably a DataFrame with a DatetimeIndex — confirm with callers).

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``hour`` (0-23), one column per ``weekday`` (0-6);
        every column sums to 100.
    """
    # Total impressions per (calendar date, weekday, hour) bucket.
    aggr = br_object.imps.groupby(
        [br_object.index.date, br_object.index.weekday, br_object.index.hour]
    ).sum()
    aggr.index.names = ['date', 'weekday', 'hour']
    aggr = aggr.reset_index()
    model = ols('imps ~ C(weekday) + C(hour)', data=aggr).fit()

    # Full 7 x 24 prediction grid: each weekday repeated 24 times, paired with
    # hours 0-23. Built with comprehensions so the function does not rely on
    # `itertools`, which this notebook never imports.
    # NOTE(review): the grid always covers all 7 weekdays and 24 hours;
    # presumably the input must contain each of those levels for the
    # prediction to succeed — confirm.
    weekday_list = [weekday for weekday in range(7) for _ in range(24)]
    hour_list = [hour for _ in range(7) for hour in range(24)]
    df_fitting = pd.DataFrame({'weekday': weekday_list, 'hour': hour_list})
    df_fitting['fitted'] = model.predict(df_fitting)

    pattern = df_fitting.pivot_table('fitted', index=df_fitting.hour, columns=df_fitting.weekday)
    # Rescale every weekday column so that it sums to 100; the column totals
    # returned by `sum()` align with the weekday columns of `pattern`.
    return pattern * 100 / pattern.sum()

In [7]:
def gen_prop_lr_hour(br_object):
    """Return the OLS-fitted share of impressions for each hour of the day.

    Impressions are totalled per (date, hour) bucket, regressed on the hour as
    a categorical term, and the 24 fitted values are rescaled to sum to 100.

    Parameters
    ----------
    br_object : object with an ``imps`` column and an ``index`` exposing
        ``.date`` and ``.hour``
        (presumably a DataFrame with a DatetimeIndex — confirm with callers).

    Returns
    -------
    pandas.DataFrame
        Single column ``fitted`` indexed by ``hour`` (0-23); the column sums
        to 100.
    """
    # Total impressions per (calendar date, hour) bucket.
    by_date_hour = [br_object.index.date, br_object.index.hour]
    hourly_totals = br_object.imps.groupby(by_date_hour).sum().reset_index()
    hourly_totals.columns = ['date', 'hour', 'imps']
    hour_model = ols('imps ~ C(hour)', data=hourly_totals).fit()

    # Evaluate the fit once for each hour of the day.
    grid = pd.DataFrame({'hour': list(range(24))})
    grid['fitted'] = hour_model.predict(grid)

    # Move the hour into the index and express the fitted values as
    # percentages of their total.
    shares = grid.set_index('hour')
    shares['fitted'] = shares['fitted'] * 100 / shares['fitted'].sum()
    return shares

In [8]:
# Synthetic data: one row per hour over 8 days, starting 2020-08-04 00:59:59,
# each carrying a constant 100 impressions; the timestamp becomes the index.
records = [
    {'date': datetime(2020, 8, 4, 0, 59, 59) + timedelta(hours=offset),
     'imps': 100}
    for offset in range(24 * 8)
]
df = pd.DataFrame.from_records(records, index='date')

In [23]:
# Load `br_clean.pkl` from the working directory. This rebinds `df`, replacing
# the synthetic frame built in the previous cell.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# if it comes from a trusted source.
df = pd.read_pickle('br_clean.pkl')

In [24]:
# Keep only the America/New_York rows, indexed by `local_date` and sorted on
# that index. Written as one chain that returns a new frame instead of
# mutating the filtered slice of `df` in place (in-place edits on such a
# slice are what pandas may flag with SettingWithCopyWarning).
data = (
    df[df.TZ == 'America/New_York']
    .set_index('local_date')
    .sort_index()
)

In [27]:
# Sum impressions per (date, weekday, hour) bucket over the New York data,
# then regress the bucket totals on weekday and hour as categorical terms.
aggr = (
    data.imps
    .groupby([data.index.date, data.index.weekday, data.index.hour])
    .sum()
    .rename_axis(['date', 'weekday', 'hour'])
    .reset_index()
)
model = ols('imps ~ C(weekday) + C(hour)', data=aggr).fit()

In [28]:
# Print the plain-text regression summary of the weekday + hour OLS fit.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   imps   R-squared:                       0.189
Model:                            OLS   Adj. R-squared:                  0.024
Method:                 Least Squares   F-statistic:                     1.144
Date:                Wed, 05 Aug 2020   Prob (F-statistic):              0.296
Time:                        16:19:37   Log-Likelihood:                -1798.5
No. Observations:                 172   AIC:                             3657.
Df Residuals:                     142   BIC:                             3751.
Df Model:                          29                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept        1.466e+05   3911.114     

In [29]:
# Confidence interval of every coefficient at alpha=0.05 (i.e. 95%); the two
# bound columns are relabelled 'inf' (lower bound) and 'sup' (upper bound).
ic = model.conf_int(alpha=0.05)
ic.columns = ['inf', 'sup']

In [30]:
# Width of each coefficient's interval, rounded to 5 decimals.
ic['size'] = (ic['sup'] - ic['inf']).abs().round(5)

In [31]:
# Display the bounds and width of each coefficient's interval (full data).
ic

Unnamed: 0,inf,sup,size
Intercept,138835.143402,154298.209292,15463.06589
C(weekday)[T.1],-10810.211811,-591.835579,10218.37623
C(weekday)[T.2],-8329.86235,2236.684424,10566.54677
C(weekday)[T.3],-8440.267506,2126.279268,10566.54677
C(weekday)[T.4],-8760.302475,1806.2443,10566.54677
C(weekday)[T.5],-4759.063796,5807.482979,10566.54677
C(weekday)[T.6],-5364.705355,5201.84142,10566.54677
C(hour)[T.1],-9948.699227,9616.743551,19565.44278
C(hour)[T.2],-10014.236606,9551.206172,19565.44278
C(hour)[T.3],-9622.328447,9943.114331,19565.44278


In [32]:
# Restrict the New York data to a short window using an explicit label slice
# on the sorted index (both ends inclusive).
# NOTE(review): the window opens at 23:58 on 2020-07-07, so it keeps about two
# minutes of that day. The interval table shown for `model2` below has an
# intercept far smaller than the weekday terms, which looks like that partial
# bucket acting as the baseline — confirm whether the start should be
# '2020-07-08 00:00:00'.
databis = data.loc['2020-07-07 23:58:00':'2020-07-10 23:59:59']

In [33]:
# Same aggregation and regression as for the full data, on the short window:
# impressions summed per (date, weekday, hour) bucket, then an OLS fit on
# weekday and hour as categorical terms. Note that this rebinds `aggr`.
aggr = (
    databis.imps
    .groupby([databis.index.date, databis.index.weekday, databis.index.hour])
    .sum()
    .rename_axis(['date', 'weekday', 'hour'])
    .reset_index()
)
model2 = ols('imps ~ C(weekday) + C(hour)', data=aggr).fit()

In [34]:
# Confidence interval of every coefficient of the short-window fit at
# alpha=0.05 (i.e. 95%); the two bound columns are relabelled 'inf' (lower
# bound) and 'sup' (upper bound).
ic2 = model2.conf_int(alpha=0.05)
ic2.columns = ['inf', 'sup']

In [35]:
# Width of each coefficient's interval for the short-window fit, rounded to
# 5 decimals.
ic2['size'] = (ic2['sup'] - ic2['inf']).abs().round(5)

In [36]:
# Display the bounds and width of each coefficient's interval (short window).
ic2

Unnamed: 0,inf,sup,size
Intercept,1868.415383,4174.271634,2305.85625
C(weekday)[T.2],138517.738184,140601.531518,2083.79333
C(weekday)[T.3],138407.333028,140491.126361,2083.79333
C(weekday)[T.4],138087.298059,140171.091393,2083.79333
C(hour)[T.1],-1067.755366,390.596176,1458.35154
C(hour)[T.2],-803.515212,654.83633,1458.35154
C(hour)[T.3],-846.713699,611.637843,1458.35154
C(hour)[T.4],-1018.184669,440.166873,1458.35154
C(hour)[T.5],499.835455,1958.186997,1458.35154
C(hour)[T.6],1256.642608,2714.99415,1458.35154
