In [None]:
import pandas as pd
import pyodbc
import numpy as np
import plotly.express as px
from datetime import datetime
import statsmodels.api as sm
import statsmodels.tsa.api as tsa

# Notebook-wide pandas display settings: thousands separators with two
# decimals for floats, and caps on how many columns/rows a frame repr shows.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 30)

def file_to_string(fileName):
    """Return the full text content of the file at ``fileName``.

    Parameters
    ----------
    fileName : str or path-like
        Path of the text file to read (opened in text mode, read-only).

    Returns
    -------
    str
        Everything in the file as a single string.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(fileName, 'r') as file:
        return file.read()

def run_query(query):
    """Run ``query`` against the ``edp-workbench-cshub`` ODBC DSN.

    Parameters
    ----------
    query : str
        SQL text to execute.

    Returns
    -------
    pandas.DataFrame
        The result set returned by ``pd.read_sql_query``.
    """
    cnxn = pyodbc.connect('DSN=edp-workbench-cshub', autocommit=True)
    try:
        return pd.read_sql_query(query, cnxn)
    finally:
        # Release the connection even when the query itself fails.
        cnxn.close()
    
def timestamp():
    """Return the current local time as text, e.g. ``2023-09-25 03:04:05 PM``."""
    now = datetime.now()
    return f"{now:%Y-%m-%d %I:%M:%S %p}"

# Employee IDs that make up the test group. Every entry needs its own
# trailing comma: adjacent string literals without one are silently
# concatenated by Python into a single (never-matching) ID.
TEST_GROUP_EXPERTS = (
    '364717', '426097', '547655', '552121', '554487', '575731',
    '569375', '572247', '572815', '572909', '573190', '580895',
    '573192', '573276', '573573', '573585', '575996', '580911',
    '576565', '576911', '579162', '581139', '581275', '581015',
)

# Rows dated on or after this day are flagged as post-launch.
EXPERT_ASSIST_LAUNCH = datetime(2023, 9, 25)

WEEKDAY_NAMES = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')


def add_conditionals(df):
    """Add the indicator and transformed columns used by the analysis cells.

    The frame is modified in place and the same object is returned. It must
    contain the columns ``Date``, ``Employee``, ``TaskQueue`` and
    ``HandleTime``.

    Columns written:

    * ``Date`` - converted with ``pd.to_datetime``.
    * ``TestGroup`` - 1 when ``Employee`` is in ``TEST_GROUP_EXPERTS``, else 0.
    * ``AfterExpertAssistLaunch`` - 1 when ``Date`` is on/after the launch date.
    * ``ExpertAssistOnForThisUser`` - product of the two flags above.
    * ``Day`` (0 = Monday) plus one 0/1 column per weekday name.
    * ``Week`` (ISO week number) plus one 0/1 ``'Week <n>'`` column per week present.
    * one 0/1 column per distinct ``TaskQueue`` value, named after the queue.
    * ``HandleTimeZ``, ``Log(HandleTime)``, ``Log(HandleTime)Z`` - z-score, natural
      log (zeros replaced by 1 first) and z-score of the log.
    * ``Constant`` - always 1, for use as a regression intercept.
    """
    df['Date'] = pd.to_datetime(df['Date'])

    # Compare as text so an integer-typed Employee column still matches the
    # string IDs above; for string columns this is a no-op.
    df['TestGroup'] = df['Employee'].astype(str).isin(TEST_GROUP_EXPERTS).astype(int)
    df['AfterExpertAssistLaunch'] = (df['Date'] >= EXPERT_ASSIST_LAUNCH).astype(int)
    df['ExpertAssistOnForThisUser'] = df['AfterExpertAssistLaunch'] * df['TestGroup']

    # Weekday dummies.
    df['Day'] = df['Date'].dt.weekday
    for day_number, day_name in enumerate(WEEKDAY_NAMES):
        df[day_name] = (df['Day'] == day_number).astype(int)

    # ISO-week dummies, one per week that actually occurs in the data.
    df['Week'] = df['Date'].dt.isocalendar().week
    for week in df['Week'].unique():
        df['Week ' + str(week)] = (df['Week'] == week).astype(int)

    # Task-queue dummies; the column name is the queue value itself.
    for queue in df['TaskQueue'].unique():
        df[queue] = (df['TaskQueue'] == queue).astype(int)

    df['HandleTimeZ'] = (df['HandleTime'] - df['HandleTime'].mean()) / df['HandleTime'].std()
    # Zero handle times are mapped to 1 so that the log is defined (log(1) == 0).
    df['Log(HandleTime)'] = np.log(df['HandleTime'].replace(0, 1))
    df['Log(HandleTime)Z'] = (
        (df['Log(HandleTime)'] - df['Log(HandleTime)'].mean()) / df['Log(HandleTime)'].std()
    )
    df['Constant'] = 1
    return df

# Comma-separated list of single-quoted employee IDs, kept as one SQL text
# fragment. The loading cell substitutes it for the `$pilot_experts$`
# placeholder in the query file before running it.
# NOTE(review): presumably the placeholder sits inside an `IN (...)` clause -
# confirm against 'AHT And Sales.SQL'.
PILOT_EXPERTS = """
    '293932','321930','364717','393198','426097',
    '536491','538855','547381','547655','548026',
    '548646','549667','550446','552121','552402',
    '554243','554487','561761','567519','567620',
    '568127','568568','568651','569153','569375',
    '569498','570227','572247','572815','572909',
    '573190','573192','573276','573573','573585',
    '574166','575731','575996','576565','576601',
    '576911','577073','577246','578299','579162',
    '579630','580895','580911','581015','581139',
    '581145','581275','581396','581694' 
    """


In [None]:
# Load the query text, inject the pilot expert ID list, and pull the result
# into `dfp`, the frame every later cell copies from.
dfp = run_query(file_to_string('AHT And Sales.SQL').replace('$pilot_experts$', PILOT_EXPERTS))

# Only a cell's last expression is rendered automatically, so the summary and
# the preview are passed to display() explicitly.
display(dfp.describe())
display(dfp.head())
dfp

# EDA Graphs

## Boxplots and numbers per group

In [None]:
# Work on an enriched copy so `dfp` itself stays untouched.
df = add_conditionals(dfp.copy())

# Spread of handle time within each task queue.
px.box(x=df['TaskQueue'], y=df['HandleTime']).show()

# Mean handle time per (date, queue), then summary statistics of those
# daily means for each queue.
display(
    df.pivot_table(values='HandleTime', index='Date', columns='TaskQueue').describe()
)

# Overall mean handle time for each queue.
display(df.pivot_table(values=['HandleTime'], index='TaskQueue'))

# Mean of each queue's 0/1 indicator column, i.e. that queue's share of rows.
queue_columns = df['TaskQueue'].unique()
display('Percent of Each Task', df[queue_columns].mean())
del df, queue_columns

## Boxplots and numbers per week

In [None]:
# Work on an enriched copy so `dfp` itself stays untouched.
df = add_conditionals(dfp.copy())

# Readable label for the 0/1 TestGroup flag; used as the plot colour so the
# legend reads "Test"/"Control" instead of 1/0.
df['Group'] = np.where(df['TestGroup'], 'Test', 'Control')
px.box(df, x='Week', y='HandleTimeZ', color='Group').show()

# Mean standardised handle time per employee and week, then per-week summary
# statistics across employees. The string 'mean' avoids the FutureWarning
# pandas emits when a NumPy callable is passed as aggfunc.
pivot = pd.pivot_table(df, columns='Week', index='Employee', values='HandleTimeZ', aggfunc='mean')
display(pivot.describe())
del df, pivot

In [None]:
# Work on an enriched copy so `dfp` itself stays untouched.
df = add_conditionals(dfp.copy())

# Percent-normalised histograms of the raw and the log z-scores: first with
# plotly's default binning (nbins=None), then with nbins=20.
for bin_count in (None, 20):
    for column in ('HandleTimeZ', 'Log(HandleTime)Z'):
        px.histogram(df, x=column, nbins=bin_count, histnorm='percent').show()
del df, bin_count, column

# Stats Work

## Difference In Differences

In [None]:
# Work on an enriched copy so `dfp` itself stays untouched.
df = add_conditionals(dfp.copy())
df['Group'] = np.where(df['TestGroup'], 'Test', 'Control')
df['Period'] = np.where(df['AfterExpertAssistLaunch'], 'PostLaunch', 'PreLaunch')

# Mean handle time per queue, split by the raw 0/1 launch and test-group flags.
pivot = pd.pivot_table(df, columns=['AfterExpertAssistLaunch', 'TestGroup'], index='TaskQueue', values='HandleTime', margins=True)
display(pivot.sort_index())
del pivot

# Same table with readable labels, extended with pre/post differences and
# call counts for each group.
pivot = pd.pivot_table(df, columns=['Period', 'Group'], index='TaskQueue', values='HandleTime', margins=True)
for col in pivot['PostLaunch'].columns:
    pivot['% Difference', col] = (pivot['PostLaunch', col] / pivot['PreLaunch', col]) - 1
    pivot['Abs Difference', col] = pivot['PostLaunch', col] - pivot['PreLaunch', col]
    # value_counts() is aligned on the queue index, so the 'All' row starts as NaN.
    pivot['Total Calls in Queue', col] = df[df['Group'] == col]['TaskQueue'].value_counts()
    # One .loc call with (row, column) so the total is written into `pivot`
    # itself; chained `pivot.loc['All'][...] = x` assigns to a temporary copy.
    pivot.loc['All', ('Total Calls in Queue', col)] = pivot['Total Calls in Queue', col].sum()

# Control minus Test: positive means the test group's change was lower than control's.
pivot['% Difference', 'B/-(W) Than Control'] = pivot['% Difference', 'Control'] - pivot['% Difference', 'Test']
pivot['Abs Difference', 'B/-(W) Than Control'] = pivot['Abs Difference', 'Control'] - pivot['Abs Difference', 'Test']

pivot = pivot.sort_values(by=[('Total Calls in Queue', 'Test')], axis=0, ascending=False)
# The 'All' row now carries the largest count; keep it as the bottom row.
pivot = pivot.loc[[queue for queue in pivot.index if queue != 'All'] + ['All']]
pivot = pivot.sort_index(axis=1, ascending=False)
# Drop the margins *column* (top-level label 'All'); the 'All' row is kept.
del pivot['All']

display("AHT by Test Period and Group", pivot)
# pivot.to_excel('AHT.xlsx')
del pivot, df, col

## Regression of Handle Time
Benefit of increased explainability. However, as we saw, the data is not Gaussian, so the likelihood of this model being highly accurate is low.

In [None]:
# Work on an enriched copy so `dfp` itself stays untouched.
df = add_conditionals(dfp.copy())

# Look at only 2 queue types
# Interaction terms: queue indicator x "ExpertAssist on for this user" /
# "after launch" flags. Week is cast to a plain int for the arithmetic below.
df = df.assign(**{
    'Having ExpertAssist Effect on Onboarding': df['VZN_Onboarding_2628'] * df['ExpertAssistOnForThisUser'],
    'Having ExpertAssist Effect on MTS Bundle': df['VZN_MTS_Bundle_EN_2233'] * df['ExpertAssistOnForThisUser'],
    'Onboarding Post Launch': df['VZN_Onboarding_2628'] * df['AfterExpertAssistLaunch'],
    'Week': df['Week'].astype(int),
})
# Number of weeks between each row's week and the latest week in the data.
df['WeeksAgo'] = df['Week'].max() - df['Week']

regressors = [
    'Having ExpertAssist Effect on Onboarding',
    'Having ExpertAssist Effect on MTS Bundle',
    'Constant',
    'TestGroup',
    'WeeksAgo',
    "VZN_Onboarding_2628",
    'Offered',
    'Accepted',
]
model = sm.OLS(endog=df['HandleTime'], exog=df[regressors]).fit()
display(model.summary())
del df, model, regressors

# Regression of LogHandleTime with prediction
Less explainable; however, it should be more accurate, as the AHT data was much closer to lognormal than to Gaussian.

In [None]:
# Work on an enriched copy so `dfp` itself stays untouched.
df = add_conditionals(dfp.copy())

# Interaction terms: queue indicator x "ExpertAssist on for this user" /
# "after launch" flags. Week is cast to a plain int for the arithmetic below.
df = df.assign(**{
    'Having ExpertAssist Effect on Onboarding': df['VZN_Onboarding_2628'] * df['ExpertAssistOnForThisUser'],
    'Having ExpertAssist Effect on MTS Bundle': df['VZN_MTS_Bundle_EN_2233'] * df['ExpertAssistOnForThisUser'],
    'Onboarding Post Launch': df['VZN_Onboarding_2628'] * df['AfterExpertAssistLaunch'],
    'MTS Bundle Post Launch': df['VZN_MTS_Bundle_EN_2233'] * df['AfterExpertAssistLaunch'],
    'Week': df['Week'].astype(int),
})
# Number of weeks between each row's week and the latest week in the data.
df['WeeksAgo'] = df['Week'].max() - df['Week']

# Regressors for the log model ('ExpertAssistOnForThisUser' on its own is
# deliberately left out, as is the raw 'HandleTime' target).
regressors = [
    'Having ExpertAssist Effect on Onboarding',
    "VZN_Onboarding_2628",
    'Having ExpertAssist Effect on MTS Bundle',
    'Constant',
    'TestGroup',
    'Offered',
    'Accepted',
]
model = sm.OLS(endog=df['Log(HandleTime)'], exog=df[regressors]).fit()

# Not up to date prediction
# print(
#     "Expert Assist Being on For this user leads  to",
#     (np.exp(model.predict(exog=[1,1,1,df['VZN_Onboarding_2628'].mean(),df['Offered'].mean(),df['Accepted'].mean()]))
#     -np.exp(model.predict(exog=[0,1,1,df['VZN_Onboarding_2628'].mean(),df['Offered'].mean(),df['Accepted'].mean()])))[0],
#     " Additional(-Fewer) Seconds of AHT"
#     )

# Full statsmodels summary, followed by the coefficient table on its own.
display(model.summary(), model.summary2().tables[1])
del df, model, regressors