In [1]:
import pandas as pd
import pyodbc
import numpy as np
import plotly.express as px
from datetime import datetime
import statsmodels.api as sm
import statsmodels.tsa.api as tsa

# Notebook-wide pandas display settings: floats render with thousands
# separators and two decimals; at most 50 columns and 30 rows are shown.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 30)

def file_to_string(fileName):
    """Return the full contents of a text file as a single string.

    Parameters
    ----------
    fileName : str or path-like
        Path of the file to read (opened in text mode, read-only).

    Returns
    -------
    str
        Everything in the file, exactly as ``read()`` returns it.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(fileName, 'r') as file:
        return file.read()

def run_query(query):
    """Run a SQL query over ODBC and return the result as a DataFrame.

    A new connection to the ``edp-workbench-cshub`` DSN is opened (with
    autocommit enabled) for every call and closed again before returning.

    Parameters
    ----------
    query : str
        SQL text passed to ``pandas.read_sql_query``.

    Returns
    -------
    pandas.DataFrame
        The query result.
    """
    cnxn = pyodbc.connect('DSN=edp-workbench-cshub', autocommit=True)
    try:
        return pd.read_sql_query(query, cnxn)
    finally:
        # Release the connection even when the query itself fails.
        cnxn.close()
    
def timestamp():
    """Return the current local time as text: date, 12-hour clock time, AM/PM marker."""
    stamp_format = '%Y-%m-%d %I:%M:%S %p'
    now = datetime.now()
    return now.strftime(stamp_format)

def add_conditionals(df, test_employees=None, launch_date=None):
    """Add the indicator and transformed columns used by the analysis cells.

    The frame is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Date``, ``Employee``, ``TaskQueue`` and
        ``HandleTime``.
    test_employees : iterable, optional
        Employee IDs that form the test group. Defaults to the roster that
        was previously hard-coded in this function.
    launch_date : datetime, optional
        First day counted as "after launch". Defaults to 2023-09-25.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with these columns added or overwritten:
        ``Date`` (converted to datetime), ``TestGroup``,
        ``AfterExpertAssistLaunch``, ``ExpertAssistOnForThisUser``, ``Day``,
        one 0/1 column per weekday name, ``Week`` (ISO week number), one
        ``'Week <n>'`` 0/1 column per week present, one 0/1 column per
        distinct ``TaskQueue`` value (named after the queue), ``HandleTimeZ``,
        ``Log(HandleTime)``, ``Log(HandleTime)Z`` and ``Constant``.
    """
    if test_employees is None:
        test_employees = (
            '364717', '426097', '547655', '552121',
            '554487', '569375', '572247', '572815',
            '572909', '573190', '573192', '573276',
            '573573', '573585', '575731', '575996',
            '576565', '576911', '577073', '577246',
            '579162', '580895', '580911', '581015',
            '581139', '581145', '581275',
        )
    if launch_date is None:
        launch_date = datetime(2023, 9, 25)

    df['Date'] = pd.to_datetime(df['Date'])

    # Compare IDs as text so the match works whether the Employee column
    # arrives as strings or as integers.
    roster = [str(employee) for employee in test_employees]
    df['TestGroup'] = df['Employee'].astype(str).isin(roster).astype(int)
    df['AfterExpertAssistLaunch'] = (df['Date'] >= launch_date).astype(int)
    # 1 only for test-group rows dated on or after the launch.
    df['ExpertAssistOnForThisUser'] = df['AfterExpertAssistLaunch'] * df['TestGroup']

    # Weekday dummies (0 = Monday ... 6 = Sunday).
    df['Day'] = df['Date'].dt.weekday
    days = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday',
            4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
    for day_number, day_name in days.items():
        df[day_name] = (df['Day'] == day_number).astype(int)

    # ISO week number plus one dummy per week present in the data.
    df['Week'] = df['Date'].dt.isocalendar().week
    for week in df['Week'].unique():
        df['Week ' + str(week)] = (df['Week'] == week).astype(int)

    # One dummy per task queue, named after the queue itself.
    for queue in df['TaskQueue'].unique():
        df[queue] = (df['TaskQueue'] == queue).astype(int)

    handle_time = df['HandleTime']
    df['HandleTimeZ'] = (handle_time - handle_time.mean()) / handle_time.std()
    # log(0) is undefined, so zero handle times are replaced by the smallest
    # positive handle time before taking the log.
    smallest_positive = handle_time[handle_time > 0].min()
    df['Log(HandleTime)'] = np.log(handle_time.replace(0, smallest_positive))
    log_handle_time = df['Log(HandleTime)']
    df['Log(HandleTime)Z'] = (log_handle_time - log_handle_time.mean()) / log_handle_time.std()

    # Explicit intercept column for the regression design matrices.
    df['Constant'] = 1
    return df

In [2]:
# Pull the extract defined in 'AHT And Sales.SQL' once and keep it as `dfp`.
dfp = run_query(file_to_string('AHT And Sales.SQL'))
# Only a cell's last expression is rendered, so the former bare
# `dfp.describe()` / `dfp.head()` lines computed results that were thrown
# away. Wrap them in display(...) if those summaries are wanted.
dfp

  df = pd.read_sql_query(query,cnxn)


Unnamed: 0,Date,Employee,callNo,TaskQueue,HandleTime,Offered,Accepted
0,2023-10-13,573192,WRfb093a202dca6dfb043a3309adc8203e,VZN_MTS_Bundle_EN_2233,471,1,0
1,2023-10-13,572815,WRf70475dd95a7f2a44103fb950a9f0d12,VZN_MTS_Bundle_EN_2233,494,1,0
2,2023-10-13,572815,WRb45453500c4af1b96b88b5abdb2d7ba0,VZN_MTS_Bundle_EN_2233,445,1,0
3,2023-09-13,293932,WR1f48ce68dc0db01ed9698b75ce4f7841,VZN_5G_UPSELL_5013,1646,1,0
4,2023-09-13,573585,WR6fe7476939c6da0ff07dd7831ab2d038,VZN_MTS_Bundle_EN_2233,106,1,0
...,...,...,...,...,...,...,...
12945,2023-09-29,575996,WR79a86c3ddc9247e6ef15c6942caf6cf9,VZN_MTS_Bundle_EN_2233,1306,1,0
12946,2023-10-10,575996,WR483c5a23c8017da84bbe53af22cb5acd,VZN_MTS_Bundle_EN_2233,467,1,0
12947,2023-09-27,581015,WR9af9dab5c620dd3b9a2e500e2d0c6dda,VZN_MTS_Bundle_EN_2233,2217,0,0
12948,2023-10-02,581015,WRe2458b43cb1b526d8450eac49cf9b755,VZN_MTS_Bundle_EN_2233,535,1,0


# EDA Graphs

## Boxplots and numbers per group

In [None]:
# Work on a private copy so `dfp` keeps only the raw query columns.
df = add_conditionals(dfp.copy())

# Spread of handle time within each task queue.
px.box(x=df['TaskQueue'], y=df['HandleTime']).show()

# Mean handle time per day and queue, then summary statistics of those daily means.
pivot = pd.pivot_table(df, columns='TaskQueue', index='Date', values='HandleTime')
display(pivot.describe())
del pivot

# Overall mean handle time per queue.
display(pd.pivot_table(df, index='TaskQueue', values=['HandleTime']))

# The mean of a 0/1 queue dummy is that queue's share of calls; scale by 100
# so the figures are percentages, as the label says.
display('Percent of Each Task', 100 * df[df['TaskQueue'].unique()].mean())
del df

## Boxplots and numbers per week

In [None]:
df = add_conditionals(dfp.copy())

# Readable group label for the legend.
df['Group'] = np.where(df['TestGroup'], 'Test', 'Control')

# Standardised handle time by week, split by group. Colour by the label
# column built above rather than the raw 0/1 TestGroup flag.
px.box(df, x='Week', y='HandleTimeZ', color='Group').show()

# Mean standardised handle time per employee and week, then summary
# statistics across employees. The string 'mean' replaces the deprecated
# practice of passing np.mean as aggfunc.
pivot = pd.pivot_table(df, columns='Week', index='Employee', values='HandleTimeZ', aggfunc='mean')
display(pivot.describe())
del df, pivot

In [None]:
df = add_conditionals(dfp.copy())

# For each outcome flag, draw the standardised and log-standardised handle
# time distributions, first with plotly's default binning (nbins=None) and
# then with 20 bins. The order matches the original one-call-per-line layout.
for outcome in ('Offered', 'Accepted'):
    for bin_count in (None, 20):
        for measure in ('HandleTimeZ', 'Log(HandleTime)Z'):
            px.histogram(df, x=measure, nbins=bin_count, histnorm='percent', color=outcome).show()

# Drop the working frame and the loop variables so nothing extra lingers in the kernel.
del df, outcome, bin_count, measure

In [None]:
df = add_conditionals(dfp.copy())

# Overall distributions of standardised and log-standardised handle time:
# plotly's default binning first (nbins=None), then 20 bins.
for bin_count in (None, 20):
    for measure in ('HandleTimeZ', 'Log(HandleTime)Z'):
        px.histogram(df, x=measure, nbins=bin_count, histnorm='percent').show()

# Drop the working frame and the loop variables so nothing extra lingers in the kernel.
del df, bin_count, measure

# Stats Work

## Difference In Differences

In [24]:
df = dfp.copy()
# df=df[df['TaskQueue'].isin(['VZN_MTS_Bundle_EN_2233','VZN_Onboarding_2628'])]

df = add_conditionals(df)
# Keep calls dated up to 2023-10-15. The .copy() makes the filtered frame
# independent, so the column assignments below do not write to a slice.
df = df[df['Date'] <= datetime(2023, 10, 15)].copy()
df['Group'] = np.where(df['TestGroup'], 'Test', 'Control')
df['Period'] = np.where(df['AfterExpertAssistLaunch'], 'PostLaunch', 'PreLaunch')

# Mean handle time per queue for every (Period, Group) pair; margins=True adds the 'All' row/column.
pivot = pd.pivot_table(df, columns=['Period', 'Group'], index='TaskQueue', values='HandleTime', margins=True)
# pivot = pivot.replace(np.nan,0)
for col in pivot['PostLaunch'].columns:
    # pivot['% Difference',col]=((pivot['PostLaunch',col]/pivot['PreLaunch',col])-1)
    pivot['Abs Difference', col] = pivot['PostLaunch', col] - pivot['PreLaunch', col]
    # Per-queue call counts for this group; the 'All' row has no matching queue and starts as NaN.
    pivot['Total Calls in Queue', col] = df[df['Group'] == col]['TaskQueue'].value_counts()
    # Fill the 'All' row with the column total using one .loc call. The
    # previous chained form (pivot.loc['All'][...] = ...) assigns into an
    # intermediate object and is not guaranteed to update `pivot`.
    pivot.loc['All', ('Total Calls in Queue', col)] = pivot['Total Calls in Queue', col].sum()
# pivot['% Difference','B/-(W) Than Control']=pivot['% Difference','Control']-pivot['% Difference','Test']
# Control change minus test change: positive when the test group's handle time moved down relative to control.
pivot['Abs Difference', 'B/-(W) Than Control'] = pivot['Abs Difference', 'Control'] - pivot['Abs Difference', 'Test']

pivot = pivot.sort_values(by=[('Total Calls in Queue', 'Test')], axis=0, ascending=False)
pivot = pivot.sort_index(axis=1, ascending=False)
# Drop the margin column; the 'All' row is kept.
del pivot['All']

display("AHT by Test Period and Group", pivot)
# pivot.to_excel('AHT.xlsx')
del pivot, df, col

'AHT by Test Period and Group'

Period,Total Calls in Queue,Total Calls in Queue,PreLaunch,PreLaunch,PostLaunch,PostLaunch,Abs Difference,Abs Difference,Abs Difference
Group,Test,Control,Test,Control,Test,Control,Test,Control,B/-(W) Than Control
TaskQueue,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
All,7149.0,5801.0,1128.46,1083.05,1113.2,1104.43,-15.27,21.38,36.64
VZN_MTS_Bundle_EN_2233,5954.0,4843.0,1100.63,1048.03,1077.4,1080.29,-23.23,32.25,55.49
VZN_Onboarding_2628,709.0,447.0,1302.74,1365.58,1361.32,1242.33,58.58,-123.25,-181.83
VZN_MTS_Bundle_SP_2234,442.0,23.0,1213.06,792.09,1169.7,394.0,-43.36,-398.09,-354.73
VZN_Onboarding_SP_2629,31.0,,1245.78,,1749.31,,503.53,,
VZN_SameDay_Replac_ACT_2792,8.0,4.0,467.0,1269.0,661.2,620.0,194.2,-649.0,-843.2
VZN_Resolution_SP_2462,5.0,1.0,1602.67,,1935.0,852.0,332.33,,
VZN_5G_UPSELL_5013,,483.0,,1163.7,,1305.86,,142.16,


In [28]:
df = dfp.copy()
# df=df[df['TaskQueue'].isin(['VZN_MTS_Bundle_EN_2233','VZN_Onboarding_2628'])]

df = add_conditionals(df)
# Keep calls dated up to 2023-10-15. The .copy() makes the filtered frame
# independent, so the column assignments below do not write to a slice.
df = df[df['Date'] <= datetime(2023, 10, 15)].copy()
df['Group'] = np.where(df['TestGroup'], 'Test', 'Control')
df['Period'] = np.where(df['AfterExpertAssistLaunch'], 'PostLaunch', 'PreLaunch')
# Turn the 0/1 flags into readable labels for the pivot index.
df['Offered'] = df['Offered'].astype(str).replace('0', 'Not Offered').replace('1', 'Offered')
df['Accepted'] = df['Accepted'].astype(str).replace('0', 'Not Accepted').replace('1', 'Accepted')

# Mean handle time per (Offered, Accepted) outcome for every (Period, Group) pair.
pivot = pd.pivot_table(df, columns=['Period', 'Group'], index=['Offered', 'Accepted'], values='HandleTime')
# pivot = pivot.replace(np.nan,0)
for col in pivot['PostLaunch'].columns:
    # pivot['% Difference',col]=((pivot['PostLaunch',col]/pivot['PreLaunch',col])-1)
    pivot['Abs Difference', col] = pivot['PostLaunch', col] - pivot['PreLaunch', col]
    # Count each outcome combination once for this group, then express it as a share of the group's calls.
    outcome_counts = df[df['Group'] == col][['Offered', 'Accepted']].value_counts()
    pivot['% of Calls in Queue', col] = 100 * outcome_counts / outcome_counts.sum()
# pivot['% Difference','B/-(W) Than Control']=pivot['% Difference','Control']-pivot['% Difference','Test']
# Control change minus test change: positive when the test group's handle time moved down relative to control.
pivot['Abs Difference', 'B/-(W) Than Control'] = pivot['Abs Difference', 'Control'] - pivot['Abs Difference', 'Test']

# pivot = pivot.sort_values(by=[('Total Calls in Queue','Test')],axis=0,ascending=False)
pivot = pivot.sort_index(axis=1, ascending=False)

display("AHT by Test Period and Sales and Accepts", pivot[['% of Calls in Queue', 'PreLaunch', 'PostLaunch', 'Abs Difference']])
# pivot.to_excel('AHT.xlsx')
del pivot, df, col, outcome_counts

'AHT by Test Period and Sales and Accepts'

Unnamed: 0_level_0,Period,% of Calls in Queue,% of Calls in Queue,PreLaunch,PreLaunch,PostLaunch,PostLaunch,Abs Difference,Abs Difference,Abs Difference
Unnamed: 0_level_1,Group,Test,Control,Test,Control,Test,Control,Test,Control,B/-(W) Than Control
Offered,Accepted,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Not Offered,Not Accepted,49.06,44.3,1016.6,998.16,903.24,901.0,-113.35,-97.16,16.19
Offered,Accepted,5.27,6.33,1987.77,1809.86,1814.22,1715.55,-173.55,-94.31,79.23
Offered,Not Accepted,45.67,49.37,1194.33,1096.88,1193.32,1151.96,-1.01,55.08,56.09


In [None]:
# Number of calls per queue for each (Period, Group) pair, plus the
# post-launch minus pre-launch change in that count.
df = add_conditionals(dfp.copy())
df['Group'] = np.where(df['TestGroup'], 'Test', 'Control')
df['Period'] = np.where(df['AfterExpertAssistLaunch'], 'PostLaunch', 'PreLaunch')
display(df.columns)

# 'Constant' is present on every row, so taking len() of it counts calls.
pivot = pd.pivot_table(
    df,
    index='TaskQueue',
    columns=['Period', 'Group'],
    values='Constant',
    aggfunc=len,
)
# Alternative view (share of calls rather than counts):
# pivot= 100*pivot.replace(np.nan,0)/pivot.sum()
for col in list(pivot['PostLaunch'].columns):
    pivot['Abs Difference', col] = pivot['PostLaunch', col] - pivot['PreLaunch', col]

display(pivot)

del df

## Regression of Handle Time
Benefit: increased explainability. However, as we saw, the data is not Gaussian, so the likelihood of this model being highly accurate is low.

In [31]:
df = add_conditionals(dfp.copy())
# df=df[df['Date']<=datetime(2023,10,8)]

# Look at only 2 queue types
# df=df[df['TaskQueue'].isin(['VZN_Onboarding_2628','VZN_MTS_Bundle_EN_2233'])]

# Interaction columns and a weeks-ago counter. They are built here so they can
# be swapped into the regressor list; the specification below does not use them.
df['Having ExpertAssist Effect on Onboarding'] = df['ExpertAssistOnForThisUser'] * df['VZN_Onboarding_2628']
df['Having ExpertAssist Effect on MTS Bundle'] = df['ExpertAssistOnForThisUser'] * df['VZN_MTS_Bundle_EN_2233']
df['Onboarding Post Launch'] = df['AfterExpertAssistLaunch'] * df['VZN_Onboarding_2628']

df['Week'] = df['Week'].astype(int)
df['WeeksAgo'] = df['Week'].max() - df['Week']

# Regressors of the fitted model. 'Constant' is the explicit intercept column.
# Alternatives that are currently switched off:
#   'Having ExpertAssist Effect on Onboarding',
#   'Having ExpertAssist Effect on MTS Bundle',
#   'WeeksAgo',
#   'VZN_MTS_Bundle_EN_2233',
regressors = [
    'ExpertAssistOnForThisUser',
    'Constant',
    'TestGroup',
    "VZN_Onboarding_2628",
    'Offered',
    'Accepted',
]

# Ordinary least squares of raw handle time on the regressors above.
model = sm.OLS(endog=df['HandleTime'], exog=df[regressors]).fit()
display(model.summary(), model.summary2().tables[1])

del df, model, regressors

0,1,2,3
Dep. Variable:,HandleTime,R-squared:,0.052
Model:,OLS,Adj. R-squared:,0.051
Method:,Least Squares,F-statistic:,141.3
Date:,"Mon, 16 Oct 2023",Prob (F-statistic):,2.17e-146
Time:,08:46:24,Log-Likelihood:,-106740.0
No. Observations:,12950,AIC:,213500.0
Df Residuals:,12944,BIC:,213500.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
ExpertAssistOnForThisUser,-58.9968,21.915,-2.692,0.007,-101.954,-16.039
Constant,916.7534,15.314,59.863,0.000,886.735,946.772
TestGroup,67.3875,19.470,3.461,0.001,29.223,105.552
VZN_Onboarding_2628,245.2008,28.377,8.641,0.000,189.577,300.824
Offered,208.5274,16.778,12.428,0.000,175.639,241.415
Accepted,661.6700,35.705,18.531,0.000,591.683,731.657

0,1,2,3
Omnibus:,6247.633,Durbin-Watson:,2.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,48806.48
Skew:,2.176,Prob(JB):,0.0
Kurtosis:,11.456,Cond. No.,5.97


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
ExpertAssistOnForThisUser,-59.0,21.92,-2.69,0.01,-101.95,-16.04
Constant,916.75,15.31,59.86,0.0,886.74,946.77
TestGroup,67.39,19.47,3.46,0.0,29.22,105.55
VZN_Onboarding_2628,245.2,28.38,8.64,0.0,189.58,300.82
Offered,208.53,16.78,12.43,0.0,175.64,241.42
Accepted,661.67,35.71,18.53,0.0,591.68,731.66


# Regression of LogHandleTime with prediction
Less explainable; however, it should be more accurate, as the AHT data was much closer to lognormal than to Gaussian.

In [None]:
df = add_conditionals(dfp.copy())

# Queue x ExpertAssist and queue x post-launch interaction columns, plus a
# weeks-ago counter. Only the MTS Bundle ExpertAssist interaction is used in
# the specification below; the rest are available for alternative fits.
df['Having ExpertAssist Effect on Onboarding'] = df['ExpertAssistOnForThisUser'] * df['VZN_Onboarding_2628']
df['Having ExpertAssist Effect on MTS Bundle'] = df['ExpertAssistOnForThisUser'] * df['VZN_MTS_Bundle_EN_2233']

df['Onboarding Post Launch'] = df['AfterExpertAssistLaunch'] * df['VZN_Onboarding_2628']
df['MTS Bundle Post Launch'] = df['AfterExpertAssistLaunch'] * df['VZN_MTS_Bundle_EN_2233']

df['Week'] = df['Week'].astype(int)
df['WeeksAgo'] = df['Week'].max() - df['Week']

# Regressors of the fitted model. 'Constant' is the explicit intercept column.
# Alternatives that are currently switched off:
#   'Having ExpertAssist Effect on Onboarding',
#   'ExpertAssistOnForThisUser',
regressors = [
    "VZN_Onboarding_2628",
    'Having ExpertAssist Effect on MTS Bundle',
    'Constant',
    'TestGroup',
    'Offered',
    'Accepted',
]

# Ordinary least squares of log handle time on the regressors above
# (use endog=df['HandleTime'] to fit the raw scale instead).
model = sm.OLS(endog=df['Log(HandleTime)'], exog=df[regressors]).fit()

# Not up to date prediction
# print(
#     "Expert Assist Being on For this user leads  to",
#     (np.exp(model.predict(exog=[1,1,1,df['VZN_Onboarding_2628'].mean(),df['Offered'].mean(),df['Accepted'].mean()]))
#     -np.exp(model.predict(exog=[0,1,1,df['VZN_Onboarding_2628'].mean(),df['Offered'].mean(),df['Accepted'].mean()])))[0],
#     " Additional(-Fewer) Seconds of AHT"
#     )

display(model.summary(), model.summary2().tables[1])
del df, model, regressors