# Define Functions and Run Initial Query

In [1]:
import pandas as pd
import pyodbc
import numpy as np
import plotly.express as px
from datetime import datetime
import itertools

pd.options.display.float_format = '{:,.5f}'.format
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

def file_to_string(fileName):
    """Return the entire text content of the file at ``fileName``.

    Parameters
    ----------
    fileName : str or path-like
        Path of the file to read; it is opened in text mode ('r').

    Returns
    -------
    str
        Everything ``read()`` returns for the file.

    Raises
    ------
    OSError
        Propagated from ``open`` if the file cannot be opened.
    """
    # The context manager closes the handle even if read() raises.
    with open(fileName, 'r') as file:
        return file.read()

def run_query(query):
    """Run ``query`` over the 'edp-workbench-cshub' ODBC DSN and return the rows.

    Parameters
    ----------
    query : str
        SQL text passed unchanged to ``pandas.read_sql_query``.

    Returns
    -------
    pandas.DataFrame
        The result set produced by ``pandas.read_sql_query``.
    """
    cnxn = pyodbc.connect('DSN=edp-workbench-cshub', autocommit=True)
    try:
        return pd.read_sql_query(query, cnxn)
    finally:
        # Always release the connection, including when the query fails.
        cnxn.close()
    
def timestamp():
    """Return the current local time as text.

    The layout is year-month-day, then a 12-hour clock time followed by
    the AM/PM marker (strftime pattern '%Y-%m-%d %I:%M:%S %p').
    """
    stamp_format = '%Y-%m-%d %I:%M:%S %p'
    current_time = datetime.now()
    return current_time.strftime(stamp_format)

def add_conditionals(df):
    """Return a copy of ``df`` with calendar columns derived from 'Date'.

    The copy gets:
      * 'Date' converted with ``pd.to_datetime``
      * 'Day'  - weekday number from ``.dt.weekday``
      * 'Week' - ISO week number from ``.dt.isocalendar().week``
      * one 'Week <n>' 0/1 indicator column per distinct week value
      * 'Constant' - the value 1 on every row

    The frame passed in is not modified.
    """
    out = df.copy()

    dates = pd.to_datetime(out['Date'])
    out['Date'] = dates
    out['Day'] = dates.dt.weekday
    out['Week'] = dates.dt.isocalendar().week

    # One indicator column per distinct week, in order of first appearance.
    week_numbers = out['Week']
    for week_number in week_numbers.unique():
        in_this_week = week_numbers == week_number
        out['Week ' + str(week_number)] = in_this_week.astype(int)

    out['Constant'] = 1
    return out

def add_metrics(df):
    """Return a copy of ``df`` with ratio metrics and a 'Constant' entry added.

    Added entries (each is numerator / denominator):
      * 'AHT'       = 'Total_Resolution_Time' / 'Sessions'
      * 'HERO'      = 'HERO_XR_Score' / 'HERO_XR_Count'
      * 'SP100'     = 'Total_Accepted' / 'Total_Eligible'
      * 'Transfers' = 'Transfer_Score' / 'Sessions'
      * 'Constant'  = 1

    Only ``copy()`` and label-based item get/set are used, so this works on
    a DataFrame (labels are columns) as well as on a Series of totals
    (labels are index entries). The input object is left untouched.
    """
    out = df.copy()

    # (new label, numerator label, denominator label), applied in this order
    ratio_definitions = (
        ('AHT', 'Total_Resolution_Time', 'Sessions'),
        ('HERO', 'HERO_XR_Score', 'HERO_XR_Count'),
        ('SP100', 'Total_Accepted', 'Total_Eligible'),
        ('Transfers', 'Transfer_Score', 'Sessions'),
    )
    for metric_name, numerator, denominator in ratio_definitions:
        out[metric_name] = out[numerator] / out[denominator]

    out['Constant'] = 1
    return out

In [None]:
# Load the daily metrics: read the SQL text from disk, execute it, and
# show the resulting frame. `dfp` is the raw input for the cells below.
daily_metrics_sql = file_to_string('DailyMetrics.SQL')
dfp = run_query(daily_metrics_sql)
display(dfp)

# Test
1. Creates representative samples
2. Tests whether each sample is within a number of standard deviations of the test group; if so, adds it to a list for further examination.

In [None]:
# Enumerate every way of moving 7 of the 15 pool employees into the test
# group (the remaining 8 form the control group), compare the two groups on
# Sessions, AHT and SP100, and export the comparison with z-scores.

# Column labels for the two membership columns and the compared metrics
testGroup = ["Test Members", "Control Members"]
difference_columns = ['Sessions Difference', 'AHT Difference', 'SP100 Difference']


# Get totals for the period (one row per employee).
# The string 'sum' is used instead of np.sum: passing the NumPy callable
# emits a FutureWarning on pandas >= 2.1 and 'sum' keeps the same result.
results_pivot = pd.pivot_table(
    dfp.copy(),
    index='Employee',
    values=['Total_Accepted','Total_Eligible','HERO_XR_Score','HERO_XR_Count',
            'Transfer_Score', 'Transfer_Count','Total_Resolution_Time','Sessions'],
    aggfunc='sum')
display(results_pivot)


# Pool of employees to be split between test and control
distributionList = [597572, 596097, 597580, 597576, 597574, 597685, 597624, 552704, 575996, 569375, 546357, 546831, 574176, 568545, 569153]

# Every 7-member subset of the pool is one candidate sample
control_combinations = list(itertools.combinations(distributionList, r=7))

# Employees that are in the test group in every sample
test_Base    =  [548026, 426097, 548646, 581015, 547655, 572222, 567799,
     547381, 578299, 580867, 550910, 550446, 579630]

total_samples = len(control_combinations)

# Collect one record per sample and build the frame once at the end;
# growing a DataFrame row by row with .loc is slow and fragile.
records = []
progress_step = max(1, total_samples // 20)  # report roughly every 5%
for i, sample in enumerate(control_combinations, start=1):

    # Add the sampled employees to the fixed test base; the rest of the
    # pool becomes the control group.
    test_List    =  test_Base + list(sample)
    control_List =  [x for x in distributionList if x not in sample]

    # Sum each group's totals, then derive the ratio metrics from the sums
    test_Results    = add_metrics(results_pivot[results_pivot.index.isin(test_List)].sum())
    control_Results = add_metrics(results_pivot[results_pivot.index.isin(control_List)].sum())

    records.append({
        'Test Members': test_List,
        'Control Members': control_List,
        'Sessions Difference': test_Results['Sessions'] - control_Results['Sessions'],
        'AHT Difference': test_Results['AHT'] - control_Results['AHT'],
        'SP100 Difference': test_Results['SP100'] - control_Results['SP100'],
        })

    # Occasional progress report instead of one line per sample
    if i % progress_step == 0 or i == total_samples:
        print(str(round(i*100/total_samples,3))+'%')

testFrame = pd.DataFrame(records, columns=testGroup + difference_columns)
testFrame[difference_columns] = testFrame[difference_columns].astype(np.float64)

# Free the large intermediates; later cells only need dfp and testFrame
del control_combinations, testGroup, results_pivot, records

# Standardise each difference across all samples
for metric in difference_columns:
    testFrame[metric+' Z Score'] = (testFrame[metric]-testFrame[metric].mean())/testFrame[metric].std()
display(testFrame, testFrame.dtypes, testFrame.describe())

# Sum of absolute z-scores: smaller means the two groups are closer overall.
# The column label's spelling is kept byte-for-byte in case readers of the
# exported workbook key on it.
testFrame['Manahatan Distance Z Scores'] = testFrame['Sessions Difference Z Score'].abs()+ testFrame['AHT Difference Z Score'].abs()+  testFrame['SP100 Difference Z Score'].abs()
testFrame.to_excel('ExpertGroupings PreSelectedControl v3.xlsx')
# testFrame is intentionally left defined: the next cell exports it again,
# and deleting it here makes that cell fail on Restart & Run All.

In [7]:
# Write the sample comparison frame to the Excel workbook.
# Requires `testFrame` to still be defined in the kernel; this raises
# NameError if the name has been deleted before this cell runs.
export_path = 'ExpertGroupings PreSelectedControl v3.xlsx'
testFrame.to_excel(export_path)

In [8]:
# NEW TEST GROUP SUMMARY SINCE NOV 1st
# Flags every row of dfp by whether its employee is in the new test group,
# totals the raw columns per flag value, then derives the ratio metrics.
# NOTE(review): in the output recorded below this cell, HERO_XR_Count is
# fractional while HERO_XR_Score is a whole number - possibly the two are
# swapped upstream; confirm against DailyMetrics.SQL.
new_test_group_ids = [548026, 426097, 548646, 581015, 547655, 572222, 572269, 567799, 578273, 547381, 578299, 580867, 550910, 550446, 579630]

df = dfp.copy()
df['In Test Group'] = df['Employee'].isin(new_test_group_ids)
# aggfunc='sum' rather than np.sum: the NumPy callable emits a FutureWarning
# on pandas >= 2.1, and the string keeps the same result.
df = pd.pivot_table(
    df,
    index='In Test Group',
    values=['Total_Accepted','Total_Eligible','HERO_XR_Score','HERO_XR_Count',
            'Transfer_Score', 'Transfer_Count','Total_Resolution_Time','Sessions'],
    aggfunc='sum')
df = add_metrics(df)
df

Unnamed: 0_level_0,HERO_XR_Count,HERO_XR_Score,Sessions,Total_Accepted,Total_Eligible,Total_Resolution_Time,Transfer_Count,Transfer_Score,AHT,HERO,SP100,Transfers,Constant
In Test Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
False,564478.42,7285.0,9952,577.0,7417.0,12193008,0.0,7860.0,1225.18167,0.01291,0.07779,0.78979,1
True,717471.5,9331.0,11234,862.0,8601.0,13782292,0.0,7242.0,1226.83746,0.01301,0.10022,0.64465,1


In [None]:
# OLD TEST GROUP SUMMARY SINCE NOV 1st
# Flags every row of dfp by whether its employee is in the old test group,
# totals the raw columns per flag value, then derives the ratio metrics.
old_test_group_ids = [548026, 426097, 575996, 569375, 581015, 572222, 567799, 547381, 578299, 580867, 574176, 568545, 569153, 579630, 552704]

df = dfp.copy()
df['In Test Group'] = df['Employee'].isin(old_test_group_ids)
# aggfunc='sum' rather than np.sum: the NumPy callable emits a FutureWarning
# on pandas >= 2.1, and the string keeps the same result.
df = pd.pivot_table(
    df,
    index='In Test Group',
    values=['Total_Accepted','Total_Eligible','HERO_XR_Score','HERO_XR_Count',
            'Transfer_Score', 'Transfer_Count','Total_Resolution_Time','Sessions'],
    aggfunc='sum')
df = add_metrics(df)
df