# Define Functions and Run Initial Query

In [2]:
import pandas as pd
import pyodbc
import numpy as np
import plotly.express as px
from datetime import datetime
import itertools

pd.options.display.float_format = '{:,.5f}'.format
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

def file_to_string(fileName):
    """Return the entire contents of a text file as a single string.

    Args:
        fileName: path of the file to read (opened in text mode with the
            platform default encoding).

    Returns:
        The file contents as a str.
    """
    # The with-block guarantees the handle is closed even if read() raises.
    with open(fileName, 'r') as file:
        return file.read()

def run_query(query, dsn='edp-workbench-cshub'):
    """Run a SQL query over ODBC and return the result as a DataFrame.

    Args:
        query: SQL text to execute.
        dsn: name of the ODBC data source to connect to; defaults to the
            DSN this notebook has always used.

    Returns:
        pandas.DataFrame holding the query result.
    """
    cnxn = pyodbc.connect(f'DSN={dsn}', autocommit=True)
    try:
        return pd.read_sql_query(query, cnxn)
    finally:
        # Always release the connection, even when the query fails.
        cnxn.close()
    
def timestamp():
    """Return the current local time as 'YYYY-MM-DD HH:MM:SS AM/PM' (12-hour clock)."""
    return format(datetime.now(), '%Y-%m-%d %I:%M:%S %p')

def add_conditionals(df):
    """Return a copy of df with calendar columns derived from 'Date'.

    Added columns:
        Date      - the original 'Date' column parsed with pd.to_datetime
        Day       - weekday number of the date (Monday is 0)
        Week      - ISO calendar week number of the date
        Week <n>  - one 0/1 indicator column per distinct week number
        Constant  - the value 1 on every row

    The input frame is not modified.
    """
    out = df.copy()
    out['Date'] = pd.to_datetime(out['Date'])
    dates = out['Date']
    out['Day'] = dates.dt.weekday
    out['Week'] = dates.dt.isocalendar().week

    # One indicator column per week number present in the data.
    week_numbers = out['Week']
    for wk in week_numbers.unique():
        out['Week ' + str(wk)] = (week_numbers == wk).astype(int)

    out['Constant'] = 1
    return out

def add_metrics(df):
    """Return a copy of df with the ratio metrics and a 'Constant' column added.

    Each metric is the element-wise quotient of two existing columns:
        AHT       = Total_Resolution_Time / Sessions
        HERO      = HERO_XR_Score / HERO_XR_Count
        SP100     = Total_Accepted / Total_Eligible
        Transfers = Transfer_Score / Sessions

    The input frame is not modified.
    """
    ratios = {
        'AHT': ('Total_Resolution_Time', 'Sessions'),
        'HERO': ('HERO_XR_Score', 'HERO_XR_Count'),
        'SP100': ('Total_Accepted', 'Total_Eligible'),
        'Transfers': ('Transfer_Score', 'Sessions'),
    }
    out = df.copy()
    for metric_name, (numerator, denominator) in ratios.items():
        out[metric_name] = out[numerator] / out[denominator]
    out['Constant'] = 1
    return out

In [9]:
# Sample query: fetch up to 100 'RequestSummaryVoice' events in the
# 'GenerativeAISearchBotVoice' scope from hive.care.l1_verizon_home_events,
# restricted to producer 'eip-ingestion-data-science', a non-null message
# sid, and edp_updated_date on or after 2023-12-04.
#
# For both the start and end request time the query returns:
#   * the raw string from edp_raw_data_map, and
#   * a parsed timestamp: the raw value is trimmed, its space is replaced
#     with 'T' so it can be read as ISO 8601, and the result is converted
#     to the America/Chicago time zone.
# The agent id is cast to BIGINT inside TRY(...) and returned as "Employee".
test_df = run_query(
    """SELECT
            AT_TIMEZONE(
                FROM_ISO8601_TIMESTAMP(
                    REPLACE(
                        TRIM(element_at(VHE.edp_raw_data_map, 'ExtraData_endRequestTime')),
                        ' ','T'))
            ,'America/Chicago') as "endRequestTime",
            element_at(VHE.edp_raw_data_map, 'ExtraData_endRequestTime') RawTimestamp_End,
            
            AT_TIMEZONE(
                FROM_ISO8601_TIMESTAMP(
                    REPLACE(
                        TRIM(element_at(VHE.edp_raw_data_map, 'ExtraData_startRequestTime')),
                        ' ','T'))
            ,'America/Chicago') as "StartRequestTime",
            element_at(VHE.edp_raw_data_map, 'ExtraData_startRequestTime') RawTimestamp_Start,
        TRY(CAST(element_at(VHE.edp_raw_data_map, 'ExtraData_agentId') AS BIGINT)) as "Employee"
    FROM 
        hive.care.l1_verizon_home_events VHE
    WHERE 1=1
        AND element_at(VHE.edp_raw_data_map, '_header_eventContext_producer') = 'eip-ingestion-data-science'
        AND element_at(VHE.edp_raw_data_map, 'Identities_messageSid') IS NOT NULL
        AND element_at(VHE.edp_raw_data_map, 'Name') = 'RequestSummaryVoice'
        AND element_at(VHE.edp_raw_data_map, 'Scope')='GenerativeAISearchBotVoice'
        AND DATE(edp_updated_date) >= DATE('2023-12-04')
    LIMIT 100
"""
        )
# Bare expression so the notebook renders the frame as the cell output.
test_df

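# Output of the cell above (truncated; appears to be a warning pointing at this line of run_query):
#   df = pd.read_sql_query(query,cnxn)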


In [None]:
# Read the daily-metrics SQL from disk, run it, and preview the first five rows.
dfp = run_query(query=file_to_string(fileName='DailyMetrics.SQL'))
display(dfp.head(n=5))

# Test
1. Creates representative samples
2. Tests whether each sample is within a given number of standard deviations of the test group; if so, adds it to a list for further examination.

In [None]:
def _compare_groups(z_scores, metrics, labels, group_size=6):
    """Compare every possible test group against the remaining rows.

    Args:
        z_scores: DataFrame indexed by member id with one column per metric.
        metrics: list of column names in z_scores to compare.
        labels: pair of output column names, first for the list of test
            members and second for the list of control members.
        group_size: number of members placed in each test group.

    Returns:
        DataFrame with one row per combination of group_size index values.
        Each row holds the test-member list, the control-member list (every
        index value not in the test group) and, per metric, a
        '<metric> Difference' column equal to the test-group mean minus the
        control-group mean. Rows are numbered consecutively from 0. When
        there are fewer than group_size members the frame is empty but still
        has all columns.
    """
    test_label, control_label = labels
    rows = []
    # Consume the combinations lazily instead of materialising them all in a
    # list, and collect plain dicts so the frame is built once at the end
    # rather than re-copied by a concat on every iteration.
    for members in itertools.combinations(z_scores.index, r=group_size):
        in_test = z_scores.index.isin(members)
        difference = (z_scores.loc[in_test, metrics].mean()
                      - z_scores.loc[~in_test, metrics].mean())
        row = {
            test_label: list(z_scores.index[in_test]),
            control_label: list(z_scores.index[~in_test]),
        }
        for metric in metrics:
            row[metric + ' Difference'] = difference[metric]
        rows.append(row)
    columns = [test_label, control_label] + [m + ' Difference' for m in metrics]
    return pd.DataFrame(rows, columns=columns)


# Define Metrics and Groups Being Tested
mainMetrics = ['Sessions', 'AHT', 'SP100', 'HERO', 'Transfers']
testGroup = ["Test Members", "Control Members"]

# Get per-employee totals across all rows of dfp
results_pivot = pd.pivot_table(
    dfp,
    index='Employee',
    values=['Total_Accepted', 'Total_Eligible', 'HERO_XR_Score', 'HERO_XR_Count',
            'Transfer_Score', 'Transfer_Count', 'Total_Resolution_Time', 'Sessions'],
    aggfunc='sum')

# Add the ratio metrics and keep only the metrics under test
results_pivot = add_metrics(results_pivot)
results_pivot = results_pivot[mainMetrics]

# Convert metrics to z-scores (column-wise: subtract the mean, divide by the std)
results_pivot = (results_pivot - results_pivot.mean()) / results_pivot.std()

# Compare every 6-employee test group with the remaining employees.
# NOTE(review): this produces C(n, 6) rows for n employees, which grows very
# quickly -- confirm the employee count keeps this (and the Excel export) feasible.
testFrame = _compare_groups(results_pivot, mainMetrics, testGroup, group_size=6)

# Drop the intermediates so only testFrame is left behind by this cell
del testGroup, mainMetrics, results_pivot

display(testFrame)
testFrame.to_excel('ExpertGroupings.xlsx')


In [None]:
# Per-employee activity overview: latest date, total sessions, and charts of
# daily sessions.
df = dfp.copy()  # work on a copy so the Date conversion does not alter dfp
df['Date'] = pd.to_datetime(df['Date'])

# Keep only rows whose Sessions value is >= 0.
pivot_base = df[df['Sessions'] >= 0]

# Latest date and total sessions per employee, ordered by latest date.
# Aggregations are named by string: passing the builtins max/sum to pandas
# is deprecated and the string names select the same aggregation.
last_date = pd.pivot_table(pivot_base, aggfunc='max', index='Employee', values='Date').sort_values('Date')
total_sessions = pd.pivot_table(pivot_base, aggfunc='sum', index='Employee', values='Sessions')
pivot = pd.concat((last_date, total_sessions), axis=1).sort_values('Date')

# Bar chart of summed daily sessions for a hand-picked set of employees.
highlighted_employees = [549667, 573190, 293932, 536491]
px.bar(pd.pivot_table(df[df['Employee'].isin(highlighted_employees)],
                      index='Date', values='Sessions', columns='Employee', aggfunc='sum')).show()

# Line chart of daily sessions for every employee; no aggfunc is given, so
# pivot_table applies its default aggregation (the mean).
px.line(pd.pivot_table(df, index='Date', values='Sessions', columns='Employee')).show()

display(len(pivot), pivot)
del df, pivot_base, pivot, last_date, total_sessions, highlighted_employees