In [1]:
import pandas as pd
from statsmodels.stats.weightstats import ztest
import numpy as np
import math
#import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def GetRawDataForPilotPeople(TargetPath, PilotGPNs=None):
    """Load the time-entry extract and keep only rows belonging to pilot people.

    Parameters
    ----------
    TargetPath : str or file-like
        Location of the CSV extract; passed straight to ``pd.read_csv``.
    PilotGPNs : list-like, optional
        GPNs to keep. When omitted, the notebook-level
        ``PredictedStartDates['GPN']`` column is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        The selected columns, with 'Transaction Week End Date' parsed to
        datetimes, restricted to rows whose 'GPN' is a pilot GPN.
    """
    WantedColumns = ['GPN', 'Person MU Name/Code', 'Transaction Date', 'Transaction Week End Date',
                     'Eng Number', 'Eng Type Name', 'Eng Activity Name/Code', 'Eng Type Class Name', 'HRS']

    # Retrieve Raw Data With Chosen Columns #
    TimeData = pd.read_csv(TargetPath, usecols=WantedColumns, encoding='ISO-8859-1', low_memory=False)

    # Parse the whole column in one vectorised call instead of once per row;
    # the result is already a datetime column, so no second pd.Timestamp pass is needed.
    TimeData['Transaction Week End Date'] = pd.to_datetime(TimeData['Transaction Week End Date'],
                                                           format='%Y/%m/%d %H:%M:%S')

    # People Data #
    if PilotGPNs is None:
        # Fall back to the notebook-level table of pilot users.
        PilotGPNs = PredictedStartDates['GPN']
    TimeData = TimeData[TimeData['GPN'].isin(PilotGPNs)]

    # Remove Data Older Than 21/07/2017 #
    #TimeData = TimeData[TimeData['Transaction Week End Date'] >= '2017-07-21']     #Commented to get data of before Time recorder implemented

    return TimeData

In [3]:
def SelectChargeableData(TimeData):
    """Drop every time entry whose engagement type class is listed as non-chargeable.

    The exclusion list is the 'Eng Type Class Name' column of the notebook-level
    ``NonChargeList`` table (loaded from "Non-Chargeable EngagementTypeClassName.csv",
    which replaced the older engagement-code based "Non-Chargeable EngagementList.txt").

    Returns the rows of ``TimeData`` whose 'Eng Type Class Name' is not in that list.
    """
    ExcludedClasses = NonChargeList['Eng Type Class Name']
    IsExcluded = TimeData['Eng Type Class Name'].isin(ExcludedClasses)

    return TimeData[~IsExcluded]

In [4]:
def CalculateWeeklyNumberOfDays(TimeDataChargeable):
    """Count, per person and week, how many distinct transaction dates were recorded.

    Returns a Series named 'Transaction Date', indexed by
    ('GPN', 'Transaction Week End Date').
    """
    WeekKeys = ['GPN', 'Transaction Week End Date']
    PerPersonWeek = TimeDataChargeable[WeekKeys + ['Transaction Date']].groupby(WeekKeys)

    return PerPersonWeek['Transaction Date'].nunique()

In [5]:
def WeeklyTotalHours(TimeDataChargeable):
    """Sum the recorded hours per person and week.

    Returns a DataFrame with a single 'HRS' column, indexed by
    ('GPN', 'Transaction Week End Date').
    """
    WeekKeys = ['GPN', 'Transaction Week End Date']
    HoursByEntry = TimeDataChargeable.loc[:, WeekKeys + ['HRS']]

    return HoursByEntry.groupby(WeekKeys).sum()

In [6]:
def WeeklyTotalHours_Billable(TimeDataChargeable):
    """Sum, per person and week, the hours booked on 'External Customer Project' entries.

    Weeks without any such entry do not appear in the result. Returns a DataFrame
    with a single 'HRS' column, indexed by ('GPN', 'Transaction Week End Date').
    """
    WeekKeys = ['GPN', 'Transaction Week End Date']
    IsBillable = TimeDataChargeable['Eng Type Name'] == 'External Customer Project'
    BillableEntries = TimeDataChargeable.loc[IsBillable, WeekKeys + ['HRS']]

    return BillableEntries.groupby(WeekKeys).sum()

In [7]:
def BillableRatio(WeeklyBillable, TimeDataChargeable_Grouped):
    """Divide billable hours by total hours and discard entries where the ratio is missing.

    The two inputs are divided element-wise (pandas aligns them on their index);
    any resulting NaN is dropped before returning.
    """
    Ratios = WeeklyBillable / TimeDataChargeable_Grouped

    return Ratios.dropna()

In [8]:
def ActivityChangeCounter(TimeDataChargeable, DaysCount=None):
    """Count positive-hour activity entries per person and week, scaled to a five-day week.

    Parameters
    ----------
    TimeDataChargeable : pandas.DataFrame
        Time entries with 'GPN', 'Transaction Week End Date',
        'Eng Activity Name/Code' and 'HRS' columns.
    DaysCount : pandas.Series, optional
        Number of recorded days per ('GPN', 'Transaction Week End Date').
        When omitted, the notebook-level ``ChargeableDaysCount`` is used
        (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One 'Count' column holding ``entries / recorded days * 5``; person-weeks
        for which the value cannot be computed are dropped.
    """
    if DaysCount is None:
        # Fall back to the notebook-level per-week day counts.
        DaysCount = ChargeableDaysCount

    WeekKeys = ['GPN', 'Transaction Week End Date']

    # Only entries with positive hours count towards the activity tally.
    PositiveEntries = TimeDataChargeable[TimeDataChargeable['HRS'] > 0]
    EntriesPerWeek = PositiveEntries.groupby(WeekKeys)['Eng Activity Name/Code'].count()

    WeeklyRate = (EntriesPerWeek / DaysCount * 5).dropna()

    # to_frame names the column explicitly; building the frame with
    # pd.DataFrame(series, columns=['Count']) only works while the series is unnamed.
    return WeeklyRate.to_frame('Count')

In [9]:
def WeeklyTotalHours_NonBillable(TimeDataChargeable):
    """Sum, per person and week, the hours booked on anything other than 'External Customer Project'.

    Weeks without any such entry do not appear in the result. Returns a DataFrame
    with a single 'HRS' column, indexed by ('GPN', 'Transaction Week End Date').
    """
    WeekKeys = ['GPN', 'Transaction Week End Date']
    IsNonBillable = TimeDataChargeable['Eng Type Name'] != 'External Customer Project'
    NonBillableEntries = TimeDataChargeable.loc[IsNonBillable, WeekKeys + ['HRS']]

    return NonBillableEntries.groupby(WeekKeys).sum()

In [10]:
def ExtrapolateOnWeeklyBasis(TimeDataChargeable_Grouped):
    """Rescale weekly hours to a five-day week using the number of recorded days.

    'HRS' is divided by the notebook-level ``ChargeableDaysCount`` (aligned on the
    index) and multiplied by 5. The column is overwritten on the frame that was
    passed in, and that same frame is returned.
    """
    HoursPerRecordedDay = TimeDataChargeable_Grouped['HRS'] / ChargeableDaysCount
    TimeDataChargeable_Grouped['HRS'] = HoursPerRecordedDay * 5

    return TimeDataChargeable_Grouped

In [11]:
def RemoveOutliers(DataSeries, Tolerance=1.40):
    """Keep only the values lying within ``Tolerance`` standard deviations of the median.

    Parameters
    ----------
    DataSeries : pandas.Series
        Values to filter.
    Tolerance : float, default 1.40
        Half-width of the accepted band, in standard deviations around the median.
        Both band edges are inclusive.

    Returns
    -------
    numpy.ndarray
        The retained values, in their original order.
    """
    Centre = DataSeries.median()
    HalfWidth = Tolerance * DataSeries.std()

    Retained = DataSeries[DataSeries.between(Centre - HalfWidth, Centre + HalfWidth)]

    # Series.get_values() was removed in pandas 1.0; np.asarray yields the same ndarray.
    return np.asarray(Retained)

In [12]:
def TestHypothesis(DataSet_1, DataSet_2, alternative):
    """Run the two-sample ``ztest`` on the given samples.

    ``alternative`` is forwarded unchanged to ``ztest``. The result is the
    (statistic, p-value) pair that ``ztest`` produces.
    """
    Statistic, PValue = ztest(DataSet_1, DataSet_2, alternative=alternative)

    return Statistic, PValue

In [13]:
def CleanAggregatedData(TimeDataChargeable_Grouped):
    """Keep the person-weeks that have positive total hours and a known billable total.

    A row survives when 'TWH' is greater than zero and 'TWBH' is not null.

    Returns an independent copy of the surviving rows: callers add further
    columns to the result, and assigning into a bare filtered slice triggers
    pandas' SettingWithCopy warning.
    """
    HasHours = TimeDataChargeable_Grouped['TWH'] > 0
    HasBillable = TimeDataChargeable_Grouped['TWBH'].notnull()

    return TimeDataChargeable_Grouped[HasHours & HasBillable].copy()

In [14]:
def LocationBasedNormalisation(TimeDataChargeable_Grouped, LondonGPNs=None):
    """Add location-normalised versions of the weekly metrics to the frame.

    Each row is compared with the expected working week of its person:
    35 hours for London-based people, 40 hours for everyone else.

    * 'TWH', 'TWBH', 'TWNBH' -> 'NTWH', 'NTWBH', 'NTWNBH' = (value - expected) / expected
    * 'NWAC'                 -> 'NWACpH'                  = value / expected

    Parameters
    ----------
    TimeDataChargeable_Grouped : pandas.DataFrame
        Weekly metrics whose first index level holds the GPN. The new columns
        are written onto this frame, which is also returned.
    LondonGPNs : list-like, optional
        GPNs of London-based people. When omitted, the notebook-level
        ``LondonBasedPilotPeople`` is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        The input frame with the four extra columns.
    """
    ExpectedWeeklyWorkingHours_London = 35
    ExpectedWeeklyWorkingHours_NonLondon = 40

    if LondonGPNs is None:
        # Fall back to the notebook-level list of London-based people.
        LondonGPNs = LondonBasedPilotPeople

    # A boolean mask over the GPN index level replaces label-list .loc lookups,
    # which raise KeyError on current pandas when a listed GPN has no rows left
    # in the frame. Anyone not flagged as London gets the non-London hours.
    IsLondon = TimeDataChargeable_Grouped.index.get_level_values(0).isin(LondonGPNs)
    ExpectedHours = np.where(IsLondon, ExpectedWeeklyWorkingHours_London,
                             ExpectedWeeklyWorkingHours_NonLondon)

    ColumnNames = ['TWH', 'TWBH', 'NWAC', 'TWNBH']

    for ColumnName in ColumnNames:

        if ColumnName != 'NWAC':
            # Hour totals: relative deviation from the expected working week.
            TimeDataChargeable_Grouped['N' + ColumnName] = (
                TimeDataChargeable_Grouped[ColumnName] - ExpectedHours) / ExpectedHours
        else:
            # Activity counts: expressed per expected working hour.
            TimeDataChargeable_Grouped[ColumnName + 'pH'] = TimeDataChargeable_Grouped[ColumnName] / ExpectedHours

    return TimeDataChargeable_Grouped

In [15]:
def BeforeAfterDataSets(PredictedStartDates, DataSet):
    """Split the weekly data into rows before and rows from each person's start date onwards.

    Parameters
    ----------
    PredictedStartDates : pandas.DataFrame
        Indexed by start date, with a 'GPN' column naming who starts on that date.
    DataSet : pandas.DataFrame
        Weekly metrics with 'GPN' and 'Transaction Week End Date' columns.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(before, after)``: rows whose week-end date is strictly earlier than the
        person's start date, and rows on or after it. Both have a fresh 0..n-1 index
        and the same columns as ``DataSet``.
    """

    def _Combine(Pieces):
        # Stitch the per-start-date slices together; keep the columns when there are none.
        if not Pieces:
            return pd.DataFrame(columns=DataSet.columns)
        return pd.concat(Pieces, ignore_index=True)

    BeforePieces = []
    AfterPieces = []

    for StartDate in PredictedStartDates.index.unique():

        # A list-based lookup always yields a Series, even when only one person
        # starts on this date, so no scalar special case is needed.
        StartingGPNs = PredictedStartDates.loc[[StartDate], 'GPN']
        IsStarter = DataSet['GPN'].isin(StartingGPNs)

        BeforePieces.append(DataSet[IsStarter & (DataSet['Transaction Week End Date'] < StartDate)])
        AfterPieces.append(DataSet[IsStarter & (DataSet['Transaction Week End Date'] >= StartDate)])

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every iteration.
    NonTimeRecorderPilotData = _Combine(BeforePieces)
    TimeRecorderPilotData = _Combine(AfterPieces)

    return NonTimeRecorderPilotData, TimeRecorderPilotData

In [16]:
def ReportTestResults(DataSet_1, 
                      DataSet_2, 
                      TestArgument, 
                      OutlierRemoval=False, 
                      Normalised=False, 
                      BillableRatio=False, 
                      ActivityChange=False,
                      Alpha=0.1):
    """Run the two-sample z-test on two samples and print a plain-language summary.

    Parameters
    ----------
    DataSet_1, DataSet_2 : pandas.Series
        The "before" and "after" samples, in that order.
    TestArgument : str
        Forwarded to the z-test as its ``alternative``.
    OutlierRemoval : bool
        When True both samples are passed through ``RemoveOutliers`` first.
    Normalised : bool
        When True the mean difference is reported twice, rescaled by the 35-hour
        (London) and 40-hour (other locations) working weeks.
    BillableRatio : bool
        When True the difference is reported as a ratio and a percentage of the
        first mean instead of in hours or activities.
    ActivityChange : bool
        When True the unit printed is 'activity/week' rather than 'hours/week'.
    Alpha : float, default 0.1
        Significance level: the result is called SIGNIFICANT when the p-value is
        at most ``Alpha``. The confidence level printed at the end is ``1 - Alpha``.

    Returns
    -------
    None
        Everything is printed.
    """

    OutlierPhrase = 'Outliers Not Removed'
    if OutlierRemoval:
        DataSet_1 = RemoveOutliers(DataSet_1)
        DataSet_2 = RemoveOutliers(DataSet_2)
        OutlierPhrase = 'Outliers Removed'

    z_test = TestHypothesis(DataSet_1, DataSet_2, alternative=TestArgument)
    Mean1 = DataSet_1.mean()
    Mean2 = DataSet_2.mean()
    Change = Mean2 - Mean1
    Magnitude = abs(Change)

    # NOTE(review): the header advertises un-equal variances; statsmodels' ztest
    # appears to use a pooled variance by default -- confirm and reword if so.
    print('\n2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes')
    print(' '*28, OutlierPhrase)
    print('-.-'*25)
    # The statistic comes from a z-test, so it is labelled as a z-value.
    print('\nz-value = {:4.3f}\np-value = {:4.4f}'.format(z_test[0], z_test[1]))
    print('\nConclusion:\n-----------')

    # Describe the direction that was actually observed rather than the one the
    # chosen alternative hoped for.
    TestArgumentPhrase = 'INCREASED' if Change >= 0 else 'DECREASED'

    # Written as "p <= Alpha" so that an undefined (NaN) p-value is never
    # reported as significant.
    TestPhrase = 'SIGNIFICANT' if z_test[1] <= Alpha else 'NOT SIGNIFICANT'
    print ('\nThe difference in the mean scores of the groups ({:3.4f}, {:3.4f}) is {}.'.format(Mean1, Mean2, TestPhrase))

    Unit = 'activity/week' if ActivityChange else 'hours/week'

    if BillableRatio:
        print('The ratio has {} by {:3.3f} ({:3.1f}%) after the use of Time Recorder.'.format(TestArgumentPhrase, 
                                                                                 Magnitude, Magnitude/Mean1*100))
    elif not Normalised:
        print('The mean has {} by {:3.2f} {} after the use of Time Recorder.'.format(TestArgumentPhrase, 
                                                                                     Magnitude, Unit))
    else:
        # Normalised values are fractions of the expected week; scale them back
        # by each location's expected weekly hours.
        print('\nThe mean for LONDON has {} by {:3.2f} {} after the use of Time Recorder'.format(TestArgumentPhrase,
                                                                                    Magnitude*35, Unit))
        print('Whereas the mean for OTHER LOCATIONS has {} by {:3.2f} {} after the use of Time Recorder'.format(
                                                                TestArgumentPhrase, Magnitude*40, Unit))

    # Report the confidence level that matches the cutoff actually applied.
    print('\nConfidence Level: {:.0f}%'.format((1 - Alpha) * 100))
    print('-.-'*25)

    return None

In [17]:
# Non-chargeable engagement type classes #
# Read from "Non-Chargeable EngagementTypeClassName.csv"; per the original note its
# "Eng Type Class Name" column holds only NON CHARGEABLE and AUTHORIZED. This file
# replaces the earlier tab-separated "Non-Chargeable EngagementList.txt".
NonChargeList = pd.read_csv("Non-Chargeable EngagementTypeClassName.csv")

# Predicted start dates: indexed by the file's second column, first column renamed to GPN #
PredictedStartDates = pd.read_csv("TRv3_Users.csv", index_col=1).rename(columns={'Unnamed: 0' : 'GPN'})

# Time entries of the pilot people, then the chargeable subset #
TimeData = GetRawDataForPilotPeople("GTE Extract_Updated_Opslist.csv")
TimeDataChargeable = SelectChargeableData(TimeData)

# Distinct recorded days per person and week #
ChargeableDaysCount = CalculateWeeklyNumberOfDays(TimeDataChargeable)

# GPNs having at least one entry under the London management unit #
IsLondonEntry = TimeDataChargeable['Person MU Name/Code'] == 'London (00207)'
LondonBasedPilotPeople = TimeDataChargeable.loc[IsLondonEntry, 'GPN'].unique()
del IsLondonEntry

In [18]:
# Preview the first rows of the chargeable time entries.
TimeDataChargeable.head()

Unnamed: 0,GPN,Person MU Name/Code,Transaction Date,Transaction Week End Date,Eng Number,Eng Activity Name/Code,Eng Type Name,Eng Type Class Name,HRS
4,GB012020846,United Kingdom (00195),2018-11-06 00:00:00.000,2018-11-09,39856452,"""General (0000)""",External Customer Project,CHARGEABLE,0.6
7,GB012020846,United Kingdom (00195),2018-11-15 00:00:00.000,2018-11-16,40003802,"""General (0000)""",External Customer Project,CHARGEABLE,0.4
8,GB012020846,United Kingdom (00195),2018-11-16 00:00:00.000,2018-11-16,39281317,"""General (0000)""",External Customer Project,CHARGEABLE,1.2
9,GB012020846,United Kingdom (00195),2018-11-15 00:00:00.000,2018-11-16,38143832,"""General (0000)""",External Customer Project,CHARGEABLE,0.8
10,GB012020846,United Kingdom (00195),2018-11-16 00:00:00.000,2018-11-16,38143832,"""General (0000)""",External Customer Project,CHARGEABLE,0.4


In [19]:
# Total Weekly Hours #
# Hours are summed per person and week, then rescaled to a five-day week; the
# resulting 'HRS' column is renamed to 'TWH'.
TimeDataChargeable_Grouped = ExtrapolateOnWeeklyBasis(WeeklyTotalHours(TimeDataChargeable))
TimeDataChargeable_Grouped.rename(columns={'HRS' : 'TWH'}, inplace=True)

# Total Weekly Billable Hours #
# Assigned by index alignment: person-weeks with no billable entries get NaN here.
TimeDataChargeable_Grouped['TWBH'] = ExtrapolateOnWeeklyBasis(WeeklyTotalHours_Billable(TimeDataChargeable))['HRS']

# Data Cleaning Based on TWH and TWBH
# Keeps rows with TWH > 0 and a non-null TWBH.
# NOTE(review): this drops every person-week without billable hours, so WBR below
# can never be 0 -- confirm that excluding those weeks is intended.
TimeDataChargeable_Grouped = CleanAggregatedData(TimeDataChargeable_Grouped)

# Weekly Billable Ratios #
# WBR = TWBH / TWH.
TimeDataChargeable_Grouped['WBR'] = BillableRatio(TimeDataChargeable_Grouped['TWBH'], TimeDataChargeable_Grouped['TWH'])

# Number of Weekly Activity Change #
# Count of positive-hour activity entries per week, scaled to a five-day week.
TimeDataChargeable_Grouped['NWAC'] = ActivityChangeCounter(TimeDataChargeable)['Count']

# Total Weekly Non-Billable Hours #
# NOTE(review): person-weeks with no non-billable entries end up NaN (not 0) and are
# later removed by .dropna() before testing -- confirm whether 0 was intended.
TimeDataChargeable_Grouped['TWNBH'] = ExtrapolateOnWeeklyBasis(WeeklyTotalHours_NonBillable(TimeDataChargeable))['HRS']

In [20]:
# Preview the first 30 person-weeks of the aggregated metrics (TWH, TWBH, WBR, NWAC, TWNBH).
TimeDataChargeable_Grouped.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,TWH,TWBH,WBR,NWAC,TWNBH
GPN,Transaction Week End Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GB01038405C,2016-11-11,17.3,16.1,0.930636,16.0,1.2
GB01038405C,2016-11-18,18.8,18.8,1.0,17.0,
GB01038405C,2016-11-25,16.1,14.1,0.875776,12.0,2.0
GB01038405C,2016-12-02,17.4,14.3,0.821839,19.0,3.1
GB01038405C,2016-12-09,20.3,20.3,1.0,17.0,0.0
GB01038405C,2018-11-09,31.3,25.3,0.808307,24.0,6.0
GB01038405C,2018-11-16,21.7,11.4,0.525346,21.0,10.3
GB01038405C,2018-11-23,25.8,11.3,0.437984,16.0,14.5
GB01038405C,2018-11-30,17.6,11.6,0.659091,16.0,6.0
GB01038405C,2018-12-07,15.4,10.1,0.655844,13.0,5.3


In [21]:
# Add the location-normalised columns (NTWH, NTWBH, NWACpH, NTWNBH) to the weekly metrics.
TimeDataChargeable_Grouped = LocationBasedNormalisation(TimeDataChargeable_Grouped)

In [22]:
# Split the weekly metrics into the periods before and after each person's predicted
# start date; the two index levels are turned back into ordinary columns first.
NonTimeRecorderPilotData, TimeRecorderPilotData = BeforeAfterDataSets(
    PredictedStartDates=PredictedStartDates,
    DataSet=TimeDataChargeable_Grouped.reset_index(level=['GPN', 'Transaction Week End Date']),
)

In [23]:
# Preview the person-weeks that fall before the predicted Time Recorder start date.
NonTimeRecorderPilotData.head()

Unnamed: 0,GPN,Transaction Week End Date,TWH,TWBH,WBR,NWAC,TWNBH,NTWH,NTWBH,NWACpH,NTWNBH
0,GB01038405C,2016-11-11,17.3,16.1,0.930636,16.0,1.2,-0.5675,-0.5975,0.4,-0.97
1,GB01038405C,2016-11-18,18.8,18.8,1.0,17.0,,-0.53,-0.53,0.425,
2,GB01038405C,2016-11-25,16.1,14.1,0.875776,12.0,2.0,-0.5975,-0.6475,0.3,-0.95
3,GB01038405C,2016-12-02,17.4,14.3,0.821839,19.0,3.1,-0.565,-0.6425,0.475,-0.9225
4,GB01038405C,2016-12-09,20.3,20.3,1.0,17.0,0.0,-0.4925,-0.4925,0.425,-1.0


In [24]:
# Preview the person-weeks that fall on or after the predicted Time Recorder start date.
TimeRecorderPilotData.head()

Unnamed: 0,GPN,Transaction Week End Date,TWH,TWBH,WBR,NWAC,TWNBH,NTWH,NTWBH,NWACpH,NTWNBH
0,GB01038405C,2018-12-07,15.4,10.1,0.655844,13.0,5.3,-0.615,-0.7475,0.325,-0.8675
1,GB01038514K,2018-12-07,17.5,17.5,1.0,31.666667,,-0.5625,-0.5625,0.791667,
2,GB01038944Q,2018-12-07,15.75,15.75,1.0,17.5,,-0.60625,-0.60625,0.4375,
3,GB01039613D,2018-12-07,30.375,30.375,1.0,23.75,,-0.240625,-0.240625,0.59375,
4,GB01040825J,2018-12-07,15.333333,15.333333,1.0,28.333333,,-0.616667,-0.616667,0.708333,


In [25]:
# Hypothesis I #
# “While using Time Recorder more time will be recorded and as such, will be more reflective of the time spent working”

In [26]:
# Actual Values: total weekly hours (TWH), before vs. after #

TWH_NTR = NonTimeRecorderPilotData.loc[:, 'TWH'].dropna()
TWH_TR = TimeRecorderPilotData.loc[:, 'TWH'].dropna()

ReportTestResults(DataSet_1=TWH_NTR, DataSet_2=TWH_TR, TestArgument='smaller',
                  OutlierRemoval=True, Normalised=False,
                  BillableRatio=False, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = -0.292
p-value = 0.3851

Conclusion:
-----------

The difference in the mean scores of the groups (25.2252, 25.3840) is NOT SIGNIFICANT.
The mean has INCREASED by 0.16 hours/week after the use of Time Recorder.

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [27]:
# Normalised Values: location-normalised total weekly hours (NTWH), before vs. after #

NTWH_NTR = NonTimeRecorderPilotData.loc[:, 'NTWH'].dropna()
NTWH_TR = TimeRecorderPilotData.loc[:, 'NTWH'].dropna()

ReportTestResults(DataSet_1=NTWH_NTR, DataSet_2=NTWH_TR, TestArgument='smaller',
                  OutlierRemoval=True, Normalised=True,
                  BillableRatio=False, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = -0.964
p-value = 0.1675

Conclusion:
-----------

The difference in the mean scores of the groups (-0.3602, -0.3467) is NOT SIGNIFICANT.

The mean for LONDON has INCREASED by 0.47 hours/week after the use of Time Recorder
Whereas the mean for OTHER LOCATIONS has INCREASED by 0.54 hours/week after the use of Time Recorder

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [28]:
# Hypothesis II #
# “The amount of billable time will go up measurably for those using Time Recorder.”

In [29]:
# Actual Values: total weekly billable hours (TWBH), before vs. after #

TWBH_NTR = NonTimeRecorderPilotData.loc[:, 'TWBH'].dropna()
TWBH_TR = TimeRecorderPilotData.loc[:, 'TWBH'].dropna()

ReportTestResults(DataSet_1=TWBH_NTR, DataSet_2=TWBH_TR, TestArgument='smaller',
                  OutlierRemoval=True, Normalised=False,
                  BillableRatio=False, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = -0.975
p-value = 0.1649

Conclusion:
-----------

The difference in the mean scores of the groups (24.6641, 25.2023) is NOT SIGNIFICANT.
The mean has INCREASED by 0.54 hours/week after the use of Time Recorder.

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [30]:
# Normalised Values: location-normalised billable hours (NTWBH), before vs. after #

NTWBH_NTR = NonTimeRecorderPilotData.loc[:, 'NTWBH'].dropna()
NTWBH_TR = TimeRecorderPilotData.loc[:, 'NTWBH'].dropna()

ReportTestResults(DataSet_1=NTWBH_NTR, DataSet_2=NTWBH_TR, TestArgument='smaller',
                  OutlierRemoval=True, Normalised=True,
                  BillableRatio=False, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = -1.528
p-value = 0.0632

Conclusion:
-----------

The difference in the mean scores of the groups (-0.3750, -0.3533) is SIGNIFICANT.

The mean for LONDON has INCREASED by 0.76 hours/week after the use of Time Recorder
Whereas the mean for OTHER LOCATIONS has INCREASED by 0.87 hours/week after the use of Time Recorder

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [31]:
# Hypothesis IV #
# “The proportion of billable time relative to the total time recorded, will go up measurably for those using the Time Recorder.”

In [32]:
# Actual Values: weekly billable ratio (WBR), before vs. after #

WBR_NTR = NonTimeRecorderPilotData.loc[:, 'WBR'].dropna()
WBR_TR = TimeRecorderPilotData.loc[:, 'WBR'].dropna()

ReportTestResults(DataSet_1=WBR_NTR, DataSet_2=WBR_TR, TestArgument='smaller',
                  OutlierRemoval=True, Normalised=False,
                  BillableRatio=True, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = -2.065
p-value = 0.0195

Conclusion:
-----------

The difference in the mean scores of the groups (0.9964, 0.9985) is SIGNIFICANT.
The ratio has INCREASED by 0.002 (0.2%) after the use of Time Recorder.

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [33]:
# Hypothesis V #
# “Time is being recorded more accurately for those using Time Recorder.”

In [34]:
# Actual Values: number of weekly activity changes (NWAC), before vs. after #

NWAC_NTR = NonTimeRecorderPilotData.loc[:, 'NWAC'].dropna()
NWAC_TR = TimeRecorderPilotData.loc[:, 'NWAC'].dropna()

ReportTestResults(DataSet_1=NWAC_NTR, DataSet_2=NWAC_TR, TestArgument='larger',
                  OutlierRemoval=True, Normalised=False,
                  BillableRatio=False, ActivityChange=True)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = 3.254
p-value = 0.0006

Conclusion:
-----------

The difference in the mean scores of the groups (21.4997, 19.2417) is SIGNIFICANT.
The mean has DECREASED by 2.26 activity/week after the use of Time Recorder.

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [35]:
# Normalised Values: activity changes per expected working hour (NWACpH), before vs. after #

NWACpH_NTR = NonTimeRecorderPilotData.loc[:, 'NWACpH'].dropna()
NWACpH_TR = TimeRecorderPilotData.loc[:, 'NWACpH'].dropna()

ReportTestResults(DataSet_1=NWACpH_NTR, DataSet_2=NWACpH_TR, TestArgument='larger',
                  OutlierRemoval=True, Normalised=True,
                  BillableRatio=False, ActivityChange=True)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = 3.041
p-value = 0.0012

Conclusion:
-----------

The difference in the mean scores of the groups (0.5423, 0.4889) is SIGNIFICANT.

The mean for LONDON has DECREASED by 1.87 activity/week after the use of Time Recorder
Whereas the mean for OTHER LOCATIONS has DECREASED by 2.14 activity/week after the use of Time Recorder

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [36]:
# Hypothesis VI #
# “The amount of non-billable time will go down measurably for those using Time Recorder.”

In [37]:
# Actual Values: total weekly non-billable hours (TWNBH), before vs. after #

TWNBH_NTR = NonTimeRecorderPilotData.loc[:, 'TWNBH'].dropna()
TWNBH_TR = TimeRecorderPilotData.loc[:, 'TWNBH'].dropna()

ReportTestResults(DataSet_1=TWNBH_NTR, DataSet_2=TWNBH_TR, TestArgument='larger',
                  OutlierRemoval=True, Normalised=False,
                  BillableRatio=False, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = 0.269
p-value = 0.3938

Conclusion:
-----------

The difference in the mean scores of the groups (2.1882, 2.0529) is NOT SIGNIFICANT.
The mean has DECREASED by 0.14 hours/week after the use of Time Recorder.

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-


In [38]:
# Normalised Values: location-normalised non-billable hours (NTWNBH), before vs. after #

NTWNBH_NTR = NonTimeRecorderPilotData.loc[:, 'NTWNBH'].dropna()
NTWNBH_TR = TimeRecorderPilotData.loc[:, 'NTWNBH'].dropna()

ReportTestResults(DataSet_1=NTWNBH_NTR, DataSet_2=NTWNBH_TR, TestArgument='larger',
                  OutlierRemoval=True, Normalised=True,
                  BillableRatio=False, ActivityChange=False)


2-Sample One-Sided Independent z-Test With Un-Equal Variance & Sample Sizes
                             Outliers Removed
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-

t-value = 0.372
p-value = 0.3551

Conclusion:
-----------

The difference in the mean scores of the groups (-0.9421, -0.9471) is NOT SIGNIFICANT.

The mean for LONDON has DECREASED by 0.18 hours/week after the use of Time Recorder
Whereas the mean for OTHER LOCATIONS has DECREASED by 0.20 hours/week after the use of Time Recorder

Confidence Interval: 95%
-.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.--.-
