# Hypothesis Testing: Re-engineering Program Impact Analysis

In [1]:
#import the packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import re

import scipy.stats as stats

# Render matplotlib figures inline in the Jupyter notebook
%matplotlib inline

# Set seaborn graphs to a better style ("ticks" theme)
sns.set(style="ticks")

from dateutil.relativedelta import relativedelta

In [2]:
# Read the treatment-facility records from the CSV that sits next to the notebook.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, which suggests the
# CSV was written with its index; consider index_col=0 if nothing relies on that column.
treatment_csv_path = 'Treatment_Facility.csv'
Treatment = pd.read_csv(treatment_csv_path)

# Leave the frame as the last expression so the notebook renders it.
Treatment

Unnamed: 0,Month,Reengineer,Employee_Turnover,VAR4,VAR5
0,1,Prior,0.0,24.390244,42.682927
1,2,Prior,6.0606,19.354839,25.806452
2,3,Prior,12.1212,35.087719,146.19883
3,4,Prior,3.3333,18.404908,110.429448
4,5,Prior,12.9032,17.964072,23.952096
5,6,Prior,9.6774,41.176471,47.058824
6,7,Prior,11.7647,13.422819,0.0
7,8,Prior,11.4286,31.25,25.0
8,9,Prior,23.0769,17.241379,132.183908
9,10,Prior,15.0,16.574586,16.574586


In [3]:
# Effect of Re-engineering program on Employee turnover.

**Null hypothesis (Ho): Reengineering program has no effect on the Employee turnover percentage.**
    
**Alternate Hypothesis (Ha) :  Reengineering program has an effect on the Employee turnover percentage.**
    
**Confidence level: 95%; significance level (α): 0.05**

In [4]:
# Split the Employee_Turnover column by program phase (Reengineer == 'Prior' / 'Post').
Prior = Treatment.loc[Treatment['Reengineer'].eq('Prior'), 'Employee_Turnover']
Post = Treatment.loc[Treatment['Reengineer'].eq('Post'), 'Employee_Turnover']

# Compare the two phase averages before running the significance test.
prior_turnover_mean = Prior.mean()
post_turnover_mean = Post.mean()
print('Mean of Employee turnover percentage prior to Reengineering program:', prior_turnover_mean)
print('Mean of Employee turnover percentage post to Reengineering program:', post_turnover_mean)

Mean of Employee turnover percentage prior to Reengineering program: 11.735692307692307
Mean of Employee turnover percentage post to Reengineering program: 18.68912857142857


In [5]:
# One-way ANOVA on the prior vs. post Employee_Turnover samples.
turnover_anova = stats.f_oneway(Prior, Post)
turnover_anova

F_onewayResult(statistic=3.133985828895568, pvalue=0.09361109345535291)

***p-value > 0.05, so we fail to reject the null hypothesis: the data do not provide sufficient evidence that the Re-engineering program has an effect on the Employee turnover percentage.***

In [6]:
# Effect of Re-engineering program on TRFF% (percent of residents who were temporarily removed from the facility).

**Null hypothesis (Ho): Reengineering program has no effect on the TRFF%.**
    
**Alternate Hypothesis (Ha) :  Reengineering program has an effect on the TRFF%.**
    
**Confidence level: 95%; significance level (α): 0.05**

In [7]:
# VAR4 is the column this notebook analyses as TRFF%; split it by program phase
# (Reengineer == 'Prior' / 'Post').
Prior_TRFF = Treatment.loc[Treatment.Reengineer == 'Prior', 'VAR4']
Post_TRFF  = Treatment.loc[Treatment.Reengineer == 'Post', 'VAR4']

# Both labels describe the same metric (TRFF percentage); the post-phase label is
# kept parallel to the prior-phase one because TRFF is not a turnover figure.
print('Mean of TRFF percentage prior to Reengineering program:',Prior_TRFF.mean())
print('Mean of TRFF percentage post to Reengineering program:',Post_TRFF.mean())

Mean of TRFF percentage prior to Reengineering program: 20.54033531733077
Mean of TRFF turnover percentage post to Reengineering program: 9.232041695285714


In [8]:
# One-way ANOVA on the prior vs. post VAR4 (TRFF%) samples.
trff_anova = stats.f_oneway(Prior_TRFF, Post_TRFF)
trff_anova

F_onewayResult(statistic=7.747306925230095, pvalue=0.01226508213873439)

***p-value < 0.05, so we can reject the null hypothesis and conclude that the Re-engineering program has an impact on the TRFF percentage, i.e., the Re-engineering program has reduced the incidence of behavioral problems.***

In [9]:
# Effect of Re-engineering program on the Critical Incident Rate.

**Null hypothesis (Ho): Reengineering program has no effect on the Critical Incident Rate.**
    
**Alternate Hypothesis (Ha) :  Reengineering program has an effect on the Critical Incident Rate.**
    
**Confidence level: 95%; significance level (α): 0.05**

In [10]:
# VAR5 is the column this notebook analyses as the critical incident measure;
# split it by program phase (Reengineer == 'Prior' / 'Post').
Prior_CI = Treatment.loc[Treatment['Reengineer'].eq('Prior'), 'VAR5']
Post_CI = Treatment.loc[Treatment['Reengineer'].eq('Post'), 'VAR5']

# Compare the two phase averages before running the significance test.
prior_ci_mean = Prior_CI.mean()
post_ci_mean = Post_CI.mean()
print('Mean of Critical Incidence percentage prior to Reengineering program:', prior_ci_mean)
print('Mean of Critical Incidence percentage post to Reengineering program:', post_ci_mean)

Mean of Critical Incidence percentage prior to Reengineering program: 53.887906321846145
Mean of Critical Incidence percentage post to Reengineering program: 23.34971927988571


In [11]:
# One-way ANOVA on the prior vs. post VAR5 (critical incident) samples.
critical_incident_anova = stats.f_oneway(Prior_CI, Post_CI)
critical_incident_anova

F_onewayResult(statistic=2.650105376271948, pvalue=0.12091989189884142)

***p-value > 0.05, so we fail to reject the null hypothesis: the data do not provide sufficient evidence that the Re-engineering program has an effect on the Critical Incident Rate.***