In [291]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp,levene, shapiro
from statsmodels.stats.power import ttest_power
import matplotlib.pyplot as plt
import scipy.stats as stats


In [292]:
Titan=pd.read_csv('Titan.csv')

In [303]:
Titan['Difference']=Titan['NewScheme']-Titan['OldScheme']

In [304]:
Titan.describe()

Unnamed: 0,SalesPerson,OldScheme,NewScheme,Difference
count,30.0,30.0,30.0,30.0
mean,15.5,68.033333,72.033333,4.0
std,8.803408,20.45598,24.062395,14.081046
min,1.0,28.0,32.0,-34.0
25%,8.25,54.0,55.0,-6.75
50%,15.5,67.0,74.0,7.5
75%,22.75,81.5,85.75,16.0
max,30.0,110.0,122.0,25.0


In [305]:
#OldScheme Mean in thousands
OldMean=Titan['OldScheme'].mean()
OldMean

68.03333333333333

In [306]:
ExactMean = OldMean*1000
ExactMean

68033.33333333333

In [307]:
#NewScheme Mean in thousands
NewMean=Titan['NewScheme'].mean()
NewMean

72.03333333333333

In [308]:
ExactNewMean= NewMean*1000
ExactNewMean

72033.33333333333

In [309]:
#Since both are dependent varaibles we should go for paired t test ., but before that check for variances and normality of the population using 
#Levene and Shapiro tests.


In [310]:
#Levene Test
#Null Hypothesis : Variances are equal
#Alternative : Variances are not equal
levene(Titan['OldScheme'],Titan['NewScheme'])

LeveneResult(statistic=1.063061539437244, pvalue=0.30679836081811235)

In [311]:
#Pvalue is greater than 0.05 , so we do not reject null hypothesis

In [312]:
#Shapiro Test
#Null Hypothesis : Distribution is normal
#Alternative : Distribution is not normal

In [313]:
shapiro(Titan['Difference'])

(0.9396390914916992, 0.08894602954387665)

In [314]:
#P value is > 0.05 , we do not reject the null hypothesis

In [315]:
#Since all the conditions are satisfied ., we can go for paired t test
#For  test to check new scheme has significantly raised outputs the null and alternate hypothesis are described as below
#H0 = Mean(NewScheme)-Mean(OldScheme)<=0
#HA= Mean(Newscheme)-Mean(Oldscheme)>0
t_statistic, p_value = ttest_1samp(Titan['NewScheme']-Titan['OldScheme'],0)
print(t_statistic, p_value)

1.5559143823544377 0.13057553961337662


In [316]:
#Since it is a one tailed test ., we have to divide the p value by 2
p_value=p_value/2
p_value

0.06528776980668831

In [317]:
#For 95% confidence interval , since P value is greater than 0.05 , we do not reject the null hypothesis
#i.e., there is no evidence to prove that new scheme has significantly raised outputs

In [318]:
#the average output must increase by £5000 in the scheme compared to the old scheme. If this figure is alternative hypothesis, what is: 
#The probability of a type 1 error? (2.5 points)
#What is the p- value of the hypothesis test if we test for a difference of $5000? (10 points)
#Power of the test (5 points)


In [319]:
#The probability of type I error is alpha . i.e, for 95% confidence interval the probability of type I error is 0.05

In [320]:
#For the above test the null and alternate hypothesis are described as below.
#Null Hypothesis : Mean(NewScheme)-Mean(OldScheme)<=5
#Alternate Hypothesis : Mean(NewScheme)-Mean(OldScheme)>5 
#(Average output is increased by 5000 compared to old scheme) --> Mean(NewScheme)>Mean(Oldscheme)+5000

In [321]:
t_statistic, p_value = ttest_1samp(Titan['NewScheme']-Titan['OldScheme'],5)
print(t_statistic)

-0.3889785955886094


In [322]:
#Since it is a one tailed test , we should divide the p value by 2
p_value=p_value/2
p_value

0.3500667456306643

In [323]:
#Since the p value is > 0.05 , we do not reject the null hypothesis

In [324]:
#Power of the test (5 points)
effectsize=((Titan["NewScheme"]-Titan['OldScheme']).mean()-5)/(Titan["NewScheme"]-Titan['OldScheme']).std(ddof=1)

print(ttest_power(effectsize, nobs=30, alpha=0.05, alternative='larger')*100,'%')

2.143906672676188 %


In [None]:
#Theoretical way to calculate probability of type2 error (Beta)

In [None]:
#After Finding Beta , we will find the power by subtracting beta from 1

In [325]:
#power=1-beta

In [326]:
#For the first test ., Mu(NewScheme)-Mu(OldScheme)>0 , in order for the new scheme to siginificantly raise the output
#Null Hypothesis : Mu(NewScheme)-Mu(OldScheme)<=0
#AlternateHypothesis:Mu(NewScheme)-Mu(OldScheme)>0
#Find Out the critical value of t at 0.05 level of significance and 29 df
tcritical=stats.t.ppf(0.95,29)
#We will fail to reject the null hypothesis , as long as we get a t statistic less than tcritical


In [327]:
tcritical

1.6991270265334972

In [328]:
#find out the critical mean of difference between two schemes.

In [329]:
#Formula for calculating critical mean difference is ., criticalmeandifference=(tcritical*Standara error)+Mu(Difference)

In [330]:
#Here Mu(Difference)=0 as per the null hypothesis

In [331]:
#Substituting the values in the formula
CriticalMeanDifference = ((tcritical*(Titan['NewScheme']-Titan['OldScheme']).std(ddof=1))/np.sqrt(30))+0

In [332]:
CriticalMeanDifference

4.368176156228719

In [333]:
#we fail to reject the null hypothesis , as long as we draw the samples whose mean difference is less than 4.36

In [334]:
#Use the above value and find the tstatistic where mu(meandifference)=5

In [335]:
Standarderror=((Titan['NewScheme']-Titan['OldScheme']).std(ddof=1))/np.sqrt(30)

In [336]:
tstat=(CriticalMeanDifference-5)/Standarderror

In [337]:
tstat

-0.2457659514095499

In [338]:
#find out the pvalue for tstat -0.2457

In [339]:
pvalue=stats.t.cdf(tstat,df=29)

In [340]:
pvalue

0.4037972524794316

In [341]:
#the above p value is the probability of type II error (Beta)

In [342]:
#Power of the test=1-Beta

In [343]:
Power=1-pvalue

In [344]:
print((Power*100),'%')

59.620274752056844 %
