In [1]:
import pandas as pd
import scipy.stats as stats
import numpy as np

## Hypothesis Testing

### Available Data

In [2]:
pop_mean = 1000 + (5 * 600)    # 'x' is number of units sold i.e. 600
sample_mean = 3050
sample_std = 25 * 5            
n = 25                         # n is sample size
alpha = 0.05

In [3]:
# To check if difference in pop_mean and sample_mean is actually significant or just due to sampling error,
# we consider alpha_value (standard being 0.05)

### Hypothesis Statement

In [4]:
# Null Hypothesis(H0) = No change needed as the weekly operating cost is not higher
# Alternate Hypothesis(Ha) = Weekly operating cost is higher than the observed cost according to the model

In [5]:
# As alternate hypothesis is directional as it mentions only one condition (cost being higher), 
# it is "one tail" alternate hypothesis

### Calculating Test Statistic

In [6]:
# We will be using z-test to calculate the critical value as mentioned in problem statement.
# usually if sample size is less than 30 and population std deviation is not available, we use t-test.

In [7]:
# Calculate z statistic value
# formula:- 
z_test = ((sample_mean - pop_mean) / (sample_std / (np.sqrt(n)))) 

In [8]:
z_test

-38.0

### Calculating Critical Value

In [9]:
critical_value = stats.norm.ppf(1- alpha)

In [10]:
critical_value

1.6448536269514722

### Results

In [11]:
print('z_value:', z_test) 
print('critical value:', critical_value)
print('alpha value:', alpha)

z_value: -38.0
critical value: 1.6448536269514722
alpha value: 0.05


### Conclusion

In [12]:
# if z_test value > critical value, we reject null hypothesis and vice versa

In [13]:
if critical_value < z_test:
    print("We Reject Null hypothesis i.e \n Their is strong evidence that weekly operating costs might be higher than observed cost.")
else:
    print("We fail to reject null hypothesis i.e No substantial evidence that weeekly operating cost is higher than observed costs.")

We fail to reject null hypothesis i.e No substantial evidence that weeekly operating cost is higher than observed costs.


## Chi2 Test

In [14]:
from scipy.stats import chi2_contingency

In [15]:
df = pd.read_csv("https://raw.githubusercontent.com/PrathameshR10/Assignments/main/Folder/chi2data.csv")

### Objective

In [16]:
# To use the Chi-Square test for independence to determine if there's a significant association 
#  between the type of smart home device purchased (Smart Thermostats vs. Smart Lights) and the customer satisfaction level.

In [17]:
df = pd.read_csv("https://raw.githubusercontent.com/PrathameshR10/Assignments/main/Folder/chi2data.csv")
df

Unnamed: 0,Satisfaction,Smart Thermostat,Smart Light,Total
0,Very Satisfied,50,70,120
1,Satisfied,80,100,180
2,Neutral,60,90,150
3,Unsatisfied,30,50,80
4,Very Unsatisfied,20,50,70
5,Total,240,360,600


### Hypothesis

In [18]:
# Null Hypothesis(H0) = Their is no significant association between type of device and satisfaction level.
# Alternate Hypothesis(Ha) = Their is relation between those two variables.

### Available Data

In [19]:
obs = np.array([[50, 70],
                 [80, 100],
                 [60, 90],
                 [30, 50],
                 [20, 50]])
alpha = 0.05

In [20]:
obs               # 'obs' is observed value in the table

array([[ 50,  70],
       [ 80, 100],
       [ 60,  90],
       [ 30,  50],
       [ 20,  50]])

### Calculating Chi2 statistic value

In [21]:
chi, p, dof, exp = chi2_contingency(obs) 

In [22]:
exp             # This is expected value if Null hypothesis was true i.e columns are independent

array([[ 48.,  72.],
       [ 72., 108.],
       [ 60.,  90.],
       [ 32.,  48.],
       [ 28.,  42.]])

In [23]:
p            # This is p value or critical value

0.22784371130697179

In [24]:
dof           # This is degrees of freedom

4

In [25]:
chi          # This is chi statistic value

5.638227513227513

In [26]:
if p < alpha:
    print('We reject Null Hypothesis(H0), Their is no significant association between type of device and satisfaction level')
else:
    print('We fail to reject Null Hypothesis')
    print("There's a significant association between the type of smart home device purchased "
          "and the customer satisfaction level.")

We fail to reject Null Hypothesis
There's a significant association between the type of smart home device purchased and the customer satisfaction level.
