In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import ttest_ind,f_oneway,chi2_contingency,kstest
from sklearn.datasets import load_iris

In [None]:
# Load the iris dataset into a DataFrame: one column per measurement,
# plus a readable species label.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)

# Map the integer class codes in `data.target` to species names in one step.
df['species'] = pd.Series(data.target, index=df.index).map(
    {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
)

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [None]:
def perform_hypothesis_test(stat,p_value,alpha=0.05,null_hypothesis="",
                            alternative_hypothesis=""):
    """
    Apply the common p-value decision rule and summarize a hypothesis test.

    The null hypothesis is rejected when ``p_value < alpha``; otherwise the
    test fails to reject it. A NaN p-value is reported as inconclusive instead
    of being silently treated as "fail to reject".

    Parameters:
    - stat : The test statistic (passed through unchanged).
    - p_value : The p-value obtained from the test.
    - alpha : The significance level (default is 0.05); must lie strictly
      between 0 and 1.
    - null_hypothesis : Description of the null hypothesis.
    - alternative_hypothesis : Description of the alternative hypothesis.

    Returns:
    A dict with the keys "statistic", "p_value", "alpha", "null_hypothesis",
    "alternative_hypothesis" and "decision".

    Raises:
    ValueError if ``alpha`` is not strictly between 0 and 1.
    """
    import math

    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    # Decision rule: common for all tests.
    if math.isnan(p_value):
        # NaN compares False against everything, so without this guard an
        # undefined p-value would be reported as "Fail to reject".
        decision = "Inconclusive: the p-value is NaN, so the test could not be evaluated"
    elif p_value < alpha:
        decision = f"Reject the null hypothesis: {alternative_hypothesis}"
    else:
        decision = f"Fail to reject the null hypothesis: {null_hypothesis}"

    return {
        "statistic": stat,
        "p_value": p_value,
        "alpha": alpha,  # key previously carried stray trailing spaces
        "null_hypothesis": null_hypothesis,
        "alternative_hypothesis": alternative_hypothesis,
        "decision": decision,
    }

# Z-Test (One-Sample)

Compare the mean of a sample to a known population mean.

Scenario: Test whether the mean sepal length of versicolor is significantly different from a hypothetical population mean of 6.0.

* Null Hypothesis (H0): The mean sepal length of versicolor is equal to 6.0.
* Alternative Hypothesis (H1): The mean sepal length of versicolor is not equal to 6.0.

In [None]:
from statsmodels.stats.weightstats import ztest

# Data preparation: sepal lengths of the versicolor rows only,
# selected with a single .loc lookup (row mask + column label).
versicolor_sepal_length = df.loc[df['species'] == 'versicolor', 'sepal length (cm)']

# One-sample z-test of the sample mean against the hypothesized
# population mean of 6.0.
stat, p_value = ztest(versicolor_sepal_length, value=6.0)

# Hand the statistic and p-value to the shared decision helper.
results_z_test = perform_hypothesis_test(
    stat=stat,
    p_value=p_value,
    alpha=0.05,
    null_hypothesis="The mean sepal length of versicolor is equal to 6.0",
    alternative_hypothesis="The mean sepal length of versicolor is not equal to 6.0",
)

print("One-Sample Z-Test Results:")
results_z_test


One-Sample Z-Test Results:


{'statistic': -0.8767408688641777,
 'p_value': 0.3806274030991602,
 'alpha  ': 0.05,
 'null_hypothesis': 'The mean sepal length of versicolor is equal to 6.0',
 'alternative_hypothesis': 'The mean sepal length of versicolor is not equal to 6.0',
 'decision': 'Fail to reject the null hypothesis: The mean sepal length of versicolor is equal to 6.0'}

# Z-Test (Two-Sample)

Compare means of two independent groups

Scenario: Test whether the mean petal width of setosa is significantly different from that of virginica.

* Null Hypothesis (H0): The mean petal width of setosa is equal to the mean petal width of virginica.
* Alternative Hypothesis (H1): The mean petal width of setosa is not equal to the mean petal width of virginica.

In [None]:
# Data preparation: petal widths for the two species being compared,
# each selected with a single .loc lookup (row mask + column label).
setosa_petal_width = df.loc[df['species'] == 'setosa', 'petal width (cm)']
virginica_petal_width = df.loc[df['species'] == 'virginica', 'petal width (cm)']

# Two-sample z-test comparing the two group means.
stat, p_value = ztest(setosa_petal_width, virginica_petal_width)

# Hand the statistic and p-value to the shared decision helper.
results_z_test = perform_hypothesis_test(
    stat=stat,
    p_value=p_value,
    alpha=0.05,
    null_hypothesis="The  mean petal width of setosa is equal to the mean petal width of virginica.",
    alternative_hypothesis="The  mean petal width of setosa is not equal to the mean petal width of virginica.",
)

print("Two-Sample Z-Test Results:")
results_z_test

Two-Sample Z-Test Results:


{'statistic': -42.7857975196172,
 'p_value': 0.0,
 'alpha  ': 0.05,
 'null_hypothesis': 'The  mean petal width of setosa is equal to the mean petal width of virginica.',
 'alternative_hypothesis': 'The  mean petal width of setosa is not equal to the mean petal width of virginica.',
 'decision': 'Reject the null hypothesis: The  mean petal width of setosa is not equal to the mean petal width of virginica.'}

# ANOVA

Compare means of multiple groups

Scenario: Compare the sepal widths of all three species (setosa, versicolor, and virginica).
* Null Hypothesis (H0): The mean sepal widths of all three species are equal.
* Alternative Hypothesis (H1): At least one species has a different mean sepal width.

In [None]:
from scipy.stats import f_oneway

# Data preparation: one sepal-width Series per species,
# each selected with a single .loc lookup (row mask + column label).
setosa_sepal_width = df.loc[df['species'] == 'setosa', 'sepal width (cm)']
versicolor_sepal_width = df.loc[df['species'] == 'versicolor', 'sepal width (cm)']
virginica_sepal_width = df.loc[df['species'] == 'virginica', 'sepal width (cm)']
