In [1]:
import scipy.stats as stats

In [2]:
from scipy.stats import t

# Set the t-value and degrees of freedom
t_value = 1
df = 5  # Replace this with your specific degrees of freedom

# Calculate the CDF value
cdf_value = t.cdf(t_value, df)
print(cdf_value*2)


1.6367825323508776


In [3]:
# The Shapiro-Wilk test returns two values: the test statistic (W) and the p-value. 
# If the p-value is greater than your chosen significance level (α = 0.05), 
# you can assume the data comes from a normally distributed population. 
# If the p-value is less than or equal to the significance level, the normality assumption is not met, 
# and you should consider applying a data transformation or using a non-parametric test like the Mann-Whitney U test.


from scipy.stats import shapiro

# Input the data as lists
desktop_users = [12, 15, 18, 16, 20, 17, 14, 22, 19, 21, 23, 18, 25, 17, 16, 24, 20, 19, 22, 18, 15, 14, 23, 16, 12, 21, 19, 17, 20, 14]
mobile_users = [10, 12, 14, 13, 16, 15, 11, 17, 14, 16, 18, 14, 20, 15, 14, 19, 16, 15, 17, 14, 12, 11, 18, 15, 10, 16, 15, 13, 16, 11]

# Perform the Shapiro-Wilk test for both desktop and mobile users
shapiro_desktop = shapiro(desktop_users)
shapiro_mobile = shapiro(mobile_users)

print("Shapiro-Wilk test for desktop users:", shapiro_desktop)
print("Shapiro-Wilk test for mobile users:", shapiro_mobile)

Shapiro-Wilk test for desktop users: ShapiroResult(statistic=0.9783115512411942, pvalue=0.7791003299808725)
Shapiro-Wilk test for mobile users: ShapiroResult(statistic=0.9714355768676655, pvalue=0.5791606602037616)


In [4]:
# If the p-value from Levene's test is greater than your chosen significance level (α = 0.05), you can assume equal variances
#  If the p-value is less than or equal to the significance level, the assumption of equal variances is not met, 
# and you should consider using Welch's t-test instead of the regular independent two-sample t-test.

from scipy.stats import levene

# Input the data as lists
desktop_users = [12, 15, 18, 16, 20, 17, 14, 22, 19, 21, 23, 18, 25, 17, 16, 24, 20, 19, 22, 18, 15, 14, 23, 16, 12, 21, 19, 17, 20, 14]
mobile_users = [10, 12, 14, 13, 16, 15, 11, 17, 14, 16, 18, 14, 20, 15, 14, 19, 16, 15, 17, 14, 12, 11, 18, 15, 10, 16, 15, 13, 16, 11]

# Perform Levene's test
levene_test = levene(desktop_users, mobile_users)
print(levene_test)


LeveneResult(statistic=2.94395488191752, pvalue=0.09153720526741756)


1. **One-sample t-test**
2. **Two-sample t-test**
3. **Paired sample t-test**
4. **Chi-squared test**
5. **ANOVA**
6. **Mann-Whitney U test**

In [7]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

# Set seed for reproducibility
np.random.seed(0)

# Generate random data
data1 = np.random.normal(loc=0, scale=1, size=100)  # Group 1
data2 = np.random.normal(loc=0.5, scale=1, size=100)  # Group 2
data3 = np.random.normal(loc=1, scale=1, size=100)  # Group 3
data4 = np.random.normal(loc=1.5, scale=1, size=100)  # Group 4

In [8]:
### 1. One-sample t-test

### A one-sample t-test checks if the mean of a single group is different from a known population mean.

# One-sample t-test
pop_mean = 0
t_stat, p_val = stats.ttest_1samp(data1, pop_mean)
print("One-sample t-test:")
print(f"T-statistic: {t_stat}, P-value: {p_val}")

One-sample t-test:
T-statistic: 0.5904283402851698, P-value: 0.5562489158694675


In [10]:
### 2. Two-sample t-test

### A two-sample t-test checks if the means of two independent groups are different.
# Two-sample t-test
t_stat, p_val = stats.ttest_ind(data1, data2)
print("\nTwo-sample t-test:")
print(f"T-statistic: {t_stat}, P-value: {p_val}")


Two-sample t-test:
T-statistic: -3.597192759749613, P-value: 0.0004062796020362524


In [11]:
### 3. Paired sample t-test

### A paired sample t-test checks if the means of two related groups are different.


# For the paired test, generate some related data
paired_data1 = np.random.normal(loc=0, scale=1, size=30)
paired_data2 = paired_data1 + np.random.normal(loc=0.1, scale=0.5, size=30)  # Related data

# Paired t-test
t_stat, p_val = stats.ttest_rel(paired_data1, paired_data2)
print("\nPaired sample t-test:")
print(f"T-statistic: {t_stat}, P-value: {p_val}")


Paired sample t-test:
T-statistic: -0.9849155353858514, P-value: 0.332812052295149


In [12]:
### 4. Chi-squared test

### A chi-squared test checks the relationship between categorical variables. Here, we'll create some categorical data.


# Create categorical data
observed = np.array([[10, 20], [20, 30]])
chi2, p_val, dof, expected = stats.chi2_contingency(observed)

print("\nChi-squared test:")
print(f"Chi-squared: {chi2}, P-value: {p_val}")


Chi-squared test:
Chi-squared: 0.128, P-value: 0.7205147871362552


In [13]:
### 5. ANOVA

## ANOVA tests if there are significant differences between the means of three or more independent groups.

# ANOVA
f_stat, p_val = stats.f_oneway(data1, data2, data3, data4)
print("\nANOVA test:")
print(f"F-statistic: {f_stat}, P-value: {p_val}")


ANOVA test:
F-statistic: 28.880768176933508, P-value: 6.597598165696173e-17


In [14]:
### 6. Mann-Whitney U test

## The Mann-Whitney U test is a non-parametric test that checks if two independent samples come from the same distribution.


u_stat, p_val = stats.mannwhitneyu(data1, data2)
print("\nMann-Whitney U test:")
print(f"U-statistic: {u_stat}, P-value: {p_val}")


Mann-Whitney U test:
U-statistic: 3717.0, P-value: 0.0017264885889391286


### Conclusion

In this example, we covered various hypothesis tests with random data. Each test has its own assumptions and scenarios in which it is appropriate. It's essential to understand the underlying assumptions before choosing a test for your data analysis. You can run this code in your Python environment to see the results.