[Reference](https://nathanrosidi.medium.com/commonly-used-statistical-tests-in-data-science-93787568eb36)

# T-Test

In [1]:
import numpy as np
from scipy import stats

# Sample data: Group A and Group B
# Both samples are drawn from a locally seeded generator so that a fresh
# kernel (Restart & Run All) reproduces exactly the same numbers. The global,
# unseeded np.random state produced a different statistic on every run.
rng = np.random.default_rng(42)  # fixed seed: any constant works, 42 is arbitrary
group_a = rng.normal(5.0, 1.5, 30)  # loc=5.0, scale=1.5, 30 observations
group_b = rng.normal(6.0, 1.5, 30)  # loc=6.0, scale=1.5, 30 observations

# Performing an Independent T-test
# ttest_ind compares the means of two independent samples and returns the
# test statistic together with its p-value.
t_stat, p_val = stats.ttest_ind(group_a, group_b)
print(f"T-Statistic: {t_stat}, P-Value: {p_val}")

T-Statistic: -1.243907870881994, P-Value: 0.21853742300977272


# Chi-Square Test

In [2]:
from scipy.stats import chi2_contingency
import numpy as np

# Example data: Gender vs. Movie Preference
# Observed counts laid out as a 2x2 contingency table; the rows and columns
# are the levels of the two categorical variables being compared.
data = np.array(
    [
        [30, 10],
        [5, 25],
    ]
)

# chi2_contingency yields, in order: the test statistic, its p-value, the
# degrees of freedom, and the table of expected frequencies under
# independence. For a 2x2 table SciPy applies Yates' continuity correction
# by default, which is why the statistic is slightly below the uncorrected one.
test_result = chi2_contingency(data)
chi2, p, dof, expected = test_result
print("Chi2 Statistic: {}, P-value: {}".format(chi2, p))

Chi2 Statistic: 21.058333333333334, P-value: 4.455110459655313e-06


# ANOVA (Analysis of Variance)

In [3]:
from scipy import stats
import numpy as np

# Sample data: Three different groups
# Draw from a locally seeded generator so the cell gives the same F-statistic
# on every fresh run. The unseeded global np.random state made the printed
# result irreproducible.
rng = np.random.default_rng(42)  # fixed seed: any constant works, 42 is arbitrary
group1 = rng.normal(5.0, 1.5, 30)  # loc=5.0, scale=1.5, 30 observations
group2 = rng.normal(6.0, 1.5, 30)  # loc=6.0, scale=1.5, 30 observations
group3 = rng.normal(7.0, 1.5, 30)  # loc=7.0, scale=1.5, 30 observations

# Performing One-Way ANOVA
# f_oneway tests whether the group means are all equal and returns the
# F statistic together with its p-value.
f_stat, p_val = stats.f_oneway(group1, group2, group3)
print(f"F-Statistic: {f_stat}, P-Value: {p_val}")

F-Statistic: 13.531525828513411, P-Value: 7.643294139106177e-06


# Pearson Correlation

In [4]:
import numpy as np
from scipy.stats import pearsonr

# Sample data
x = np.array([10, 20, 30, 40, 50])
y = np.array([15, 25, 35, 45, 55])  # every y equals x + 5, an exact linear relation

# Calculating Pearson Correlation
# pearsonr returns the coefficient and its p-value. The p-value is bound to a
# real name instead of `_` because IPython/Jupyter uses `_` for the previous
# cell's output, and assigning to it stops IPython from maintaining that
# variable. `p_val` also matches the naming used by the other test cells.
corr, p_val = pearsonr(x, y)
print(f"Pearson Correlation Coefficient: {corr}")

Pearson Correlation Coefficient: 1.0


# Mann-Whitney U Test

In [5]:
from scipy.stats import mannwhitneyu
import numpy as np

# Sample data: Two groups
# Draw from a locally seeded generator so the U statistic is identical on
# every fresh run. The unseeded global np.random state made the printed
# result irreproducible.
rng = np.random.default_rng(42)  # fixed seed: any constant works, 42 is arbitrary
group1 = rng.normal(5.0, 1.5, 30)  # loc=5.0, scale=1.5, 30 observations
group2 = rng.normal(6.0, 1.5, 30)  # loc=6.0, scale=1.5, 30 observations

# Performing Mann-Whitney U Test
# mannwhitneyu is a rank-based comparison of two independent samples; it
# returns the U statistic together with its p-value.
u_stat, p_val = mannwhitneyu(group1, group2)
print(f"U Statistic: {u_stat}, P-Value: {p_val}")

U Statistic: 224.0, P-Value: 0.000856411554987191
