In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt

from statsmodels.stats.weightstats import ztest as ztest

# IQ

## Will drug improve IQ

In [2]:
mu_population = 100
sigma_population = 15

In [3]:
df_iq_drug = pd.read_csv("iq_data_drug.csv")

In [5]:
df_iq_drug.shape

(20, 1)

In [4]:
df_iq_drug.head()

Unnamed: 0,iq
0,110
1,91
2,95
3,94
4,97


In [8]:
df_iq_drug["iq"].mean() # "m" from our equation

102.65

In [12]:
# (sample mean - population mean) / (sample mean's std dev)
# (sample mean - population mean) / (sigma/root(n))

# Observed test statistic
ts = (df_iq_drug["iq"].mean() - mu_population) / (df_iq_drug["iq"].std() / np.sqrt(len(df_iq_drug)))
ts

1.5922685034947848

## Right tailed

In [13]:
p_value = 1 - stats.norm.cdf(ts)
p_value

0.055662193388278935

In [16]:
ztest(df_iq_drug["iq"], value=100, alternative="larger")

(1.5922685034947845, 0.05566219338827894)

In [25]:
# p_values > alpha (0.05)
# We accept null hypothesis

## Two-tailed

In [17]:
ztest(df_iq_drug["iq"], value=100)

(1.5922685034947845, 0.11132438677655788)

In [18]:
p_value = 2*(1 - stats.norm.cdf(ts))
p_value

0.11132438677655787

In [26]:
# p_value > alpha
# Accept H0

## Let us play with our data

In [56]:
our_data = np.random.randint(100, 110, size=200)
ztest(our_data, value=100)

(20.662746018030013, 7.497689965971804e-95)

# Two drugs

In [62]:
drug1 = np.random.randint(10, 15, size=200)
drug2 = np.random.randint(20, 30, size=150)

In [65]:
drug1.mean()

12.215

In [66]:
drug2.mean()

24.446666666666665

In [63]:
drug1

array([14, 13, 13, 12, 11, 12, 13, 13, 10, 12, 10, 12, 10, 13, 14, 14, 14,
       13, 14, 13, 13, 14, 14, 14, 12, 14, 12, 13, 12, 11, 14, 11, 12, 12,
       12, 10, 13, 14, 14, 13, 13, 11, 13, 12, 13, 13, 14, 14, 13, 12, 14,
       14, 12, 14, 14, 11, 10, 14, 14, 10, 10, 11, 12, 11, 13, 12, 10, 13,
       14, 13, 13, 13, 13, 11, 13, 13, 10, 14, 12, 11, 14, 10, 10, 11, 14,
       13, 14, 11, 13, 14, 10, 11, 13, 14, 14, 12, 12, 14, 10, 11, 14, 14,
       11, 14, 10, 14, 12, 13, 14, 12, 11, 14, 10, 11, 13, 12, 10, 13, 13,
       11, 10, 11, 12, 14, 10, 10, 11, 11, 10, 13, 12, 11, 10, 10, 11, 11,
       13, 13, 13, 12, 11, 10, 12, 14, 14, 13, 13, 11, 10, 14, 11, 13, 11,
       12, 11, 10, 13, 10, 14, 10, 14, 14, 13, 11, 12, 14, 14, 10, 14, 11,
       11, 12, 11, 12, 13, 11, 11, 11, 13, 10, 11, 14, 14, 14, 13, 10, 11,
       14, 14, 12, 10, 12, 13, 13, 12, 13, 10, 12, 10, 12])

In [64]:
drug2

array([29, 20, 26, 29, 25, 20, 29, 26, 29, 23, 23, 26, 22, 20, 29, 20, 20,
       25, 24, 22, 24, 29, 20, 23, 27, 23, 27, 29, 24, 21, 20, 25, 23, 21,
       27, 28, 28, 24, 24, 21, 22, 29, 26, 28, 24, 27, 27, 29, 20, 23, 25,
       20, 20, 28, 25, 28, 24, 22, 26, 20, 22, 20, 22, 21, 26, 20, 29, 27,
       21, 23, 24, 25, 25, 22, 21, 22, 24, 23, 24, 28, 21, 21, 24, 27, 26,
       29, 21, 24, 26, 27, 20, 20, 26, 28, 29, 27, 23, 22, 27, 27, 22, 28,
       21, 28, 25, 20, 27, 25, 22, 25, 21, 26, 20, 23, 29, 26, 26, 29, 27,
       28, 25, 24, 24, 27, 24, 24, 23, 26, 24, 22, 24, 26, 26, 20, 24, 25,
       28, 25, 24, 26, 22, 28, 25, 25, 25, 26, 29, 25, 20, 21])

In [58]:
# H0: mu1 = mu2
# H1: mu1 != mu2
# Two tailed
ztest(drug1, drug2)

(-55.15253273410719, 0.0)

In [59]:
# H0: mu1 = mu2
# H1: mu1 > mu2
# Right tailed
ztest(drug1, drug2, alternative="larger")

(-55.15253273410719, 1.0)

In [60]:
# H0: mu1 = mu2
# H1: mu1 < mu2
# Left tailed
ztest(drug1, drug2, alternative="smaller")

(-55.15253273410719, 0.0)

In [61]:
stats.ttest_ind(drug1, drug2)

Ttest_indResult(statistic=-55.15253273410719, pvalue=4.351871004116815e-174)