In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats as st


def std_sample_mean(s_population, n):
    """For a sample of size n, calculate the standard deviation of the sample mean,
    given the standard deviation of the population.
    """
    return s_population / np.sqrt(n)


def ci(mean, std, confidence):
    '''Calculate the confidence interval for the specified normal distribution of N(mean, std)
    at given two-sided confidence level.
    '''
    two_sided_confidence = confidence + (1 - confidence) / 2
    std_error = st.norm.ppf(two_sided_confidence)
    return mean - std_error * std, mean + std_error * std


def ci_t(mean, std, df, confidence):
    '''Calculate the confidence interval for the specified t distribution of N(mean, std)
    at given two-sided confidence level.
    '''
    two_sided_confidence = confidence + (1 - confidence) / 2
    std_error = st.t.ppf(two_sided_confidence, df)
    return mean - std_error * std, mean + std_error * std


## Coefficient of determination r<sup>2</sup>

In [2]:
def r2(t, df):
    """Return the coefficient of determination given the t-statistic of a t-test and the
    degrees of freedom df.
    """
    return t**2 / (t**2 + df)

In [3]:
r2(2, 20)

0.16666666666666666

In [12]:
mean_p = 151
mean_s = 126
n = 25
df = n - 1
std_s = 50

In [13]:
st.t.ppf(.95, df)

1.7108820799094275

In [14]:
std_sm = std_s / np.sqrt(n)

print(std_sm)

10.0


In [15]:
z = (mean_s - mean_p) / std_sm
print(z)

-2.5


In [16]:
cohen_d = (mean_s - mean_p) / std_s
print(cohen_d)

-0.5


In [17]:
r2(-2.5, df)

0.2066115702479339

In [18]:
ci_t(mean_s, std_sm, df, .95)

(105.36101438371979, 146.6389856162802)

In [19]:
146.64-126

20.639999999999986

In [33]:
pre = np.array([8, 7, 6, 9, 10, 5, 7, 11, 8, 7])
post = np.array([5, 6, 4, 6, 5, 3, 2, 9, 4, 4])
diff = post - pre

n = len(pre)
df = n - 1

In [34]:
mean_pre = pre.mean()
mean_post = post.mean()
mean_diff = diff.mean()

In [35]:
print(mean_pre, mean_post, mean_diff)

7.8 4.8 -3.0


In [36]:
st.t.ppf(.95, df)

1.8331129326536335

In [32]:
std_diff = 1.33

In [38]:
std_diffm = std_diff / np.sqrt(n)
print(std_diffm)

0.42058292880239445


In [39]:
z = mean_diff / std_diffm
print(z)

-7.132957128199352


In [40]:
cohen_d = mean_diff / std_diff
print(cohen_d)

-2.255639097744361


In [41]:
ci_t(mean_diff, std_diffm, df, .95)

(-3.9514246849169212, -2.0485753150830788)