In [1]:
from scipy import stats
import pandas as pd

## Two Catalyst Example

In [2]:
# Yield observations, in the same order as the labels in `group`:
# the first eight values carry label 1, the last eight carry label 2.
x = [
    91.50, 94.18, 92.18, 95.39, 91.79, 89.07, 94.72, 89.21,  # label 1
    89.19, 90.95, 90.46, 93.21, 97.19, 97.04, 91.07, 92.75,  # label 2
]

In [3]:
# Group label for each entry of `x`: eight 1s followed by eight 2s
# (presumably catalyst 1 / catalyst 2, per the section title).
group = [1] * 8 + [2] * 8

In [4]:
# Pair every measurement with its group label in a two-column frame.
df = pd.DataFrame(
    data={
        "yield": x,
        "group": group,
    }
)
df  # bare last expression so the notebook renders the table

Unnamed: 0,yield,group
0,91.5,1
1,94.18,1
2,92.18,1
3,95.39,1
4,91.79,1
5,89.07,1
6,94.72,1
7,89.21,1
8,89.19,2
9,90.95,2


In [5]:
# Independent two-sample t-test on the raw observations.
# stats.ttest_ind() is called with its defaults, i.e. the equal-variance,
# two-sided form described in the SciPy documentation.
indTest = stats.ttest_ind(
    df.loc[df["group"] == 1, "yield"],  # yields labelled group 1
    df.loc[df["group"] == 2, "yield"],  # yields labelled group 2
)
indTest

Ttest_indResult(statistic=-0.3535908643461798, pvalue=0.7289136186068217)

scipy.stats.ttest_ind(a, b, axis=0, equal_var=True, nan_policy='propagate', permutations=None, random_state=None, alternative='two-sided', trim=0)
Calculate the T-test for the means of two independent samples of scores.

This is a two-sided test for the null hypothesis that 2 independent samples have identical average (expected) values. This test assumes that the populations have identical variances by default.

Parameters
a, b : array_like
The arrays must have the same shape, except in the dimension corresponding to axis (the first, by default).

axis : int or None, optional
Axis along which to compute test. If None, compute over the whole arrays, a, and b.

equal_var : bool, optional
If True (default), perform a standard independent 2 sample test that assumes equal population variances [1]. If False, perform Welch’s t-test, which does not assume equal population variance [2].

New in version 0.11.0.

nan_policy : {‘propagate’, ‘raise’, ‘omit’}, optional
Defines how to handle when input contains nan. The following options are available (default is ‘propagate’):

‘propagate’: returns nan

‘raise’: throws an error

‘omit’: performs the calculations ignoring nan values

The ‘omit’ option is not currently available for permutation tests or one-sided asymptotic tests.

permutations : non-negative int, np.inf, or None (default), optional
If 0 or None (default), use the t-distribution to calculate p-values. Otherwise, permutations is the number of random permutations that will be used to estimate p-values using a permutation test. If permutations equals or exceeds the number of distinct partitions of the pooled data, an exact test is performed instead (i.e. each distinct partition is used exactly once). See Notes for details.

New in version 1.7.0.

random_state : {None, int, numpy.random.Generator, numpy.random.RandomState}, optional

If random_state is None (or np.random), the numpy.random.RandomState singleton is used. If random_state is an int, a new RandomState instance is used, seeded with random_state. If random_state is already a Generator or RandomState instance then that instance is used.

Pseudorandom number generator state used to generate permutations (used only when permutations is not None).

New in version 1.7.0.

alternative : {‘two-sided’, ‘less’, ‘greater’}, optional
Defines the alternative hypothesis. The following options are available (default is ‘two-sided’):

‘two-sided’

‘less’: one-sided

‘greater’: one-sided

New in version 1.6.0.

trim : float, optional
If nonzero, performs a trimmed (Yuen’s) t-test. Defines the fraction of elements to be trimmed from each end of the input samples. If 0 (default), no elements will be trimmed from either side. The number of trimmed elements from each tail is the floor of the trim times the number of elements. Valid range is [0, .5).

New in version 1.7.

Returns
statistic : float or array
The calculated t-statistic.

pvalue : float or array
The p-value for the selected alternative (two-tailed when alternative is ‘two-sided’, the default).

In [6]:
# Same two-sample comparison, computed from summary statistics only
# (mean, standard deviation and number of observations per group) via
# stats.ttest_ind_from_stats().
# NOTE: the inputs appear to be rounded, so the result differs slightly from
# the raw-data test; this assignment also rebinds `indTest`.
indTest = stats.ttest_ind_from_stats(
    mean1=92.255, std1=2.39, nobs1=8,  # group 1
    mean2=92.733, std2=2.98, nobs2=8,  # group 2
)
indTest

Ttest_indResult(statistic=-0.353922390875553, pvalue=0.7286705530970293)

scipy.stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True, alternative='two-sided')
T-test for means of two independent samples from descriptive statistics.

This is a two-sided test for the null hypothesis that two independent samples have identical average (expected) values.

Parameters
mean1 : array_like
The mean(s) of sample 1.

std1 : array_like
The standard deviation(s) of sample 1.

nobs1 : array_like
The number(s) of observations of sample 1.

mean2 : array_like
The mean(s) of sample 2.

std2 : array_like
The standard deviation(s) of sample 2.

nobs2 : array_like
The number(s) of observations of sample 2.

equal_var : bool, optional
If True (default), perform a standard independent 2 sample test that assumes equal population variances [1]. If False, perform Welch’s t-test, which does not assume equal population variance [2].

alternative : {‘two-sided’, ‘less’, ‘greater’}, optional
Defines the alternative hypothesis. The following options are available (default is ‘two-sided’):

‘two-sided’

‘less’: one-sided

‘greater’: one-sided

New in version 1.6.0.

Returns
statistic : float or array
The calculated t-statistics.

pvalue : float or array
The p-value for the selected alternative (two-tailed when alternative is ‘two-sided’, the default).

In [7]:
# Show the t statistic stored on the current `indTest` result.
indTest.statistic

-0.353922390875553

In [8]:
# Show the p-value stored on the current `indTest` result.
indTest.pvalue

0.7286705530970293

In [9]:
# Decision rule: reject the null hypothesis only when the p-value of the
# current `indTest` result falls below the significance level.
alpha = 0.05

print("Reject the Null" if indTest.pvalue < alpha else "Fail to reject")

Fail to reject


## Blood Pressure Example

In [10]:
# Two-sample t-test from summary statistics with the one-sided
# alternative 'greater'; sample sizes are 500 and 400.
stats.ttest_ind_from_stats(
    mean1=10.67, std1=3.895, nobs1=500,
    mean2=7.83, std2=4.224, nobs2=400,
    alternative='greater',
)

Ttest_indResult(statistic=10.4676374161062, pvalue=1.4200116519875262e-24)

In [11]:
# Same means, standard deviations and one-sided alternative as the
# preceding cell, but with sample sizes of only 5 and 4.
stats.ttest_ind_from_stats(
    mean1=10.67, std1=3.895, nobs1=5,
    mean2=7.83, std2=4.224, nobs2=4,
    alternative='greater',
)

Ttest_indResult(statistic=1.048112362171905, pvalue=0.16471063434733127)