In [1]:
import numpy as np
import scipy.stats as stats

In [2]:
def stats_ind(ds: np.ndarray, alpha: float):
    sum_row = np.sum(ds, axis=1).reshape((-1, 1))
    ds = np.hstack((ds, sum_row))
    sum_col = np.sum(ds, axis=0)
    ds = np.vstack((ds, sum_col))

    x = 0
    i_row = ds.shape[0] - 1
    j_col = ds.shape[1] - 1
    for i in range(i_row + 1):
        for j in range(j_col + 1):
            nij = ds[i, j_col] * ds[i_row, j] / ds[-1, -1]
            x += ((ds[i, j] - nij) ** 2) / nij

    df = (i_row - 1) * (j_col - 1)
    p_value = 1 - stats.chi2.cdf(x, df)
    conclusion = ('Rejected'
                  if x > stats.chi2.ppf(1 - alpha, df)
                  else 'Not rejected')

    return x, p_value, conclusion

### 14.19

In [3]:
data = np.array([
    [15, 29],
    [27, 19]
])
stats_ind(data, 0.01)

(5.470161631846418, 0.019343836490613553, 'Not rejected')

### 14.20

In [4]:
data = np.array([
    [162, 118, 451, 18],
    [310, 196, 996, 25],
    [258, 193, 458, 10],
    [280, 175, 390, 19]
])
stats_ind(data, 0.01)

(124.52971269330236, 0.0, 'Rejected')

### 14.21

In [5]:
data = np.array([
    [21, 36, 30],
    [48, 26, 19]
])
stats_ind(data, 0.05)

(14.463579015563466, 0.0007232254868159149, 'Rejected')

### 14.3 Kiểm định Mann-Whitney trường hợp mẫu độc lập

In [6]:
def mann_whitney(df1: np.ndarray, df2: np.ndarray) -> dict[str, int]:
    if np.size(df1) > np.size(df2):
        df1, df2 = df2, df1

    n1 = np.size(df1)
    n2 = np.size(df2)

    merge = np.sort(np.concatenate((df1, df2)))
    w1 = 0
    for i, v in enumerate(merge):
        if v in df1:
            w1 += i

    w2 = int((n1 + n2) * (n1 + n2 + 1) / 2 - w1)
    return {'W1': w1, 'W2': w2}

#### Bài tập 14.10 
An electrical engineer must design a circuit to deliver the maximum
amount of current to a display tube to achieve sufficient image brightness. Within
her allowable design constraints, she has developed two candidate circuits and tests
prototypes of each. The resulting data (in microamperes) are as follows:
a. Use the Wilcoxon rank-sum test to test H0 : µ1 = µ2 the alternative H1 : µ1 > µ2. Use α = 0.025.
b. Use the normal approximation for the Wilcoxon rank-sum test. Assume that α = 0.05. Find the approximate p-value for this test statistic.

In [7]:
data1 = np.array([251, 255, 258, 257, 250, 251, 254, 250, 248])
data2 = np.array([250, 253, 249, 256, 259, 252, 260, 251])
mann_whitney(data1, data2)

{'W1': 88, 'W2': 65}

#### Bài tập 14.11 
One of the authors travels regularly to Seattle, Washington. He uses
either Delta or Alaska airline. Flight delays are sometimes unavoidable, but he would
be willing to give most of his business to the airline with the best on-time arrival record.
The number of minutes that his flight arrived late for the last six trips on each airline
follows. Is there evidence that either airline has superior on-time arrival performance?
Use α = 0.01 and the Wilcoxon rank-sum test

In [8]:
data1 = np.array([13, 10, 1, -4, 0, 9])
data2 = np.array([15, 8, 3, -1, -2, 4])
mann_whitney(data1, data2)

{'W1': 34, 'W2': 44}

#### Bài tập 14.12 
The manufacturer of a hot tub is interested in testing two different
heating elements for its product. The element that produces the maximum heat gain
after 15 minutes would be preferable. The manufacturer obtains 10 samples of each
heating unit and tests each one. The heat gain after 15 minutes (in ◦F) follows.
a. Is there any reason to suspect that one unit is superior to the other? Use α = 0.05
and the Wilcoxon ranksum test.
b. Use the normal approximation for the Wilcoxon ranksum test. Assume that α =
0.05. What is the approximate p-value for this test statistic?

In [9]:
data1 = np.array([25, 27, 29, 31, 30, 26, 24, 32, 33, 38])
data2 = np.array([31, 33, 32, 35, 34, 29, 38, 35, 37, 30])
mann_whitney(data1, data2)

{'W1': 128, 'W2': 82}

#### Bài tập 14.13 
Random samples of size n1 = 8 and n2 = 8 were selected from populations A and B, respectively. The data are given in the following table.
a. Test for a difference in the medians of the two populations using an a α = 0.05
Wilcoxon rank sum test.
b. Place a 95% confidence interval on the difference in the medians of the two populations.

In [10]:
data1 = np.array([4.3, 4.6, 4.7, 5.1, 5.3, 5.3, 5.8, 5.4])
data2 = np.array([3.5, 3.8, 3.7, 3.9, 4.4, 4.7, 5.2, 4.4])
mann_whitney(data1, data2)

{'W1': 92, 'W2': 44}