In [2]:
from scipy import stats
import pandas as pd
import numpy as np
from statsmodels.stats.weightstats import zconfint

In [3]:
# Parse the tab-separated dataset; the first row of the file supplies the column names.
# NOTE(review): the filename is spelled 'watter.txt' — confirm it matches the file on disk.
water_data = pd.read_csv('watter.txt', delimiter='\t', header=0)
# Preview the first rows as a quick sanity check of the parsed columns.
water_data.head()

Unnamed: 0,location,town,mortality,hardness
0,South,Bath,1247,105
1,North,Birkenhead,1668,17
2,South,Birmingham,1466,5
3,North,Blackburn,1800,14
4,North,Blackpool,1609,18


In [4]:
# Correlation between mortality and water hardness over every town in the dataset.
# 'ANS 1' is the Pearson coefficient, 'ANS 2' the Spearman rank coefficient, both rounded to 4 digits.
corr_data = water_data.loc[:, ['mortality', 'hardness']]
print('ANS 1:', round(corr_data.corr(method='pearson').loc['mortality', 'hardness'], 4))
print('ANS 2:', round(corr_data.corr(method='spearman').loc['mortality', 'hardness'], 4))

ANS 1: -0.6548
ANS 2: -0.6317


In [5]:
# Same two coefficients, restricted to towns whose location is 'South'.
print('SOUTH')
corr_data = water_data.loc[water_data['location'] == 'South', ['mortality', 'hardness']]
print('ANS 1:', round(corr_data.corr(method='pearson').loc['mortality', 'hardness'], 4))
print('ANS 2:', round(corr_data.corr(method='spearman').loc['mortality', 'hardness'], 4))

SOUTH
ANS 1: -0.6022
ANS 2: -0.5957


In [6]:
# Same two coefficients, restricted to towns whose location is 'North'.
print('NORTH')
corr_data = water_data.loc[water_data['location'] == 'North', ['mortality', 'hardness']]
print('ANS 1:', round(corr_data.corr(method='pearson').loc['mortality', 'hardness'], 4))
print('ANS 2:', round(corr_data.corr(method='spearman').loc['mortality', 'hardness'], 4))

NORTH
ANS 1: -0.3686
ANS 2: -0.4042


### Task 4
### Создадим матрицу сопряженности


| X                     | Мужчина | Женщина |
|-----------------------|---------|---------|
| Хотя бы 1 раз в месяц | 239     | 203     |
| Реже                  | 515     | 718     |

In [7]:
def matthews_corrcoef_2x2(table):
    """Return the Matthews (phi) correlation coefficient of a 2x2 contingency table.

    For a table ``[[a, b], [c, d]]`` the coefficient is
    ``(a*d - b*c) / sqrt((a+b) * (a+c) * (b+d) * (c+d))``.

    Parameters
    ----------
    table : array-like of shape (2, 2)
        Observed counts.

    Returns
    -------
    numpy.float64
        The coefficient.

    Raises
    ------
    ValueError
        If ``table`` is not 2x2.
    """
    # Work in float: the product of the four marginal totals can exceed the
    # range of a fixed-width integer dtype and would then wrap silently.
    counts = np.asarray(table, dtype=float)
    if counts.shape != (2, 2):
        raise ValueError('Expected a 2x2 contingency table')
    (a, b), (c, d) = counts
    return (a * d - b * c) / np.sqrt((a + b) * (a + c) * (b + d) * (c + d))


# Observed counts from the Task 4 table: rows are the two frequency categories,
# columns are men and women.
conjugation_matrix = np.array([[239, 203], [515, 718]])
MCC = matthews_corrcoef_2x2(conjugation_matrix)
round(MCC,3)

0.109

In [8]:
# p-value (element 1 of the result) of the chi-square independence test on the 2x2 table.
# correction=True is SciPy's default (Yates' continuity correction); it is spelled out for clarity.
stats.chi2_contingency(conjugation_matrix, correction=True)[1]

1.0558987006638725e-05

In [9]:
# Sanity check: display the dimensions of the contingency table.
np.shape(conjugation_matrix)

(2, 2)

In [10]:
def calculate_confidence_interval(sample, alpha=.05):
    """Normal-approximation confidence interval for a difference of two proportions.

    Each column of ``sample`` is treated as one group: row 0 holds the count of
    interest and the column total is the group size. The interval is built for
    ``p1 - p2`` (column 0 minus column 1).

    Parameters
    ----------
    sample : numpy.ndarray
        2x2 table of counts, indexed as ``sample[row, column]``.
    alpha : float, default 0.05
        Significance level; the interval has nominal coverage ``1 - alpha``.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval.
    """
    hits_first, hits_second = sample[0, 0], sample[0, 1]
    size_first = hits_first + sample[1, 0]
    size_second = hits_second + sample[1, 1]

    share_first = hits_first / size_first
    share_second = hits_second / size_second

    # Sum of the two per-group binomial variances of the sample proportions.
    variance = (share_first * (1 - share_first) / size_first) + (share_second * (1 - share_second) / size_second)
    margin = stats.norm.ppf(1 - alpha/2.) * np.sqrt(variance)

    return share_first - share_second - margin, share_first - share_second + margin

# Report the lower bound (element 0) of the 95% interval, rounded to 4 digits.
print('ANS6:', round(calculate_confidence_interval(conjugation_matrix, alpha=.05)[0], 4))

ANS6: 0.0539


In [11]:
#   Task 7
def conjugation_matrix_ttest(sample, alternative="two-sided", verbose=True):
    """Z-test for the difference of two proportions given a 2x2 table of counts.

    Each column of ``sample`` is treated as one group: row 0 holds the count of
    interest and the column total is the group size. The statistic uses the
    pooled proportion and is compared against the standard normal distribution
    (despite the ``ttest`` in the name, no t-distribution is involved).

    Parameters
    ----------
    sample : array-like of shape (2, 2)
        Table of counts, indexed as ``sample[row, column]``.
    alternative : {'two-sided', 'greater', 'less'}, default 'two-sided'
        Alternative hypothesis about ``p1 - p2`` (column 0 minus column 1).
    verbose : bool, default True
        When true, print ``p1``, ``p2``, the pooled proportion and the
        z-statistic (one per line) before returning.

    Returns
    -------
    float
        The p-value of the test.

    Raises
    ------
    ValueError
        If ``alternative`` is not one of the supported values.
    """
    if alternative not in ('two-sided', 'greater', 'less'):
        raise ValueError('Unknown alternative')

    # Accept nested lists as well as arrays; tuple indexing below needs an ndarray.
    sample = np.asarray(sample)

    a = sample[0,0]
    b = sample[0,1]
    c = sample[1,0]
    d = sample[1,1]
    n1 = a + c
    n2 = b + d
    p1 = a / n1
    p2 = b / n2
    # Pooled proportion across both groups.
    P = ((p1 * n1) + (p2 * n2)) / (n1 + n2)
    z_stat = (p1 - p2) / np.sqrt(P * (1 - P) * ((1 / n1) + (1 / n2)))

    if verbose:
        print(p1)
        print(p2)
        print(P)
        print(z_stat)

    # Use the survival function instead of ``1 - cdf``: for large |z| the cdf
    # rounds to exactly 1.0 and the subtraction would return a p-value of 0.
    if alternative == 'two-sided':
        return 2 * stats.norm.sf(np.abs(z_stat))
    if alternative == 'less':
        return stats.norm.cdf(z_stat)
    return stats.norm.sf(z_stat)

# Two-sided p-value for the difference between the two column proportions.
conjugation_matrix_ttest(conjugation_matrix, alternative="two-sided")

0.3169761273209549
0.22041259500542887
0.2638805970149254
4.46111444482329


8.153453089576601e-06

### Task 8
### Создадим матрицу сопряженности


|         X         | Не доволен | Средне | Доволен |
|:-----------------:|:----------:|:------:|:-------:|
| Не очень счастлив |     197    |   111  |    33   |
|       Средне      |     382    |   685  |   331   |
|   Очень счастлив  |     110    |   342  |   333   |

In [16]:
# Observed 3x3 contingency table for Task 8, stored as floats.
happines = np.array(
    [
        [197, 111, 33],
        [382, 685, 331],
        [110, 342, 333],
    ],
    dtype=float,
)
# Chi-square test of independence; displays the full result
# (statistic, p-value, degrees of freedom, expected frequencies).
stats.chi2_contingency(happines)

(293.68311039689746,
 2.4964299580093467e-62,
 4,
 array([[ 93.08597464, 153.74722662,  94.16679873],
        [381.6251981 , 630.318542  , 386.0562599 ],
        [214.28882726, 353.93423138, 216.77694136]]))

In [21]:
# Cramér's V: sqrt(chi2 / (n * (min(rows, cols) - 1))).
# The normalising factor is derived from the table shape rather than hard-coded,
# so the cell stays correct if the table dimensions change.
chi2_stat = stats.chi2_contingency(happines)[0]
n_obs = happines.sum()
cramers_v = np.sqrt(chi2_stat / (n_obs * (min(happines.shape) - 1)))
round(cramers_v, 4)

0.2412