<b>Import of required libraries</b>

In [29]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon, ranksums
from statsmodels.stats.descriptivestats import sign_test

<b>1. Sign test example</b>

In [None]:
def summarize_sample(sample):
    print('Sample 1st quartile: \t {0}'.format(np.percentile(sample, 25)),
         '\nSample median: \t\t {0}'.format(np.median(sample)),
          '\nSample 3rd quartile: \t {0}'.format(np.percentile(sample, 75)),
          '\nSample mean: \t\t {0}'.format(np.mean(sample)),
         '\nSample variance: \t {0}'.format(np.var(sample)),
         '\nSample std dev: \t {0}'.format(np.std(sample)))

w1 = [88, 69, 86, 59, 57, 82, 94, 93, 64, 91, 86, 59, 91, 60, 57, 92, 70, 88, 70, 85]
w2 = [73, 68, 75, 54, 53, 84, 84, 86, 66, 84, 78, 58, 91, 57, 59, 88, 71, 84, 64, 85]

print('=== First sample stats ===')
summarize_sample(w1)

print('\n=== Second sample stats ===')
summarize_sample(w2)

print('\nFirst sample median: \t {0}'.format(np.median(w1)),
     '\nSecond sample median: \t {0}'.format(np.median(w2)))

stat, p = wilcoxon(sorted(w1), sorted(w2), zero_method='wilcox')

alpha = 0.05

print('\np-value: \t\t {0}'.format(p),
     '\nalpha: \t\t\t {0}'.format(alpha))

if p <= alpha:
    print('\nResult: \t\t p-value is less/equal than alpha - we reject the null hyptohesis')
else:
    print('\nResult: \t\t p-value is greater than alpha - we can\'t reject the null hyptohesis')
    
plt.boxplot((w1, w2), labels=['Before diet', 'After diet'])
plt.title('Boxplot of patients\' weight')
plt.xlabel('Weight before/after diet')
plt.ylabel('Patient weight')


=== First sample stats ===
Sample 1st quartile: 	 63.0 
Sample median: 		 83.5 
Sample 3rd quartile: 	 88.75 
Sample mean: 		 77.05 
Sample variance: 	 186.1475 
Sample std dev: 	 13.643588237703453

=== Second sample stats ===
Sample 1st quartile: 	 62.75 
Sample median: 		 74.0 
Sample 3rd quartile: 	 84.0 
Sample mean: 		 73.1 
Sample variance: 	 148.58999999999997 
Sample std dev: 	 12.189749792346026

First sample median: 	 83.5 
Second sample median: 	 74.0

p-value: 		 0.0001775558502691252 
alpha: 			 0.05

Result: 		 p-value is less/equal than alpha - we reject the null hyptohesis


Text(0, 0.5, 'Patient weight')

<b>2. Sign test example for book reading samples</b>

In [3]:
df = pd.read_csv(os.path.join('dane_listy_3_-_5', 'czytelnictwo.csv'))

print('First sample median: \t {0}'.format(np.median(df['przed'])),
     '\nSecond sample median: \t {0}'.format(np.median(df['po'])))

stat, p = wilcoxon(sorted(df['przed']), sorted(df['po']), zero_method='wilcox')

alpha = 0.05

print('\np-value: \t\t {0}'.format(p),
     '\nalpha: \t\t\t {0}'.format(alpha))

if p <= alpha:
    print('\nResult: \t\t p-value is less/equal than alpha - we reject the null hyptohesis')
else:
    print('\nResult: \t\t p-value is greater than alpha - we can\'t reject the null hyptohesis')

First sample median: 	 17.0 
Second sample median: 	 17.0

p-value: 		 0.00021862011712466066 
alpha: 			 0.05

Result: 		 p-value is less/equal than alpha - we reject the null hyptohesis


<b>3. Wilcoxon rank-sum test example</b>

In [9]:
df = pd.read_csv(os.path.join('dane_listy_3_-_5', 'chmiel.csv'))

print('First sample median: \t {0}'.format(np.median(df['zapylona'])),
     '\nSecond sample median: \t {0}'.format(np.median(df['niezapyl'])))

stat, p = ranksums(df['zapylona'], df['niezapyl'])

alpha = 0.05

print('\np-value: \t\t {0}'.format(p),
     '\nalpha: \t\t\t {0}'.format(alpha))

if p <= alpha:
    print('\nResult: \t\t p-value is less/equal than alpha - we reject the null hyptohesis')
else:
    print('\nResult: \t\t p-value is greater than alpha - we can\'t reject the null hyptohesis')

First sample median: 	 0.73 
Second sample median: 	 0.18

p-value: 		 0.001745118699528905 
alpha: 			 0.05

Result: 		 p-value is less/equal than alpha - we reject the null hyptohesis


<b>4. Wilcoxon rank-sum test example for employees' press reading time</b>

In [16]:
df = pd.read_csv(os.path.join('dane_listy_3_-_5', 'czytelnictwo.csv'))

print('First sample median: \t {0}'.format(np.median(df['przed'])),
     '\nSecond sample median: \t {0}'.format(np.median(df['po'])))

stat, p = ranksums(df['przed'], df['po'])

alpha = 0.05

print('\np-value: \t\t {0}'.format(p),
     '\nalpha: \t\t\t {0}'.format(alpha))

if p <= alpha:
    print('\nResult: \t\t p-value is less/equal than alpha - we reject the null hyptohesis')
else:
    print('\nResult: \t\t p-value is greater than alpha - we can\'t reject the null hyptohesis')

First sample median: 	 17.0 
Second sample median: 	 17.0

p-value: 		 0.46458961554772527 
alpha: 			 0.05

Result: 		 p-value is greater than alpha - we can't reject the null hyptohesis


<b>5. Wilcoxon rank-sum test example for custom alpha (0.9)</b>

In [28]:
df = pd.read_csv(os.path.join('dane_listy_3_-_5', 'dane z koronografii.csv'))

print('First sample median: \t {0}'.format(np.median(df[df['group'] == 1]['time'])),
     '\nSecond sample median: \t {0}'.format(np.median(df[df['group'] == 2]['time'])))

stat, p = ranksums(df[df['group'] == 1]['time'], df[df['group'] == 2]['time'])

alpha = 0.9

print('\np-value: \t\t {0}'.format(p),
     '\nalpha: \t\t\t {0}'.format(alpha))

if p <= alpha:
    print('\nResult: \t\t p-value is less/equal than alpha - we reject the null hyptohesis')
else:
    print('\nResult: \t\t p-value is greater than alpha - we can\'t reject the null hyptohesis')

First sample median: 	 984.0 
Second sample median: 	 729.0

p-value: 		 0.02633008765302607 
alpha: 			 0.9

Result: 		 p-value is less/equal than alpha - we reject the null hyptohesis
