### Chi-Square Test of Independence

In [2]:
import pandas as pd
import numpy as np
from scipy import stats

In [3]:
stats.chi2.ppf(0.99, 6)

16.811893829770927

Record of 50 students studying in ABN school is taken at random.

res_num = registration_no\
aa = academic ability\
pe = parent education\
sm = student motivation\
r = religion\
g = gender

In [4]:
acad = pd.read_excel('Data Files/Acad.xlsx')
acad

Unnamed: 0,Rsp No,aa,pe,sm,ae,r,g,c
0,1,99,19,1,2,0,0,1
1,2,46,12,0,0,0,0,0
2,3,57,15,1,1,0,0,0
3,4,94,18,2,2,1,1,1
4,5,82,13,2,1,1,1,1
5,6,59,12,0,0,2,0,0
6,7,61,12,1,2,0,0,0
7,8,29,9,0,0,1,1,0
8,9,36,13,1,1,0,0,0
9,10,91,16,2,2,1,1,0


In [5]:
# Cross table between gender and student's motivation

obs = pd.pivot_table(acad[['g', 'sm']], index='g', columns='sm', aggfunc=len)
obs

sm,0,1,2
g,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10,13,6
1,4,9,8


In [6]:
# Perform chi2 test to check independence

from scipy.stats import chi2_contingency

In [7]:
chi2, p, dof, tbl = chi2_contingency(obs)

In [8]:
chi2

2.3649585225939904

In [9]:
p

0.3065178579178871

In [10]:
dof

2

In [11]:
tbl

array([[ 8.12, 12.76,  8.12],
       [ 5.88,  9.24,  5.88]])

In [12]:
import scipy
from scipy.stats import chi2
from scipy.stats import poisson

In [13]:
data = pd.read_excel('Data Files/P_distribution.xlsx')
data

Unnamed: 0,Arrivals,Frequency
0,0,0
1,1,1
2,2,4
3,3,10
4,4,14
5,5,20
6,6,12
7,7,12
8,8,9
9,9,8


In [14]:
Observed_Freq = data['Frequency']

In [15]:
total_arrival = 600
total_time_period = 100

mu = total_arrival/total_time_period

In [16]:
Expected_Freq = []
for i in range(len(Observed_Freq)):
    E_Freq = 100*poisson.pmf(i, mu)
    Expected_Freq.append(E_Freq)

In [17]:
Expected_Freq

[0.24787521766663584,
 1.4872513059998145,
 4.461753917999444,
 8.923507835998894,
 13.385261753998332,
 16.062314104797995,
 16.06231410479801,
 13.767697804112569,
 10.32577335308442,
 6.883848902056284,
 4.130309341233764,
 2.2528960043093247,
 1.1264480021546681]

In [18]:
Expected_Freq_round_off = [round(elem, 2) for elem in Expected_Freq]
Expected_Freq_round_off

[0.25,
 1.49,
 4.46,
 8.92,
 13.39,
 16.06,
 16.06,
 13.77,
 10.33,
 6.88,
 4.13,
 2.25,
 1.13]

In [19]:
df = pd.DataFrame(list(zip(Observed_Freq, Expected_Freq_round_off)), columns=['Observed Frequency', 'Expected Frequency'])
df

Unnamed: 0,Observed Frequency,Expected Frequency
0,0,0.25
1,1,1.49
2,4,4.46
3,10,8.92
4,14,13.39
5,20,16.06
6,12,16.06
7,12,13.77
8,9,10.33
9,8,6.88


In [20]:
obs_freq = [5, 10, 14, 20, 12, 12, 9, 8, 10]
expected_freq = [6.20, 8.92, 13.39, 16.06, 16.06, 13.77, 10.33, 6.88, 8.39]

In [21]:
scipy.stats.chisquare(obs_freq, expected_freq)

Power_divergenceResult(statistic=3.2738182931105193, pvalue=0.916017731732134)

In [24]:
chi2.ppf(0.95, 7)

14.067140449340169

Checking GOF for Uniform Distribution

In [38]:
chi2.ppf(0.99, 11)

24.724970311318277

In [25]:
x = [1610, 1585, 1649, 1590, 1540, 1397, 1410, 1350, 1495, 1564, 1602, 1655]

In [26]:
np.mean(x)

1537.25

In [27]:
exp_f = [1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25, 1537.25]

In [28]:
from scipy.stats import chisquare
chisquare(x, exp_f)

Power_divergenceResult(statistic=74.37583346885673, pvalue=1.78545252783034e-11)

Checking GOF for normal distribution

In [29]:
A = [33, 43, 44, 45, 52, 52, 56, 58, 63, 64, 64, 65, 66, 68, 70, 72, 73, 73, 74, 75, 83, 84, 85, 86, 91, 92, 94, 98, 102, 105]

In [30]:
mean = np.mean(A)
mean

71.0

In [31]:
std = np.std(A)
std

18.226354544998845

In [32]:
x = 1/6 # for 6 equal probability intervals

In [34]:
for j in range(1, 6):
    Prob_intervals = [scipy.stats.norm.ppf(j*x, mean, std)]
    print(Prob_intervals)

[53.36743154175236]
[63.14941153083116]
[71.0]
[78.85058846916884]
[88.63256845824763]


In [35]:
Expected_Freq = [5, 5, 5, 5, 5, 5]      # will divide the normal distribution into 6 intervals at frequency 5 in each

In [36]:
Observed_Freq = [6, 3, 6, 5, 4, 6]

In [37]:
scipy.stats.chisquare(Observed_Freq, Expected_Freq)

Power_divergenceResult(statistic=1.5999999999999999, pvalue=0.9012493445012737)