In [1]:
import numpy as np
import pandas as pd

In [2]:
# The statistical module used to run chi square test
import scipy.stats as stats

In [3]:
# Observed data in a (hypothetical) survey of 6000 people 
observed = pd.Series([1500, 1350, 1600, 1550], index=["1", "2", "3", "4"])

In [4]:
# Create a data frame
df = pd.DataFrame([observed]).T
df

Unnamed: 0,0
1,1500
2,1350
3,1600
4,1550


In [5]:
# Add a column whose default values are the expected values
df[1] = df[0].sum()/len(df[0])
df

Unnamed: 0,0,1
1,1500,1500.0
2,1350,1500.0
3,1600,1500.0
4,1550,1500.0


In [8]:
# Rename the data frame columns
df.columns = ['Observed', 'Expected']

In [13]:
# View the data frame
df

Unnamed: 0,Observed,Expected
1,1500,1500.0
2,1350,1500.0
3,1600,1500.0
4,1550,1500.0


In [15]:
# Calculate the critical value
critical_value = stats.chi2.ppf(q = 0.95, df = len(df['Observed'] - 1))

In [16]:
# Print the critical value
critical_value

9.487729036781154

In [17]:
# Run the chi square test with stats.chisquare()
stats.chisquare(df['Observed'], df['Expected'])

Power_divergenceResult(statistic=23.333333333333336, pvalue=3.4411276663397364e-05)

In [None]:
# Since the pvalue is < 0.05 and chisquare value is greater than the critical value, 
# we conclude that the results are statiscally significant