## 5: CORRELATIONS - WHAT ITEM TYPES?

### Import libraries 

In [99]:
#Pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
#NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Matplotlib is a plotting library for python and pyplot gives us a MatLab like plotting framework. We will use this in our plotter function to plot data.
import matplotlib.pyplot as plt
#Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics
import seaborn as sns
import dataframe_image as dfi

from matplotlib.ticker import StrMethodFormatter

# importing the required function for correlations
from scipy.stats import chi2_contingency

### Load and view data 

In [100]:
# Read the HH2 purchase records into the working frame `df`.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
csv_path = r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2.csv"
df = pd.read_csv(csv_path)
# df.describe(include='all')

In [101]:
# Pairwise correlation between the numeric features only.
# The frame also holds text columns (item_type, day, time, store_type, store_name).
# Older pandas dropped those silently inside DataFrame.corr(); pandas >= 2.0 raises
# on them instead, so the numeric/bool columns are selected explicitly first.
data_corr = df.select_dtypes(include=['number', 'bool']).corr()
# data_corr

In [102]:
# Aggregate afternoon and evening: every 'evening' entry is relabelled 'afternoon',
# so the 'time' column is reduced to the slots compared below.
df['time'] = df['time'].replace(['afternoon', 'evening'], 'afternoon')

# One sub-frame per time slot. The slot labels are string values of the 'time'
# column, so they must be quoted; the bare names morning / noon / afternoon are
# not defined anywhere and raise NameError on a fresh kernel.
# NOTE(review): these frames are built before the promo item types are removed
# from `df` further down, so they still contain those items; confirm this is intended.
df_morning = df[df["time"] == 'morning']
df_noon = df[df["time"] == 'noon']
df_afternoon = df[df["time"] == 'afternoon']

# 1. GENERAL CORRELATIONS: time/day/storetype-name

## 1.1. CORRELATION 1: item type vs day

In [103]:
# Contingency table: purchase counts per item type (rows) and weekday (columns)
CrosstabResult = pd.crosstab(df['item_type'], df['day'])

CrosstabResult

day,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
item_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
alcoholic drinks,1,0,0,0,0,0,0
almond milk,0,0,1,0,0,0,0
andalouse sauce,0,0,0,0,1,0,0
apple sauce,1,0,2,0,1,0,0
apples,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...
veal,0,0,1,0,0,0,0
vegetable box,0,2,0,0,0,0,0
vegetable mix,0,0,0,0,3,0,0
yoghurt,1,3,2,1,1,0,0


In [104]:
# Chi-square test of independence on the contingency table
ChiSqResult = chi2_contingency(CrosstabResult)

# Element 1 of the result is the p-value: the probability of seeing a table at
# least this far from independence if H0 (no association) held. It is not the
# probability that H0 is true. With p > 0.05 we fail to reject H0.
# NOTE(review): the table shown is sparse (mostly 0/1 counts); the chi-square
# approximation is usually considered unreliable for such small expected counts.
p_value = ChiSqResult[1]
print('The P-Value of the ChiSq Test is:', p_value)

The P-Value of the ChiSq Test is: 0.007328794340601195


Significant (strong), we could limit options to what's bought per day:
1. Certain categories are bought on specific days only/mostly.
2. This could be by chance, but could be on purpose for some categories (e.g. bakery).
3. Let's check if this is also true for only the supermarket/times...

## 1.2. CORRELATION 2: item type vs time

In [105]:
# Contingency table: purchase counts per item type (rows) and time slot (columns)
CrosstabResult = pd.crosstab(df['item_type'], df['time'])

CrosstabResult

time,afternoon,morning,noon
item_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alcoholic drinks,1,0,0
almond milk,1,0,0
andalouse sauce,1,0,0
apple sauce,3,1,0
apples,2,0,0
...,...,...,...
veal,0,1,0
vegetable box,2,0,0
vegetable mix,3,0,0
yoghurt,6,2,0


In [106]:
# Chi-square test of independence on the contingency table
ChiSqResult = chi2_contingency(CrosstabResult)

# Element 1 of the result is the p-value: the probability of seeing a table at
# least this far from independence if H0 (no association) held. It is not the
# probability that H0 is true. With p > 0.05 we fail to reject H0.
p_value = ChiSqResult[1]
print('The P-Value of the ChiSq Test is:', p_value)

The P-Value of the ChiSq Test is: 0.0004258260117925643


Significant (strong) correlation, we could limit options to what's bought per time [in a supermarket]

## 1.3. CORRELATION 3: item type vs store

### 1.3.1 item type vs store type

In [107]:
# Contingency table: purchase counts per item type (rows) and store type (columns)
CrosstabResult = pd.crosstab(df['item_type'], df['store_type'])

CrosstabResult

store_type,bakery,butcher,drugstore,furniture store,supermarket
item_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
alcoholic drinks,0,0,0,0,1
almond milk,0,0,0,0,1
andalouse sauce,0,0,0,0,1
apple sauce,0,0,0,0,4
apples,0,0,0,0,2
...,...,...,...,...,...
veal,0,0,0,0,1
vegetable box,0,0,0,0,2
vegetable mix,0,0,0,0,3
yoghurt,0,0,0,0,8


In [108]:
# Chi-square test of independence on the contingency table
ChiSqResult = chi2_contingency(CrosstabResult)

# Element 1 of the result is the p-value: the probability of seeing a table at
# least this far from independence if H0 (no association) held. It is not the
# probability that H0 is true. With p > 0.05 we fail to reject H0.
p_value = ChiSqResult[1]
print('The P-Value of the ChiSq Test is:', p_value)

The P-Value of the ChiSq Test is: 7.298136204606577e-19


### 1.3.2. item type vs store name

In [109]:
# Contingency table: purchase counts per item type (rows) and store name (columns)
CrosstabResult = pd.crosstab(df['item_type'], df['store_name'])

CrosstabResult

store_name,Albert Heijn,Brabo,Carrefour,Delhaize,Ikea,Kruidvat,Okay,Sys,Versavel Poelman
item_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
alcoholic drinks,0,0,0,1,0,0,0,0,0
almond milk,0,0,1,0,0,0,0,0,0
andalouse sauce,0,0,0,0,0,0,1,0,0
apple sauce,0,0,1,1,0,0,2,0,0
apples,0,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...
veal,0,0,0,0,0,0,1,0,0
vegetable box,2,0,0,0,0,0,0,0,0
vegetable mix,0,0,0,0,0,0,3,0,0
yoghurt,3,0,1,2,0,0,2,0,0


In [110]:
# Chi-square test of independence on the contingency table
ChiSqResult = chi2_contingency(CrosstabResult)

# Element 1 of the result is the p-value: the probability of seeing a table at
# least this far from independence if H0 (no association) held. It is not the
# probability that H0 is true. With p > 0.05 we fail to reject H0.
p_value = ChiSqResult[1]
print('The P-Value of the ChiSq Test is:', p_value)

The P-Value of the ChiSq Test is: 4.60371070098832e-15


## 1.4. item type vs promo

In [111]:
# Contingency table: purchase counts per item type (rows) and promo flag (columns 0 / 1)
CrosstabResult = pd.crosstab(df['item_type'], df['promo_num'])

CrosstabResult

promo_num,0,1
item_type,Unnamed: 1_level_1,Unnamed: 2_level_1
alcoholic drinks,1,0
almond milk,1,0
andalouse sauce,1,0
apple sauce,4,0
apples,2,0
...,...,...
veal,1,0
vegetable box,0,2
vegetable mix,1,2
yoghurt,7,1


In [112]:
# Chi-square test of independence on the contingency table
ChiSqResult = chi2_contingency(CrosstabResult)

# Element 1 of the result is the p-value: the probability of seeing a table at
# least this far from independence if H0 (no association) held. It is not the
# probability that H0 is true. With p > 0.05 we fail to reject H0.
p_value = ChiSqResult[1]
print('The P-Value of the ChiSq Test is:', p_value)

The P-Value of the ChiSq Test is: 0.0003625346131603951


We could separate items that are more often bought in promo and exclude them from regular groceries

#### Item types bought more often in promo than not (promo count #1 > non-promo count #0)

In [113]:
# Keep only the item types whose promo count (column 1) exceeds their
# non-promo count (column 0), then turn item_type back into a regular column.
more_promo_than_regular = CrosstabResult[1] > CrosstabResult[0]
CrosstabResult = CrosstabResult[more_promo_than_regular].reset_index()

CrosstabResult

promo_num,item_type,0,1
0,beef,1,2
1,blueberries,0,2
2,chicken wrap,0,1
3,chocolates,0,1
4,egg wrap,0,1
5,fish,1,2
6,fruit salad,0,1
7,lunch,0,1
8,pork/veal,0,1
9,vegetable box,0,2


In [114]:
# Plain Python list of the item types that remain in the filtered promo table
options = CrosstabResult['item_type'].tolist()

In [115]:
# Display the list of promo-dominated item types
options

['beef',
 'blueberries',
 'chicken wrap',
 'chocolates',
 'egg wrap',
 'fish',
 'fruit salad',
 'lunch',
 'pork/veal',
 'vegetable box',
 'vegetable mix']

In [116]:
# Split the frame on the promo-dominated item types listed in `options`.
# The mask is computed once, before `df` is overwritten: building df_promo from
# the already-filtered frame would always give an empty result.
is_promo_item = df['item_type'].isin(options)

# df with only the promo items
df_promo = df[is_promo_item]

# Now delete these item types from the current dataframe
# (`~` is the boolean inverse of the mask)
df = df[~is_promo_item]

# 2. Narrow down dataframe

## 2.1 Per DAY

In [117]:
# Weekday labels as they are compared against the 'day' column
Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday = (
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'
)

# One sub-frame per weekday
df_Monday = df.loc[df['day'].eq(Monday)]
df_Tuesday = df.loc[df['day'].eq(Tuesday)]
df_Wednesday = df.loc[df['day'].eq(Wednesday)]
df_Thursday = df.loc[df['day'].eq(Thursday)]
df_Friday = df.loc[df['day'].eq(Friday)]
df_Saturday = df.loc[df['day'].eq(Saturday)]
df_Sunday = df.loc[df['day'].eq(Sunday)]

In [118]:
# Contingency tables of item type (rows) against time slot (columns), one per weekday
(CrosstabResult_Monday, CrosstabResult_Tuesday, CrosstabResult_Wednesday,
 CrosstabResult_Thursday, CrosstabResult_Friday, CrosstabResult_Saturday,
 CrosstabResult_Sunday) = [
    pd.crosstab(index=day_df['item_type'], columns=day_df['time'], dropna=False)
    for day_df in (df_Monday, df_Tuesday, df_Wednesday, df_Thursday,
                   df_Friday, df_Saturday, df_Sunday)
]

# Chi-square test of independence per weekday.
# NOTE: this rebinds Monday..Sunday, which held the weekday label strings when
# the per-day frames were selected, to the test results.
Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday = [
    chi2_contingency(table)
    for table in (CrosstabResult_Monday, CrosstabResult_Tuesday,
                  CrosstabResult_Wednesday, CrosstabResult_Thursday,
                  CrosstabResult_Friday, CrosstabResult_Saturday,
                  CrosstabResult_Sunday)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test is, for Mondays:', Monday),
    ('The P-Value of the ChiSq Test is, for Tuesdays:', Tuesday),
    ('The P-Value of the ChiSq Test is, for Wednesdays:', Wednesday),
    ('The P-Value of the ChiSq Test is, for Thursdays:', Thursday),
    ('The P-Value of the ChiSq Test is, for Fridays:', Friday),
    ('The P-Value of the ChiSq Test is, for Saturdays:', Saturday),
    ('The P-Value of the ChiSq Test is, for Sundays:', Sunday),
):
    print(message, result[1])

The P-Value of the ChiSq Test is, for Mondays: 0.056031382824621265
The P-Value of the ChiSq Test is, for Tuesdays: 0.07250903390723971
The P-Value of the ChiSq Test is, for Wednesdays: 0.26829256976484567
The P-Value of the ChiSq Test is, for Thursdays: 0.11240514931002711
The P-Value of the ChiSq Test is, for Fridays: 0.47984765918811956
The P-Value of the ChiSq Test is, for Saturdays: 0.4147281669423075
The P-Value of the ChiSq Test is, for Sundays: 1.0


In [119]:
# Contingency tables of item type (rows) against store type (columns), one per weekday
(CrosstabResult_Monday, CrosstabResult_Tuesday, CrosstabResult_Wednesday,
 CrosstabResult_Thursday, CrosstabResult_Friday, CrosstabResult_Saturday,
 CrosstabResult_Sunday) = [
    pd.crosstab(index=day_df['item_type'], columns=day_df['store_type'], dropna=False)
    for day_df in (df_Monday, df_Tuesday, df_Wednesday, df_Thursday,
                   df_Friday, df_Saturday, df_Sunday)
]

# Chi-square test of independence per weekday (results bound to Monday..Sunday)
Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday = [
    chi2_contingency(table)
    for table in (CrosstabResult_Monday, CrosstabResult_Tuesday,
                  CrosstabResult_Wednesday, CrosstabResult_Thursday,
                  CrosstabResult_Friday, CrosstabResult_Saturday,
                  CrosstabResult_Sunday)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test is, for Mondays:', Monday),
    ('The P-Value of the ChiSq Test is, for Tuesdays:', Tuesday),
    ('The P-Value of the ChiSq Test is, for Wednesdays:', Wednesday),
    ('The P-Value of the ChiSq Test is, for Thursdays:', Thursday),
    ('The P-Value of the ChiSq Test is, for Fridays:', Friday),
    ('The P-Value of the ChiSq Test is, for Saturdays:', Saturday),
    ('The P-Value of the ChiSq Test is, for Sundays:', Sunday),
):
    print(message, result[1])

The P-Value of the ChiSq Test is, for Mondays: 0.013325217451875017
The P-Value of the ChiSq Test is, for Tuesdays: 0.025016961216351507
The P-Value of the ChiSq Test is, for Wednesdays: 0.07189777249646509
The P-Value of the ChiSq Test is, for Thursdays: 0.9719243231099925
The P-Value of the ChiSq Test is, for Fridays: 0.8579347122641507
The P-Value of the ChiSq Test is, for Saturdays: 0.003411846997498827
The P-Value of the ChiSq Test is, for Sundays: 0.020558772205794218


In [120]:
# Contingency tables of item type (rows) against store name (columns), one per weekday
(CrosstabResult_Monday, CrosstabResult_Tuesday, CrosstabResult_Wednesday,
 CrosstabResult_Thursday, CrosstabResult_Friday, CrosstabResult_Saturday,
 CrosstabResult_Sunday) = [
    pd.crosstab(index=day_df['item_type'], columns=day_df['store_name'], dropna=False)
    for day_df in (df_Monday, df_Tuesday, df_Wednesday, df_Thursday,
                   df_Friday, df_Saturday, df_Sunday)
]

# Chi-square test of independence per weekday (results bound to Monday..Sunday)
Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday = [
    chi2_contingency(table)
    for table in (CrosstabResult_Monday, CrosstabResult_Tuesday,
                  CrosstabResult_Wednesday, CrosstabResult_Thursday,
                  CrosstabResult_Friday, CrosstabResult_Saturday,
                  CrosstabResult_Sunday)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test is, for Mondays:', Monday),
    ('The P-Value of the ChiSq Test is, for Tuesdays:', Tuesday),
    ('The P-Value of the ChiSq Test is, for Wednesdays:', Wednesday),
    ('The P-Value of the ChiSq Test is, for Thursdays:', Thursday),
    ('The P-Value of the ChiSq Test is, for Fridays:', Friday),
    ('The P-Value of the ChiSq Test is, for Saturdays:', Saturday),
    ('The P-Value of the ChiSq Test is, for Sundays:', Sunday),
):
    print(message, result[1])

The P-Value of the ChiSq Test is, for Mondays: 0.013325217451875017
The P-Value of the ChiSq Test is, for Tuesdays: 0.02501696121635147
The P-Value of the ChiSq Test is, for Wednesdays: 0.07189777249646509
The P-Value of the ChiSq Test is, for Thursdays: 0.3392738454656977
The P-Value of the ChiSq Test is, for Fridays: 0.47984765918811956
The P-Value of the ChiSq Test is, for Saturdays: 0.012125102015729035
The P-Value of the ChiSq Test is, for Sundays: 0.0004918318703550143


## 2.2 Per TIME

### Aggregate time

In [122]:
# Contingency tables of item type (rows) against weekday (columns), one per time slot
CrosstabResult_morning, CrosstabResult_noon, CrosstabResult_afternoon = [
    pd.crosstab(index=slot_df['item_type'], columns=slot_df['day'])
    for slot_df in (df_morning, df_noon, df_afternoon)
]

# Chi-square test of independence per time slot
ChiSqResult_morning, ChiSqResult_noon, ChiSqResult_afternoon = [
    chi2_contingency(table)
    for table in (CrosstabResult_morning, CrosstabResult_noon, CrosstabResult_afternoon)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test is, for mornings:', ChiSqResult_morning),
    ('The P-Value of the ChiSq Test is, for noons:', ChiSqResult_noon),
    ('The P-Value of the ChiSq Test is, for afternoons:', ChiSqResult_afternoon),
):
    print(message, result[1])

The P-Value of the ChiSq Test is, for mornings: 0.06806927661604609
The P-Value of the ChiSq Test is, for noons: 0.3931395804642435
The P-Value of the ChiSq Test is, for afternoons: 0.20518932320943198


In [123]:
# Contingency tables of item type (rows) against store type (columns), one per time slot
CrosstabResult_morning, CrosstabResult_noon, CrosstabResult_afternoon = [
    pd.crosstab(index=slot_df['item_type'], columns=slot_df['store_type'])
    for slot_df in (df_morning, df_noon, df_afternoon)
]

# Chi-square test of independence per time slot
ChiSqResult_morning, ChiSqResult_noon, ChiSqResult_afternoon = [
    chi2_contingency(table)
    for table in (CrosstabResult_morning, CrosstabResult_noon, CrosstabResult_afternoon)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test is, for mornings:', ChiSqResult_morning),
    ('The P-Value of the ChiSq Test is, for noons:', ChiSqResult_noon),
    ('The P-Value of the ChiSq Test is, for afternoons:', ChiSqResult_afternoon),
):
    print(message, result[1])

The P-Value of the ChiSq Test is, for mornings: 7.287611240114154e-15
The P-Value of the ChiSq Test is, for noons: 0.030109079689220955
The P-Value of the ChiSq Test is, for afternoons: 0.9863457396421107


In [124]:
# Contingency tables of item type (rows) against store name (columns), one per time slot
CrosstabResult_morning, CrosstabResult_noon, CrosstabResult_afternoon = [
    pd.crosstab(index=slot_df['item_type'], columns=slot_df['store_name'])
    for slot_df in (df_morning, df_noon, df_afternoon)
]

# Chi-square test of independence per time slot
ChiSqResult_morning, ChiSqResult_noon, ChiSqResult_afternoon = [
    chi2_contingency(table)
    for table in (CrosstabResult_morning, CrosstabResult_noon, CrosstabResult_afternoon)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test is, for mornings:', ChiSqResult_morning),
    ('The P-Value of the ChiSq Test is, for noons:', ChiSqResult_noon),
    ('The P-Value of the ChiSq Test is, for afternoons:', ChiSqResult_afternoon),
):
    print(message, result[1])

The P-Value of the ChiSq Test is, for mornings: 2.5077167072641074e-20
The P-Value of the ChiSq Test is, for noons: 0.0054330191728131725
The P-Value of the ChiSq Test is, for afternoons: 0.599755979050132


## 2.4 Per STORE NAME

In [96]:
# Store names as they are compared against the 'store_name' column
(store1, store2, store3, store4, store5,
 store6, store7, store8, store9) = (
    'Albert Heijn', 'Carrefour', 'Okay', 'Delhaize', 'Versavel Poelman',
    'Kruidvat', 'Brabo', 'Ikea', 'Sys',
)

# One sub-frame per store
df_store1 = df.loc[df['store_name'].eq(store1)]
df_store2 = df.loc[df['store_name'].eq(store2)]
df_store3 = df.loc[df['store_name'].eq(store3)]
df_store4 = df.loc[df['store_name'].eq(store4)]
df_store5 = df.loc[df['store_name'].eq(store5)]
df_store6 = df.loc[df['store_name'].eq(store6)]
df_store7 = df.loc[df['store_name'].eq(store7)]
df_store8 = df.loc[df['store_name'].eq(store8)]
df_store9 = df.loc[df['store_name'].eq(store9)]

In [97]:
# Contingency tables of item type (rows) against time slot (columns), one per store
(CrosstabResult1, CrosstabResult2, CrosstabResult3,
 CrosstabResult4, CrosstabResult5, CrosstabResult6,
 CrosstabResult7, CrosstabResult8, CrosstabResult9) = [
    pd.crosstab(index=store_df['item_type'], columns=store_df['time'])
    for store_df in (df_store1, df_store2, df_store3, df_store4, df_store5,
                     df_store6, df_store7, df_store8, df_store9)
]

# Chi-square test of independence per store
(ChiSqResult1, ChiSqResult2, ChiSqResult3,
 ChiSqResult4, ChiSqResult5, ChiSqResult6,
 ChiSqResult7, ChiSqResult8, ChiSqResult9) = [
    chi2_contingency(table)
    for table in (CrosstabResult1, CrosstabResult2, CrosstabResult3,
                  CrosstabResult4, CrosstabResult5, CrosstabResult6,
                  CrosstabResult7, CrosstabResult8, CrosstabResult9)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test AH is:', ChiSqResult1),
    ('The P-Value of the ChiSq Test Carrefour is:', ChiSqResult2),
    ('The P-Value of the ChiSq Test Okay is:', ChiSqResult3),
    ('The P-Value of the ChiSq Test Delhaize is:', ChiSqResult4),
    ('The P-Value of the ChiSq Test Versavel Poelman is:', ChiSqResult5),
    ('The P-Value of the ChiSq Test Kruidvat is:', ChiSqResult6),
    ('The P-Value of the ChiSq Test Brabo is:', ChiSqResult7),
    ('The P-Value of the ChiSq Test Ikea is:', ChiSqResult8),
    ('The P-Value of the ChiSq Test Sys is:', ChiSqResult9),
):
    print(message, result[1])

The P-Value of the ChiSq Test AH is: 0.27146301019057517
The P-Value of the ChiSq Test Carrefour is: 1.0
The P-Value of the ChiSq Test Okay is: 0.786598072074264
The P-Value of the ChiSq Test Delhaize is: 0.512171595235117
The P-Value of the ChiSq Test Versavel Poelman is: 1.0
The P-Value of the ChiSq Test Kruidvat is: 1.0
The P-Value of the ChiSq Test Brabo is: 1.0
The P-Value of the ChiSq Test Ikea is: 1.0
The P-Value of the ChiSq Test Sys is: 0.9604070567886343


In [98]:
# Contingency tables of item type (rows) against weekday (columns), one per store
(CrosstabResult1, CrosstabResult2, CrosstabResult3,
 CrosstabResult4, CrosstabResult5, CrosstabResult6,
 CrosstabResult7, CrosstabResult8, CrosstabResult9) = [
    pd.crosstab(index=store_df['item_type'], columns=store_df['day'])
    for store_df in (df_store1, df_store2, df_store3, df_store4, df_store5,
                     df_store6, df_store7, df_store8, df_store9)
]

# Chi-square test of independence per store
(ChiSqResult1, ChiSqResult2, ChiSqResult3,
 ChiSqResult4, ChiSqResult5, ChiSqResult6,
 ChiSqResult7, ChiSqResult8, ChiSqResult9) = [
    chi2_contingency(table)
    for table in (CrosstabResult1, CrosstabResult2, CrosstabResult3,
                  CrosstabResult4, CrosstabResult5, CrosstabResult6,
                  CrosstabResult7, CrosstabResult8, CrosstabResult9)
]

# Element 1 of each result is the p-value: the probability of a table at least
# this far from independence if H0 (no association) held, not the probability
# that H0 is true. With p > 0.05 we fail to reject H0.
for message, result in (
    ('The P-Value of the ChiSq Test AH is:', ChiSqResult1),
    ('The P-Value of the ChiSq Test Carrefour is:', ChiSqResult2),
    ('The P-Value of the ChiSq Test Okay is:', ChiSqResult3),
    ('The P-Value of the ChiSq Test Delhaize is:', ChiSqResult4),
    ('The P-Value of the ChiSq Test Versavel Poelman is:', ChiSqResult5),
    ('The P-Value of the ChiSq Test Kruidvat is:', ChiSqResult6),
    ('The P-Value of the ChiSq Test Brabo is:', ChiSqResult7),
    ('The P-Value of the ChiSq Test Ikea is:', ChiSqResult8),
    ('The P-Value of the ChiSq Test Sys is:', ChiSqResult9),
):
    print(message, result[1])

The P-Value of the ChiSq Test AH is: 0.9943296369032215
The P-Value of the ChiSq Test Carrefour is: 1.0
The P-Value of the ChiSq Test Okay is: 0.5209070930125062
The P-Value of the ChiSq Test Delhaize is: 0.512171595235117
The P-Value of the ChiSq Test Versavel Poelman is: 0.5905214502087732
The P-Value of the ChiSq Test Kruidvat is: 1.0
The P-Value of the ChiSq Test Brabo is: 1.0
The P-Value of the ChiSq Test Ikea is: 1.0
The P-Value of the ChiSq Test Sys is: 0.865542455654743
