In [1]:
import numpy as np
import pandas as pd


import statsmodels.api  as  sm
import scipy.stats as stats

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the customer purchase records from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='Customer_Data.csv')

In [3]:
# Remove the 'Unnamed: 0' column (presumably a row index saved into the CSV
# by an earlier export -- TODO confirm); it is not used in the analysis below.
data = data.drop(columns=['Unnamed: 0'])


In [4]:
# Remove the 'Unique_ID' column; none of the contingency tables below use it.
data = data.drop(columns=['Unique_ID'])


In [5]:
# Print the frame summary (columns, non-null counts, dtypes, memory usage)
# before any dtype conversion.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 537577 entries, 0 to 537576
Data columns (total 8 columns):
Gender                        537577 non-null object
Age                           537577 non-null object
Occupation                    537577 non-null int64
City_Category                 537577 non-null object
Stay_In_Current_City_Years    537577 non-null object
Marital_Status                537577 non-null int64
Product_Category_1            537577 non-null int64
Purchase                      537577 non-null int64
dtypes: int64(4), object(4)
memory usage: 32.8+ MB


In [6]:
# Convert every grouping variable to the pandas 'category' dtype.
# 'Purchase' is deliberately left out and keeps its numeric dtype.
for column_name in [
    'Gender',
    'Age',
    'Occupation',
    'City_Category',
    'Stay_In_Current_City_Years',
    'Marital_Status',
    'Product_Category_1',
]:
    data[column_name] = data[column_name].astype('category')

In [7]:
# Print the frame summary again to confirm the dtypes after the conversion
# to categorical columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 537577 entries, 0 to 537576
Data columns (total 8 columns):
Gender                        537577 non-null category
Age                           537577 non-null category
Occupation                    537577 non-null category
City_Category                 537577 non-null category
Stay_In_Current_City_Years    537577 non-null category
Marital_Status                537577 non-null category
Product_Category_1            537577 non-null category
Purchase                      537577 non-null int64
dtypes: category(7), int64(1)
memory usage: 7.7 MB


# 1: Checking Dependency of Gender and Product_Category_1

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Gender and Product_Category_1 are Independent.
                        
Alternative hypothesis: $H_A$: Gender and Product_Category_1 are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [3]:
# Observed counts: one row per Gender level, one column per
# Product_Category_1 level.
Gender_tab = pd.crosstab(data["Gender"], data["Product_Category_1"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
Gender_array = Gender_tab.values.copy()

Gender_tab  # last expression: display the contingency table

Product_Category_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
F,24401,5573,5874,3572,41199,4475,928,32947,70,1138,4659,1497,1428,613,1023,2363,61,376
M,113952,17926,13975,7995,107393,15689,2740,79185,334,3894,19301,2378,4012,887,5180,7334,506,2699


In [9]:

# Chi-square test of independence on the Gender x Product_Category_1 counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=Gender_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 7902.05689 P value 0.000000 Degrees of freedom 17


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Gender and Product_Category_1 are not independent, i.e. they are dependent.


# 2: Checking Dependency of Age and Product_Category_1

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Age and Product_Category_1 are Independent.
                        
Alternative hypothesis: $H_A$: Age and Product_Category_1 are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [10]:
# Observed counts: one row per Age bracket, one column per
# Product_Category_1 level.
Age_tab = pd.crosstab(data["Age"], data["Product_Category_1"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
Age_array = Age_tab.values.copy()

Age_tab  # last expression: display the contingency table

Product_Category_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0-17,3524,789,1180,744,4280,392,51,2213,16,108,728,120,108,37,158,226,6,27
18-25,26627,4348,4633,2436,28157,3711,474,17665,61,596,4549,435,748,228,1010,1579,41,336
26-35,57398,8784,7524,4124,60487,8344,1630,43533,151,1757,9735,1072,2055,553,2341,4056,125,1021
36-45,27215,4856,3785,2311,28936,3839,796,22919,106,1206,4884,981,1223,308,1377,1935,133,689
46-50,10292,2068,1343,971,11736,1596,322,10493,33,513,2072,507,537,149,589,868,91,346
51-55,8950,1763,908,666,9720,1429,263,9202,29,507,1440,428,475,151,500,661,106,420
55+,4347,891,476,315,5276,853,132,6107,8,345,552,332,294,74,228,372,65,236


In [11]:

# Chi-square test of independence on the Age x Product_Category_1 counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=Age_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 7440.54891 P value 0.000000 Degrees of freedom 102


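### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Age and Product_Category_1 are not independent, i.e. they are dependent.

# 3: Checking Dependency of Occupation and Product_Category_1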

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Occupation and Product_Category_1 are Independent.
                        
Alternative hypothesis: $H_A$: Occupation and Product_Category_1 are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [12]:
# Observed counts: one row per Occupation code, one column per
# Product_Category_1 level.
Occupation_tab = pd.crosstab(data["Occupation"], data["Product_Category_1"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
Occupation_array = Occupation_tab.values.copy()

Occupation_tab  # last expression: display the contingency table

Product_Category_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,17382,2929,2589,1456,18703,2528,554,13856,48,627,3708,436,709,192,712,1209,63,419
1,10109,1880,1515,949,12850,1719,369,11736,26,488,1642,422,537,162,519,763,58,227
2,5635,1083,942,501,7443,1019,226,6127,11,277,1123,238,241,80,298,441,14,146
3,3913,661,606,394,5239,644,134,3803,13,193,688,175,215,68,169,337,14,100
4,18996,3001,3260,1683,20200,2622,443,13628,51,469,3157,365,578,179,734,1183,46,267
5,3617,581,443,252,3250,376,35,1904,8,83,782,78,95,23,144,211,9,94
6,4526,810,727,430,5580,722,143,4698,16,188,842,160,199,70,193,410,18,90
7,15824,2468,1574,1087,15356,2280,322,12423,38,519,2384,459,598,157,622,1156,82,457
8,506,97,67,39,367,45,5,263,1,13,63,3,8,0,24,15,1,7
9,1114,279,355,250,2127,191,15,1290,2,33,160,82,60,23,34,111,4,23


In [13]:

# Chi-square test of independence on the Occupation x Product_Category_1 counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=Occupation_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 9578.39397 P value 0.000000 Degrees of freedom 340


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Occupation and Product_Category_1 are not independent, i.e. they are dependent.

# 4: Checking Dependency of City_Category and Product_Category_1

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: City_Category and Product_Category_1 are Independent.
                        
Alternative hypothesis: $H_A$: City_Category and Product_Category_1 are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [14]:
# Observed counts: one row per City_Category level, one column per
# Product_Category_1 level.
City_Category_tab = pd.crosstab(data["City_Category"], data["Product_Category_1"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
City_Category_array = City_Category_tab.values.copy()

City_Category_tab  # last expression: display the contingency table

Product_Category_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
City_Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
A,34553,6045,4829,2996,41491,5420,1210,31688,109,1307,6517,1043,1585,474,1698,2810,120,743
B,57417,10274,8448,5151,63162,8407,1580,46801,172,2028,10339,1648,2226,622,2597,3987,263,1371
C,46383,7180,6572,3420,43939,6337,878,33643,123,1697,7104,1184,1629,404,1908,2900,184,961


In [15]:

# Chi-square test of independence on the City_Category x Product_Category_1 counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=City_Category_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 1131.13863 P value 0.000000 Degrees of freedom 34


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: City_Category and Product_Category_1 are not independent, i.e. they are dependent.

# 5: Checking Dependency of Stay_In_Current_City_Years and Product_Category_1

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Stay_In_Current_City_Years and Product_Category_1 are Independent.
                        
Alternative hypothesis: $H_A$: Stay_In_Current_City_Years and Product_Category_1 are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [16]:
# Observed counts: one row per Stay_In_Current_City_Years level, one column
# per Product_Category_1 level.
Stay_In_Current_City_Years_tab = pd.crosstab(
    data["Stay_In_Current_City_Years"],
    data["Product_Category_1"],
)
# Independent ndarray copy of the counts, used as input to the chi-square test.
Stay_In_Current_City_Years_array = Stay_In_Current_City_Years_tab.values.copy()

Stay_In_Current_City_Years_tab  # last expression: display the contingency table

Product_Category_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Stay_In_Current_City_Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,18296,3140,2747,1563,20279,2708,544,15072,62,656,3445,530,762,216,898,1301,84,422
1,48017,8237,6872,4036,52205,7149,1276,40534,127,1817,8025,1380,1958,566,2159,3481,165,1188
2,26135,4542,3832,2102,27713,3670,595,19897,87,892,4525,721,961,245,1140,1777,112,513
3,24683,4183,3527,2074,25990,3391,680,18790,77,894,3972,661,923,231,1040,1602,113,481
4+,21222,3397,2871,1792,22405,3246,573,17839,51,773,3993,583,836,242,966,1536,93,471


In [17]:

# Chi-square test of independence on the
# Stay_In_Current_City_Years x Product_Category_1 counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=Stay_In_Current_City_Years_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 391.13932 P value 0.000000 Degrees of freedom 68


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Stay_In_Current_City_Years and Product_Category_1 are not independent, i.e. they are dependent.


# 6: Checking Dependency of Marital_Status and Product_Category_1

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Marital_Status and Product_Category_1 are Independent.
                        
Alternative hypothesis: $H_A$: Marital_Status and Product_Category_1 are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [18]:
# Observed counts: one row per Marital_Status level, one column per
# Product_Category_1 level.
Marital_Status_tab = pd.crosstab(data["Marital_Status"], data["Product_Category_1"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
Marital_Status_array = Marital_Status_tab.values.copy()

Marital_Status_tab  # last expression: display the contingency table

Product_Category_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,83230,13946,12158,7072,88411,11986,2004,64484,242,2734,14479,1999,3107,835,3577,5640,291,1622
1,55123,9553,7691,4495,60181,8178,1664,47648,162,2298,9481,1876,2333,665,2626,4057,276,1453


In [19]:

# Chi-square test of independence on the Marital_Status x Product_Category_1 counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=Marital_Status_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 527.24316 P value 0.000000 Degrees of freedom 17


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Marital_Status and Product_Category_1 are not independent, i.e. they are dependent.


# 7: Checking Dependency of Marital_Status and Stay_In_Current_City_Years

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Marital_Status and Stay_In_Current_City_Years are Independent.
                        
Alternative hypothesis: $H_A$: Marital_Status and Stay_In_Current_City_Years are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [20]:
# Observed counts: one row per Marital_Status level, one column per
# Stay_In_Current_City_Years level.
MarStay_tab = pd.crosstab(data["Marital_Status"], data["Stay_In_Current_City_Years"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
MarStay_array = MarStay_tab.values.copy()
MarStay_tab  # last expression: display the contingency table

Stay_In_Current_City_Years,0,1,2,3,4+
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,43916,108337,59419,56471,49674
1,28809,80855,40040,36841,33215


In [22]:

# Chi-square test of independence on the
# Marital_Status x Stay_In_Current_City_Years counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=MarStay_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 432.06131 P value 0.000000 Degrees of freedom 4


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Marital_Status and Stay_In_Current_City_Years are not independent, i.e. they are dependent.


# 8: Checking Dependency of Marital_Status and City_Category

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Marital_Status and City_Category are Independent.
                        
Alternative hypothesis: $H_A$: Marital_Status and City_Category are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [24]:
# Observed counts: one row per Marital_Status level, one column per
# City_Category level.
MarCity_tab = pd.crosstab(data["Marital_Status"], data["City_Category"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
MarCity_array = MarCity_tab.values.copy()
MarCity_tab  # last expression: display the contingency table

City_Category,A,B,C
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,89388,134213,94216
1,55250,92280,72230


In [25]:

# Chi-square test of independence on the Marital_Status x City_Category counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=MarCity_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 867.75576 P value 0.000000 Degrees of freedom 2


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Marital_Status and City_Category are not independent, i.e. they are dependent.

# 9: Checking Dependency of Marital_Status and Occupation

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Marital_Status and Occupation are Independent.
                        
Alternative hypothesis: $H_A$: Marital_Status and Occupation are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [27]:
# Observed counts: one row per Marital_Status level, one column per
# Occupation code.
MarOccupation_tab = pd.crosstab(data["Marital_Status"], data["Occupation"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
MarOccupation_array = MarOccupation_tab.values.copy()
MarOccupation_tab  # last expression: display the contingency table

Occupation,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,39664,24065,13906,10202,51723,7473,10983,32693,672,3001,...,6997,16208,3553,15772,6483,13042,22243,3549,6853,16843
1,28456,21906,11939,7164,19139,4512,8839,25113,852,3152,...,4341,14215,3995,10940,5329,11748,16847,2976,1499,16067


In [28]:

# Chi-square test of independence on the Marital_Status x Occupation counts.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=MarOccupation_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 18260.26214 P value 0.000000 Degrees of freedom 20


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Marital_Status and Occupation are not independent, i.e. they are dependent.

# 10: Checking Dependency of Marital_Status and Age

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Marital_Status and Age are Independent.
                        
Alternative hypothesis: $H_A$: Marital_Status and Age are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [30]:
# Observed counts: one row per Marital_Status level, one column per Age bracket.
MarAge_tab = pd.crosstab(index=data["Marital_Status"], columns=data["Age"])
MarAge_array = np.array(MarAge_tab)

# Run the chi-square test of independence for this table. Without this call,
# chi_sq_Stat / p_value / deg_freedom would still hold the results of the
# previous section's test, and the Step 5 conclusion for Marital_Status vs Age
# would rest on a stale p value.
chi_sq_Stat, p_value, deg_freedom, exp_freq = stats.chi2_contingency(MarAge_array)

# Same report format as the other sections; very small p values print as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d' %(chi_sq_Stat, p_value,deg_freedom))

MarAge_tab  # last expression: display the contingency table

Age,0-17,18-25,26-35,36-45,46-50,51-55,55+
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,14707,76993,130524,64992,12332,10639,7630
1,0,20641,84166,42507,32194,26979,13273


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value from the chi-square test on the Marital_Status and Age table is effectively 0 and < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Marital_Status and Age are not independent, i.e. they are dependent.

# 11: Checking Dependency of Marital_Status and Gender

### Step 1: State the null and alternative hypothesis:

Null hypothesis: $H_0$: Marital_Status and Gender are Independent.
                        
Alternative hypothesis: $H_A$: Marital_Status and Gender are Not Independent.

### Step 2: Decide the significance level

Here we select α = 0.05

### Step 3: Identify the test statistic

We use the chi-square test of independence to determine whether the two categorical variables are associated.

### Step 4: Calculate p value or chi-square statistic value

In [31]:
# Observed counts: one row per Marital_Status level, one column per Gender level.
MarGender_tab = pd.crosstab(data["Marital_Status"], data["Gender"])
# Independent ndarray copy of the counts, used as input to the chi-square test.
MarGender_array = MarGender_tab.values.copy()
MarGender_tab  # last expression: display the contingency table

Gender,F,M
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1
0,76974,240843
1,55223,164537


In [32]:

# Chi-square test of independence on the Marital_Status x Gender counts.
# NOTE(review): with one degree of freedom, scipy's default `correction=True`
# applies Yates' continuity correction -- confirm this is intended.
# exp_freq holds the expected frequencies returned by the test.
(chi_sq_Stat,
 p_value,
 deg_freedom,
 exp_freq) = stats.chi2_contingency(observed=MarGender_array)

# The p value is printed with six decimals, so very small values show as 0.000000.
print('Chi-square statistic %3.5f P value %1.6f Degrees of freedom %d'
      % (chi_sq_Stat, p_value, deg_freedom))

Chi-square statistic 57.86466 P value 0.000000 Degrees of freedom 1


### Step 5: Decide whether to reject or fail to reject the null hypothesis

* Here, the p value is effectively 0 (it prints as 0.000000 at the displayed precision) and is < 0.05, so we reject the null hypothesis.
* We therefore accept the alternative hypothesis $H_A$: Marital_Status and Gender are not independent, i.e. they are dependent.


# Conclusions-

## After performing the chi-square test of independence at 0.05 significance level, we arrive at the following:

### 1: Gender and Product_Category_1 are Dependent.     
### 2: Age and Product_Category_1 are Dependent.                        
### 3: Occupation and Product_Category_1 are Dependent.                 
### 4: City_Category and Product_Category_1 are Dependent.                
### 5: Stay_In_Current_City_Years and Product_Category_1 are Dependent.
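### 6: Marital_Status and Product_Category_1 are Dependent.
### 7: Marital_Status and Stay_In_Current_City_Years are Dependent.
### 8: Marital_Status and City_Category are Dependent.
### 9: Marital_Status and Occupation are Dependent.
### 10: Marital_Status and Age are Dependent.
### 11: Marital_Status and Gender are Dependent.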