## Import the dependencies

In [91]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

# Get the data

In [92]:
data = pd.read_csv(r"d:\pg project responses\analyze using python\cust_res.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,Name,Gender,Age,Employment Status,Education Qualification,District,Area of Residence,type of vehicle,EV,...,4.5 cost consideration,5.1 consumer preference,5.2 consumer preference,5.3 consumer preference,5.4 consumer preference,5.5 consumer preference,6.1 evm consideration,6.2 evm consideration,6.3 evm consideration,6.4 evm consideration
0,0,Anish Glorisha,Female,18 - 27,Student,Post Graduate,Coimbatore,Urban,Petrol,No,...,4,4,4,4,3,4,3,3,4,3
1,1,Hari,Male,18 - 27,Student,Post Graduate,coimbatore,Rural,Diesel,Yes,...,2,1,1,3,3,3,1,2,2,4
2,2,CHINNAPPANDI J,Male,18 - 27,Student,Post Graduate,coimbatore,Rural,Petrol,Yes,...,1,4,2,4,3,2,4,4,5,5
3,3,P. Manikandan,Male,18 - 27,Student,Post Graduate,Viruthunagar,Urban,Diesel,Yes,...,3,3,1,3,2,2,3,4,4,3
4,4,Divyasri,Female,18 - 27,Student,Post Graduate,Dharmapuri,Urban,Petrol,No,...,4,4,2,2,3,2,2,3,4,3


In [93]:
data.drop(columns="Unnamed: 0", axis=1, inplace=True)
data.head()
data.columns


Index(['Name', 'Gender', 'Age', 'Employment Status',
       'Education Qualification ', 'District', 'Area of Residence',
       'type of vehicle', 'EV', '1.1 current awareness',
       '1.2 current awareness', '1.3 current awareness',
       '1.4 current awareness', '2.1 user satisfaction',
       '2.2 user satisfaction', '2.3 user satisfaction',
       '2.4 user satisfaction', '2.5 user satisfaction',
       '3.1 charging facilities', '3.2 charging facilities',
       '3.3 charging facilities', '3.4 charging facilities',
       '4.1 cost consideration', '4.2 cost consideration',
       '4.3 cost consideration', '4.4 cost consideration',
       '4.5 cost consideration', '5.1 consumer preference',
       '5.2 consumer preference', '5.3 consumer preference',
       '5.4 consumer preference', '5.5 consumer preference',
       '6.1 evm consideration', '6.2 evm consideration',
       '6.3 evm consideration', '6.4 evm consideration'],
      dtype='object')

# Selecting the ratings columns

In [94]:
cat_col = data.iloc[:, 9:].columns
cat_col

Index(['1.1 current awareness', '1.2 current awareness',
       '1.3 current awareness', '1.4 current awareness',
       '2.1 user satisfaction', '2.2 user satisfaction',
       '2.3 user satisfaction', '2.4 user satisfaction',
       '2.5 user satisfaction', '3.1 charging facilities',
       '3.2 charging facilities', '3.3 charging facilities',
       '3.4 charging facilities', '4.1 cost consideration',
       '4.2 cost consideration', '4.3 cost consideration',
       '4.4 cost consideration', '4.5 cost consideration',
       '5.1 consumer preference', '5.2 consumer preference',
       '5.3 consumer preference', '5.4 consumer preference',
       '5.5 consumer preference', '6.1 evm consideration',
       '6.2 evm consideration', '6.3 evm consideration',
       '6.4 evm consideration'],
      dtype='object')

# Chi2 test for independence of attrbutes



## 1. Gender vs Ratings

### Null Hypothesis : There is no association between Gender and Ratings.
### Alternative Hypothesis : There is a association between Gender and Ratings. 

In [95]:
chi_result = []
for i in cat_col:
    genchi2_sol = chi2_contingency(pd.crosstab(data[i],data['Gender']))
    if genchi2_sol[1] < 0.05:
        chi_result.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result.append("Accept the null hypothesis(variables are independent)")


In [96]:
print("Chi2 statistics:", genchi2_sol[0])
print("p_val:",genchi2_sol[1] )
print("Dof:", genchi2_sol[2])

Chi2 statistics: 4.1830929618326875
p_val: 0.3817936361139577
Dof: 4


In [97]:
gen_result_chi = pd.DataFrame(data=[cat_col, chi_result]).T
gen_result_chi.columns = ['cat_features', 'Hypothesis']
print(gen_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Accept the null hypothesis(variables are indep...
1     1.2 current awareness  Accept the null hypothesis(variables are indep...
2     1.3 current awareness  Accept the null hypothesis(variables are indep...
3     1.4 current awareness  Reject the null hypothesis(variables are depen...
4     2.1 user satisfaction  Accept the null hypothesis(variables are indep...
5     2.2 user satisfaction  Reject the null hypothesis(variables are depen...
6     2.3 user satisfaction  Accept the null hypothesis(variables are indep...
7     2.4 user satisfaction  Reject the null hypothesis(variables are depen...
8     2.5 user satisfaction  Accept the null hypothesis(variables are indep...
9   3.1 charging facilities  Accept the null hypothesis(variables are indep...
10  3.2 charging facilities  Accept the null hypothesis(variables are indep...
11  3.3 charging facilities  Reject the null hypothe

In [98]:
gen_result_chi['Hypothesis'].value_counts()

Hypothesis
Accept the null hypothesis(variables are independent)    20
Reject the null hypothesis(variables are dependent)       7
Name: count, dtype: int64

In [99]:
gen_result_chi.describe(include='all')

Unnamed: 0,cat_features,Hypothesis
count,27,27
unique,27,2
top,1.1 current awareness,Accept the null hypothesis(variables are indep...
freq,1,20


After analysing the results we accept the null hypothesis.

* There is no association between Gender and Ratings.

## 2. Age vs Ratings

### Null Hypothesis : There is no association between Age and Ratings.
### Alternative Hypothesis : There is an association between Age and Ratings. 

In [100]:
chi_result1 = []
for i in cat_col:
    agechi2_sol = chi2_contingency(pd.crosstab(data[i],data['Age']))
    if agechi2_sol[1]< 0.05:
        chi_result1.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result1.append("Accept the null hypothesis(variables are independent)")


In [101]:
print("Chi2 statistics:", agechi2_sol[0])
print("p_val:", agechi2_sol[1])
print("Dof:", agechi2_sol[2])

Chi2 statistics: 15.01724448628295
p_val: 0.05881101767929782
Dof: 8


In [109]:
age_result_chi = pd.DataFrame(data=[cat_col, chi_result1]).T
age_result_chi.columns = ['cat_features', 'Hypothesis']
print(age_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Accept the null hypothesis(variables are indep...
1     1.2 current awareness  Accept the null hypothesis(variables are indep...
2     1.3 current awareness  Accept the null hypothesis(variables are indep...
3     1.4 current awareness  Accept the null hypothesis(variables are indep...
4     2.1 user satisfaction  Accept the null hypothesis(variables are indep...
5     2.2 user satisfaction  Accept the null hypothesis(variables are indep...
6     2.3 user satisfaction  Accept the null hypothesis(variables are indep...
7     2.4 user satisfaction  Accept the null hypothesis(variables are indep...
8     2.5 user satisfaction  Reject the null hypothesis(variables are depen...
9   3.1 charging facilities  Reject the null hypothesis(variables are depen...
10  3.2 charging facilities  Reject the null hypothesis(variables are depen...
11  3.3 charging facilities  Accept the null hypothe

In [110]:
age_result_chi['Hypothesis'].value_counts()

Hypothesis
Accept the null hypothesis(variables are independent)    17
Reject the null hypothesis(variables are dependent)      10
Name: count, dtype: int64

In [111]:
age_result_chi.describe(include='all')

Unnamed: 0,cat_features,Hypothesis
count,27,27
unique,27,2
top,1.1 current awareness,Accept the null hypothesis(variables are indep...
freq,1,17


After analysing the results we accept the null hypothesis.

* There is no association between Age and Ratings.

## 3. Area of Residence vs Ratings

### Null Hypothesis : There is no association between Area of Residence and Ratings.
### Alternative Hypothesis : There is an association between Area of Residence and Ratings. 

In [108]:
data.iloc[:, :9].columns

Index(['Name', 'Gender', 'Age', 'Employment Status',
       'Education Qualification ', 'District', 'Area of Residence',
       'type of vehicle', 'EV'],
      dtype='object')

In [112]:
chi_result2 = []
for i in cat_col:
    area_chi2_sol = chi2_contingency(pd.crosstab(data[i],data['Area of Residence']))
    if area_chi2_sol[1]< 0.05:
        chi_result2.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result2.append("Accept the null hypothesis(variables are independent)")


In [114]:
print("Chi2 statistics:", area_chi2_sol[0])
print("p_val:", area_chi2_sol[1])
print("Dof:", area_chi2_sol[2])

Chi2 statistics: 14.422673306689054
p_val: 0.006061360887775882
Dof: 4


In [115]:
area_result_chi = pd.DataFrame(data=[cat_col, chi_result2]).T
area_result_chi.columns = ['cat_features', 'Hypothesis']
print(area_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Accept the null hypothesis(variables are indep...
1     1.2 current awareness  Reject the null hypothesis(variables are depen...
2     1.3 current awareness  Reject the null hypothesis(variables are depen...
3     1.4 current awareness  Accept the null hypothesis(variables are indep...
4     2.1 user satisfaction  Accept the null hypothesis(variables are indep...
5     2.2 user satisfaction  Accept the null hypothesis(variables are indep...
6     2.3 user satisfaction  Accept the null hypothesis(variables are indep...
7     2.4 user satisfaction  Accept the null hypothesis(variables are indep...
8     2.5 user satisfaction  Reject the null hypothesis(variables are depen...
9   3.1 charging facilities  Reject the null hypothesis(variables are depen...
10  3.2 charging facilities  Reject the null hypothesis(variables are depen...
11  3.3 charging facilities  Accept the null hypothe

In [116]:
area_result_chi['Hypothesis'].value_counts()

Hypothesis
Reject the null hypothesis(variables are dependent)      15
Accept the null hypothesis(variables are independent)    12
Name: count, dtype: int64

In [117]:
area_result_chi.describe(include='all')

Unnamed: 0,cat_features,Hypothesis
count,27,27
unique,27,2
top,1.1 current awareness,Reject the null hypothesis(variables are depen...
freq,1,15


After analysing the results we reject the null hypothesis.

* There is an association between Area of residence and Ratings.

## 4. Employment Status vs Ratings

### Null Hypothesis : There is no association between Employment Status and Ratings.
### Alternative Hypothesis : There is an association between Employment Status and Ratings. 

In [118]:
data.iloc[:, :9].columns

Index(['Name', 'Gender', 'Age', 'Employment Status',
       'Education Qualification ', 'District', 'Area of Residence',
       'type of vehicle', 'EV'],
      dtype='object')

In [119]:
chi_result3 = []
for i in cat_col:
    employment_chi2_sol = chi2_contingency(pd.crosstab(data[i],data['Employment Status']))
    if employment_chi2_sol[1]< 0.05:
        chi_result3.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result3.append("Accept the null hypothesis(variables are independent)")


In [120]:
print("Chi2 statistics:", employment_chi2_sol[0])
print("p_val:", employment_chi2_sol[1])
print("Dof:", employment_chi2_sol[2])

Chi2 statistics: 20.397002306301758
p_val: 0.05993893431648853
Dof: 12


In [121]:
employment_result_chi = pd.DataFrame(data=[cat_col, chi_result3]).T
employment_result_chi.columns = ['cat_features', 'Hypothesis']
print(employment_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Accept the null hypothesis(variables are indep...
1     1.2 current awareness  Accept the null hypothesis(variables are indep...
2     1.3 current awareness  Accept the null hypothesis(variables are indep...
3     1.4 current awareness  Accept the null hypothesis(variables are indep...
4     2.1 user satisfaction  Accept the null hypothesis(variables are indep...
5     2.2 user satisfaction  Reject the null hypothesis(variables are depen...
6     2.3 user satisfaction  Accept the null hypothesis(variables are indep...
7     2.4 user satisfaction  Accept the null hypothesis(variables are indep...
8     2.5 user satisfaction  Reject the null hypothesis(variables are depen...
9   3.1 charging facilities  Reject the null hypothesis(variables are depen...
10  3.2 charging facilities  Reject the null hypothesis(variables are depen...
11  3.3 charging facilities  Accept the null hypothe

In [122]:
employment_result_chi['Hypothesis'].value_counts()

Hypothesis
Accept the null hypothesis(variables are independent)    18
Reject the null hypothesis(variables are dependent)       9
Name: count, dtype: int64

In [123]:
employment_result_chi.describe(include='all')

Unnamed: 0,cat_features,Hypothesis
count,27,27
unique,27,2
top,1.1 current awareness,Accept the null hypothesis(variables are indep...
freq,1,18


After analysing the results we accept the null hypothesis.

* There is no association between Employment status and Ratings.

## 5. Education Qualification vs Ratings

### Null Hypothesis : There is no association between Education Qualification and Ratings.
### Alternative Hypothesis : There is an association between Education Qualification and Ratings. 

In [124]:
data.iloc[:, :9].columns

Index(['Name', 'Gender', 'Age', 'Employment Status',
       'Education Qualification ', 'District', 'Area of Residence',
       'type of vehicle', 'EV'],
      dtype='object')

In [125]:
chi_result4 = []
for i in cat_col:
    Education_chi2_sol = chi2_contingency(pd.crosstab(data[i],data['Education Qualification ']))
    if Education_chi2_sol[1]< 0.05:
        chi_result4.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result4.append("Accept the null hypothesis(variables are independent)")


In [126]:
print("Chi2 statistics:", Education_chi2_sol[0])
print("p_val:", Education_chi2_sol[1])
print("Dof:", Education_chi2_sol[2])

Chi2 statistics: 32.133196799589
p_val: 0.04189954661530382
Dof: 20


In [127]:
Education_result_chi = pd.DataFrame(data=[cat_col, chi_result4]).T
Education_result_chi.columns = ['cat_features', 'Hypothesis']
print(Education_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Accept the null hypothesis(variables are indep...
1     1.2 current awareness  Accept the null hypothesis(variables are indep...
2     1.3 current awareness  Accept the null hypothesis(variables are indep...
3     1.4 current awareness  Accept the null hypothesis(variables are indep...
4     2.1 user satisfaction  Accept the null hypothesis(variables are indep...
5     2.2 user satisfaction  Accept the null hypothesis(variables are indep...
6     2.3 user satisfaction  Accept the null hypothesis(variables are indep...
7     2.4 user satisfaction  Accept the null hypothesis(variables are indep...
8     2.5 user satisfaction  Accept the null hypothesis(variables are indep...
9   3.1 charging facilities  Accept the null hypothesis(variables are indep...
10  3.2 charging facilities  Reject the null hypothesis(variables are depen...
11  3.3 charging facilities  Reject the null hypothe

In [128]:
Education_result_chi['Hypothesis'].value_counts()

Hypothesis
Accept the null hypothesis(variables are independent)    24
Reject the null hypothesis(variables are dependent)       3
Name: count, dtype: int64

In [131]:
Education_result_chi['Hypothesis'].describe()

count                                                    27
unique                                                    2
top       Accept the null hypothesis(variables are indep...
freq                                                     24
Name: Hypothesis, dtype: object

After analysing the results we accept the null hypothesis.

* There is no association between Education Qualification and Ratings.

## 6. Other vehicles users in current period vs Ratings

### Null Hypothesis : There is no association between Other vehicles Users and Ratings.
### Alternative Hypothesis : There is an association between Other vehicles Users and Ratings. 

In [132]:
data.iloc[:, :9].columns

Index(['Name', 'Gender', 'Age', 'Employment Status',
       'Education Qualification ', 'District', 'Area of Residence',
       'type of vehicle', 'EV'],
      dtype='object')

In [133]:
chi_result5 = []
for i in cat_col:
    type_chi2_sol = chi2_contingency(pd.crosstab(data[i],data['type of vehicle']))
    if type_chi2_sol[1]< 0.05:
        chi_result5.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result5.append("Accept the null hypothesis(variables are independent)")


In [134]:
print("Chi2 statistics:", type_chi2_sol[0])
print("p_val:", type_chi2_sol[1])
print("Dof:", type_chi2_sol[2])

Chi2 statistics: 21.6781320874734
p_val: 0.04128955844457646
Dof: 12


In [135]:
type_result_chi = pd.DataFrame(data=[cat_col, chi_result5]).T
type_result_chi.columns = ['cat_features', 'Hypothesis']
print(type_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Reject the null hypothesis(variables are depen...
1     1.2 current awareness  Reject the null hypothesis(variables are depen...
2     1.3 current awareness  Reject the null hypothesis(variables are depen...
3     1.4 current awareness  Accept the null hypothesis(variables are indep...
4     2.1 user satisfaction  Reject the null hypothesis(variables are depen...
5     2.2 user satisfaction  Reject the null hypothesis(variables are depen...
6     2.3 user satisfaction  Reject the null hypothesis(variables are depen...
7     2.4 user satisfaction  Reject the null hypothesis(variables are depen...
8     2.5 user satisfaction  Reject the null hypothesis(variables are depen...
9   3.1 charging facilities  Reject the null hypothesis(variables are depen...
10  3.2 charging facilities  Reject the null hypothesis(variables are depen...
11  3.3 charging facilities  Reject the null hypothe

In [136]:
type_result_chi['Hypothesis'].value_counts()

Hypothesis
Reject the null hypothesis(variables are dependent)      24
Accept the null hypothesis(variables are independent)     3
Name: count, dtype: int64

In [137]:
type_result_chi['Hypothesis'].describe()

count                                                    27
unique                                                    2
top       Reject the null hypothesis(variables are depen...
freq                                                     24
Name: Hypothesis, dtype: object

After analysing the results we reject the null hypothesis.

* There is an  association between Education Qualification and Ratings.

## 7. EV buyers and Non-buyers vs Ratings

### Null Hypothesis : There is no association between EV buyers and Non-buyers, and Ratings.
### Alternative Hypothesis : There is an association between EV buyers and Non-buyers, and Ratings. 

In [138]:
data.iloc[:, :9].columns

Index(['Name', 'Gender', 'Age', 'Employment Status',
       'Education Qualification ', 'District', 'Area of Residence',
       'type of vehicle', 'EV'],
      dtype='object')

In [139]:
chi_result6 = []
for i in cat_col:
    EV_chi2_sol = chi2_contingency(pd.crosstab(data[i],data['EV']))
    if EV_chi2_sol[1]< 0.05:
        chi_result6.append("Reject the null hypothesis(variables are dependent)")
    else:
        chi_result6.append("Accept the null hypothesis(variables are independent)")


In [140]:
print("Chi2 statistics:", EV_chi2_sol[0])
print("p_val:", EV_chi2_sol[1])
print("Dof:", EV_chi2_sol[2])

Chi2 statistics: 6.330517258988859
p_val: 0.17578734880511415
Dof: 4


In [141]:
EV_result_chi = pd.DataFrame(data=[cat_col, chi_result6]).T
EV_result_chi.columns = ['cat_features', 'Hypothesis']
print(EV_result_chi)


               cat_features                                         Hypothesis
0     1.1 current awareness  Reject the null hypothesis(variables are depen...
1     1.2 current awareness  Reject the null hypothesis(variables are depen...
2     1.3 current awareness  Reject the null hypothesis(variables are depen...
3     1.4 current awareness  Accept the null hypothesis(variables are indep...
4     2.1 user satisfaction  Reject the null hypothesis(variables are depen...
5     2.2 user satisfaction  Accept the null hypothesis(variables are indep...
6     2.3 user satisfaction  Reject the null hypothesis(variables are depen...
7     2.4 user satisfaction  Reject the null hypothesis(variables are depen...
8     2.5 user satisfaction  Reject the null hypothesis(variables are depen...
9   3.1 charging facilities  Reject the null hypothesis(variables are depen...
10  3.2 charging facilities  Reject the null hypothesis(variables are depen...
11  3.3 charging facilities  Reject the null hypothe

In [142]:
EV_result_chi['Hypothesis'].value_counts()

Hypothesis
Reject the null hypothesis(variables are dependent)      19
Accept the null hypothesis(variables are independent)     8
Name: count, dtype: int64

In [143]:
EV_result_chi['Hypothesis'].describe()

count                                                    27
unique                                                    2
top       Reject the null hypothesis(variables are depen...
freq                                                     19
Name: Hypothesis, dtype: object

After analysing the results we reject the null hypothesis.

* There is an association between EV buyers and Non-buyers and Ratings.

In [105]:
#cross_tab = pd.crosstab(
 #   index=[data['Gender']],   # Row categories
  #  columns=[data['1.1 current awareness']]
#)
#cross_tab

In [106]:
#import pandas as pd
#from scipy.stats import chi2_contingency

# Create a contingency table
#data = {'Category_A': [50, 30, 20],
 #       'Category_B': [30, 50, 20]}

#df = pd.DataFrame(data, index=['Group 1', 'Group 2', 'Group 3'])

# Perform Chi-Square Test of Independence
#chi2_stat, p_val, dof, expected = chi2_contingency(dt)

# Print the results
#print("Chi-Square Statistic:", chi2_stat)
#print("Degrees of Freedom:", dof)
#print("P-Value:", p_val)
#print("Expected Frequencies:\n", expected)

# Interpreting the result
#alpha = 0.05
#if p_val < alpha:
#    print("Reject the null hypothesis (variables are dependent)")
#else:
 #   print("Fail to reject the null hypothesis (variables are independent)")
