In [9]:
# All Libraries
import pandas as pd
from scipy.stats import chi2_contingency
import json

# Helper Functions

In [24]:
def chi_2_test(data, patient_group_1, patient_group_2, question):
    both_patient_group_df = data[
        (data['Patient Group'] == patient_group_1) |
        (data['Patient Group'] == patient_group_2)
    ]
    question= '14) Did you experience any nausea over the last 2 weeks? If so, how many days of the past two weeks?'

    both_patient_group_q = both_patient_group_df[['Patient Group', question]]

    both_patient_group_q_contingency = pd.crosstab(both_patient_group_q['Patient Group'], both_patient_group_q[question])

    chi_2, p_value, dof, expected = chi2_contingency(both_patient_group_q_contingency)
    
    print(f"The Chi Squared value is {chi_2}")
    print(f"The p value is {p_value}")
    print(f"The degrees of freedoms is {dof}")
    
    
# Example of how to use
# ### input below ##
# patient_group_1 = 'Definitive Stricture'
# patient_group_2 = 'Probably Stricture'
# question = '14) Did you experience any nausea over the last 2 weeks? If so, how many days of the past two weeks?'

# ### Dont change below #####

# chi_2_test(data, patient_group_1, patient_group_2, question)

# Analysis

In [27]:
# specificy file locations
data_file_loc = 'data/encoded_data.csv'
label_mapping_loc = 'symptom_label_mapping.json'



# load saved mapping
with open(label_mapping_loc) as f:
    mapping = json.load(f)

# read in data
data = pd.read_csv(data_file_loc)

data.head()

Unnamed: 0,Patient Group,"14) Did you experience any nausea over the last 2 weeks? If so, how many days of the past two weeks?","15) Did you experience any nausea over the last 2 months? If so, how many days of the past two months?",16) Out of the days where you experienced nausea how severe would you say it was on average over the past two weeks?,17) Out of the days where you experienced nausea how severe would you say it was on average over the past 2 months?,"13) Over the course of 2 weeks from now, did you experience increasing pain after a meal?","14) Over the course of 2 months from now, did you experience increasing pain after a meal?","6) In the past 2 weeks did you experience abdominal bloating and if so, how severe?","7) In the past 2 months did you experience abdominal bloating and if so, how severe?","18) Did you experience any vomiting over the last 2 weeks? If so, how many days of the past two weeks?","19) Did you experience any vomiting over the last 2 months? If so, how many days of the past two months?",20) Out of the days where you experienced vomiting how severe would you say it was on average over the past two weeks?,21) Out of the days where you experienced vomiting how severe would you say it was on average over the past two months?,2) Over the course of 2 weeks how many of these days did you experience obstructive pain?,"3) If none to question 2, have you had any obstructive pain over the past 2 months and for how many days?",4) On average how severe was your obstructive pain over the last two weeks?,5) On average how severe was your obstructive pain over the last two months?
0,Definitive Stricture,1,2,1,1,1,1,1,1,1,1,2,2,1,3,1,3
1,Probably Stricture,3,3,2,2,2,2,0,2,1,1,2,2,0,3,0,0
2,Probably Stricture,0,0,0,0,2,2,0,0,1,1,2,2,1,3,0,0
3,Definitive Stricture,3,3,2,2,2,2,2,2,1,1,2,2,4,3,3,3
4,Probably Stricture,3,0,2,1,3,3,2,1,1,0,1,1,4,3,3,3


In [4]:
data['Patient Group'].value_counts()

Probably Stricture      27
Definitive Stricture    24
Inflammatory            23
Name: Patient Group, dtype: int64

In [25]:
# lists of questions related to the symptom
nausea_q = [question for question in data.columns if 'nausea' in question]

pain_q = [question for question in data.columns if ('pain' in question) & ('obstructive' not in question)]

vomit_q = [question for question in data.columns if 'vomit' in question]

bloat_q = [question for question in data.columns if 'bloat' in question]
                             
obstructive_q = [question for question in data.columns if 'obstructive' in question]

In [26]:
prob_stric = data[
                data['Patient Group'] == 'Probably Stricture'
                ]

def_stric = data[
                data['Patient Group'] == 'Definitive Stricture'
                ]

inflamm = data[
                data['Patient Group'] == 'Inflammatory'
                ]

# Nausea

In [7]:
nausea_prob_stric = prob_stric[nausea_q]
nausea_def_stric = def_stric[nausea_q]
nausea_inflamm = inflamm[nausea_q]

## Question:
### Is there a significant difference betwen probable and defintive patients for nausea in last 2 weeks?
- chi sqaure test
- alpha = 0.05

In [8]:
### input below ##
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Probably Stricture'
question = '14) Did you experience any nausea over the last 2 weeks? If so, how many days of the past two weeks?'

### Dont change below #####

chi_2_test(data, patient_group_1, patient_group_2, question)

The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


Since p_value > 0., therefore fail to reject null hyp, therefore no significant association

In [None]:
is there any significant difference between definive and infllmatory group 

In [13]:
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Inflammatory'  # Changed to 'Inflammatory'
question = '14) Did you experience any nausea over the last 2 weeks? If so, how many days of the past two weeks?'

chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


In [14]:
# Define the patient groups and question
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
question = '14) Did you experience any nausea over the last 2 weeks? If so, how many days of the past two weeks?'

# Call the chi_2_test function with these parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


PAIN AFTER MEAL


In [15]:
# Define the patient groups and the new question
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
question = '13) Over the course of 2 weeks from now, did you experience increasing pain after a meal?'

# Call the chi_2_test function with these parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


In [16]:
# Define the patient groups for the new comparison
patient_group_1 = 'Inflammatory'
patient_group_2 = 'Definitive Stricture'
question = '13) Over the course of 2 weeks from now, did you experience increasing pain after a meal?'

# Call the chi_2_test function with these parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


In [17]:
# Define the patient groups for this comparison
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Probably Stricture'
question = '13) Over the course of 2 weeks from now, did you experience increasing pain after a meal?'

# Call the chi_2_test function with these parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


BLOATING OVER 2 WEEKS

In [18]:
# Define the patient groups
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Probably Stricture'
# Update to the specific text of question 6
question = '6) In the past 2 weeks did you experience abdominal bloating and if so, how severe?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


In [19]:
# Define the patient groups for the new comparison
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Inflammatory'
# The specific text of question 6
question = '6) In the past 2 weeks did you experience abdominal bloating and if so, how severe?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


In [20]:
# Define the patient groups for this comparison
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
# The specific text of question 6
question = '6) In the past 2 weeks did you experience abdominal bloating and if so, how severe?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


QUESTION 18- VOMITTING OVER 2 weeks

In [21]:
# Define the patient groups
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
# Update to the specific text of question 18
question = '18) Did you experience any vomiting over the last 2 weeks? If so, how many days of the past two weeks?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


In [22]:
# Define the patient groups for this comparison
patient_group_1 = 'Inflammatory'
patient_group_2 = 'Definitive Stricture'
# The specific text of question 18
question = '18) Did you experience any vomiting over the last 2 weeks? If so, how many days of the past two weeks?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


In [23]:
# Define the patient groups for this comparison
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Probably Stricture'
# The specific text of question 18
question = '18) Did you experience any vomiting over the last 2 weeks? If so, how many days of the past two weeks?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


# 2 month questions


question 15


In [28]:
# Define the patient groups
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Probably Stricture'
# Update to the specific text of question 15
question = '15) Did you experience any nausea over the last 2 months? If so, how many days of the past two months?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


In [29]:
# Define the patient groups for this comparison
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Inflammatory'
# The specific text of question 15
question = '15) Did you experience any nausea over the last 2 months? If so, how many days of the past two months?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


In [30]:
# Define the patient groups for this comparison
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
# The specific text of question 15
question = '15) Did you experience any nausea over the last 2 months? If so, how many days of the past two months?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


POST PRANDIA PAIN FOR 2 MONTHS< question 14

In [31]:
# Define the patient groups
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
# Update to the specific text of question 14
question = '14) Over the course of 2 months from now, did you experience increasing pain after a meal?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


In [32]:
# Define the patient groups for this comparison
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Definitive Stricture'
# The specific text of question 14
question = '14) Over the course of 2 months from now, did you experience increasing pain after a meal?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


In [33]:
# Define the patient groups for this comparison
patient_group_1 = 'Inflammatory'
patient_group_2 = 'Definitive Stricture'
# The specific text of question 14
question = '14) Over the course of 2 months from now, did you experience increasing pain after a meal?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


bLOATING OVER 2 MONTHS, question 7

In [34]:
# Define the patient groups
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
# Update to the specific text of question 7
question = '7) In the past 2 months did you experience abdominal bloating and if so, how severe?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


In [35]:
# Define the patient groups for this comparison
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Definitive Stricture'
# The specific text of question 7
question = '7) In the past 2 months did you experience abdominal bloating and if so, how severe?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


In [36]:
# Define the patient groups for this comparison
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Inflammatory'
# The specific text of question 7
question = '7) In the past 2 months did you experience abdominal bloating and if so, how severe?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4


VOMITTING over 2 MONTHS, question 19


In [37]:
# Define the patient groups
patient_group_1 = 'Probably Stricture'
patient_group_2 = 'Inflammatory'
# Update to the specific text of question 19
question = '19) Did you experience any vomiting over the last 2 months? If so, how many days of the past two months?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.8092261059012977
The p value is 0.7707939535811835
The degrees of freedoms is 4


In [38]:
# Define the patient groups for this comparison
patient_group_1 = 'Definitive Stricture'
patient_group_2 = 'Probably Stricture'
# The specific text of question 19
question = '19) Did you experience any vomiting over the last 2 months? If so, how many days of the past two months?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 1.3011499402628435
The p value is 0.8611803970021262
The degrees of freedoms is 4


In [39]:
# Define the patient groups for this comparison
patient_group_1 = 'Inflammatory'
patient_group_2 = 'Definitive Stricture'
# The specific text of question 19
question = '19) Did you experience any vomiting over the last 2 months? If so, how many days of the past two months?'

# Call the chi_2_test function with these updated parameters
chi_2_test(data, patient_group_1, patient_group_2, question)


The Chi Squared value is 3.6240421455938696
The p value is 0.45926972516175824
The degrees of freedoms is 4
