In [1]:
import numpy as np
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import shapiro, zscore
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Show every column when a wide DataFrame is rendered (no truncation with "...")
pd.options.display.max_columns = None

In [3]:
# Load the patient table from the CSV next to the notebook and preview the first five rows
data = pd.read_csv("alzheimers_disease_data.csv")
data.head(n=5)

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,SleepQuality,FamilyHistoryAlzheimers,CardiovascularDisease,Diabetes,Depression,HeadInjury,Hypertension,SystolicBP,DiastolicBP,CholesterolTotal,CholesterolLDL,CholesterolHDL,CholesterolTriglycerides,MMSE,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis,DoctorInCharge
0,4751,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,9.025679,0,0,1,1,0,0,142,72,242.36684,56.150897,33.682563,162.189143,21.463532,6.518877,0,0,1.725883,0,0,0,1,0,0,XXXConfid
1,4752,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,7.151293,0,0,0,0,0,0,115,64,231.162595,193.407996,79.028477,294.630909,20.613267,7.118696,0,0,2.592424,0,0,0,0,1,0,XXXConfid
2,4753,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,9.673574,1,0,0,0,0,0,99,116,284.181858,153.322762,69.772292,83.638324,7.356249,5.895077,0,0,7.119548,0,1,0,1,0,0,XXXConfid
3,4754,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,8.392554,0,0,0,0,0,0,118,115,159.58224,65.366637,68.457491,277.577358,13.991127,8.965106,0,1,6.481226,0,0,0,0,0,0,XXXConfid
4,4755,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,5.597238,0,0,0,0,0,0,94,117,237.602184,92.8697,56.874305,291.19878,13.517609,6.045039,0,0,0.014691,0,0,1,1,0,0,XXXConfid


In [4]:
# Print the schema summary: column names, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2149 entries, 0 to 2148
Data columns (total 35 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   PatientID                  2149 non-null   int64  
 1   Age                        2149 non-null   int64  
 2   Gender                     2149 non-null   int64  
 3   Ethnicity                  2149 non-null   int64  
 4   EducationLevel             2149 non-null   int64  
 5   BMI                        2149 non-null   float64
 6   Smoking                    2149 non-null   int64  
 7   AlcoholConsumption         2149 non-null   float64
 8   PhysicalActivity           2149 non-null   float64
 9   DietQuality                2149 non-null   float64
 10  SleepQuality               2149 non-null   float64
 11  FamilyHistoryAlzheimers    2149 non-null   int64  
 12  CardiovascularDisease      2149 non-null   int64  
 13  Diabetes                   2149 non-null   int64

In [12]:
# List the columns that contain at least one missing value (empty list = none)
data.columns[data.isnull().any()].tolist()

[]

In [13]:
# Statistical summary (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,SleepQuality,FamilyHistoryAlzheimers,CardiovascularDisease,Diabetes,Depression,HeadInjury,Hypertension,SystolicBP,DiastolicBP,CholesterolTotal,CholesterolLDL,CholesterolHDL,CholesterolTriglycerides,MMSE,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
count,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0,2149.0
mean,5825.0,74.908795,0.506282,0.697534,1.286645,27.655697,0.288506,10.039442,4.920202,4.993138,7.051081,0.25221,0.144253,0.150768,0.200558,0.092601,0.148906,134.264774,89.847836,225.197519,124.335944,59.463533,228.281496,14.755132,5.080055,0.208004,0.156817,4.982958,0.205212,0.158213,0.150768,0.158678,0.301536,0.353653
std,620.507185,8.990221,0.500077,0.996128,0.904527,7.217438,0.453173,5.75791,2.857191,2.909055,1.763573,0.434382,0.351428,0.357906,0.400511,0.28994,0.356079,25.949352,17.592496,42.542233,43.366584,23.139174,101.986721,8.613151,2.892743,0.405974,0.363713,2.949775,0.40395,0.365026,0.357906,0.365461,0.459032,0.478214
min,4751.0,60.0,0.0,0.0,0.0,15.008851,0.0,0.002003,0.003616,0.009385,4.002629,0.0,0.0,0.0,0.0,0.0,0.0,90.0,60.0,150.093316,50.230707,20.003434,50.407194,0.005312,0.00046,0.0,0.0,0.001288,0.0,0.0,0.0,0.0,0.0,0.0
25%,5288.0,67.0,0.0,0.0,1.0,21.611408,0.0,5.13981,2.570626,2.458455,5.482997,0.0,0.0,0.0,0.0,0.0,0.0,112.0,74.0,190.252963,87.195798,39.095698,137.583222,7.167602,2.566281,0.0,0.0,2.342836,0.0,0.0,0.0,0.0,0.0,0.0
50%,5825.0,75.0,1.0,0.0,1.0,27.823924,0.0,9.934412,4.766424,5.076087,7.115646,0.0,0.0,0.0,0.0,0.0,0.0,134.0,91.0,225.08643,123.342593,59.768237,230.301983,14.44166,5.094439,0.0,0.0,5.038973,0.0,0.0,0.0,0.0,0.0,0.0
75%,6362.0,83.0,1.0,1.0,2.0,33.869778,1.0,15.157931,7.427899,7.558625,8.562521,1.0,0.0,0.0,0.0,0.0,0.0,157.0,105.0,262.031657,161.733733,78.93905,314.839046,22.161028,7.546981,0.0,0.0,7.58149,0.0,0.0,0.0,0.0,1.0,1.0
max,6899.0,90.0,1.0,3.0,3.0,39.992767,1.0,19.989293,9.987429,9.998346,9.99984,1.0,1.0,1.0,1.0,1.0,1.0,179.0,119.0,299.993352,199.965665,99.980324,399.941862,29.991381,9.996467,1.0,1.0,9.999747,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
# Check for duplicates: count rows that are exact copies of an earlier row
data.duplicated().sum()

0

In [15]:
# Columns analysed as numeric measurements in the outlier and correlation cells
num_columns = [
    'Age',
    'BMI',
    'AlcoholConsumption',
    'PhysicalActivity',
    'SystolicBP',
    'DiastolicBP',
    'CholesterolTotal',
    'CholesterolLDL',
    'CholesterolHDL',
    'CholesterolTriglycerides',
    'MMSE',
]

In [None]:
# Detect outliers: one box per numeric column, all drawn on a shared y-axis
fig = px.box(data, y=num_columns)
# Title previously read "Outlies" (typo)
fig.update_layout(title="Outliers in Numerical Columns", template="plotly_dark")
fig.show()

In [44]:
# Distribution of ages among patients with and without Alzheimer's:
# two overlaid histograms, one per Diagnosis value.
fig = px.histogram(
    data,
    x='Age',
    color='Diagnosis',
    barmode='overlay',
    labels={'Diagnosis': 'Diagnosis (0:No, 1:Yes)'},
    template="plotly_dark",
    color_discrete_map={0: '#00CC96', 1: '#FF6B6B'},
)
fig.update_layout(title_text="Age Distribution Among Patients with and without Alzheimer's")
fig.show()

>**Interpretation**: The age histogram shows that the count of patients with Alzheimer's peaks at age 61, while the count of patients without Alzheimer's peaks at age 72. The number of Alzheimer's patients also differs noticeably between age groups.

In [25]:
# Compare the gender distribution between patients with and without Alzheimer's:
# side-by-side bars per gender, split by Diagnosis.
axis_labels = {
    'Gender': 'Gender (0=Male, 1=Female)',
    'Diagnosis': 'Diagnosis (0=No, 1=Yes)',
}
fig = px.histogram(
    data,
    x='Gender',
    color='Diagnosis',
    barmode='group',
    labels=axis_labels,
    template="plotly_dark",
    color_discrete_map={0: '#00CC96', 1: '#FF6B6B'},
)
fig.update_layout(title_text="Gender Distribution Comparison Between Patients with and without Alzheimer's")
fig.show()

>**Interpretation**: The gender distribution indicates that the number of males and females with and without Alzheimer's is fairly similar, but slightly more males are diagnosed with Alzheimer's compared to females.

In [50]:
# What is the percentage of patients with a family history of Alzheimer's?
family_history_percentage = data['FamilyHistoryAlzheimers'].value_counts(normalize=True) * 100

# Decode the 0/1 flag so the slices are labelled "No"/"Yes" rather than with
# bare numbers; the row order (and therefore the colour order) is unchanged.
family_history_shares = pd.DataFrame({
    'FamilyHistory': family_history_percentage.index.map({0: 'No', 1: 'Yes'}),
    'Percentage': family_history_percentage.to_numpy(),
})

# Create pie chart with custom colors
fig = px.pie(family_history_shares,
             values='Percentage',
             names='FamilyHistory',
             title='Percentage of Patients with Family History of Alzheimer\'s',
             template="plotly_dark",
             color_discrete_sequence=['#3366FF', '#FF9900'],
             labels={'FamilyHistory': 'Family History of Alzheimer\'s',
                     'Percentage': 'Share of Patients (%)'})

# Add percentage labels
fig.update_traces(textinfo='percent+label')
fig.show()

>**Interpretation**: Approximately 25.2% of patients have a family history of Alzheimer's, indicating a potential genetic predisposition.

In [14]:
# What is the average BMI for patients with and without Alzheimer's?
# as_index=False keeps Diagnosis as a regular column, ready for plotting.
avg_bmi = data.groupby('Diagnosis', as_index=False)['BMI'].mean()
avg_bmi

Unnamed: 0,Diagnosis,BMI
0,0,27.515092
1,1,27.91267


In [15]:
# Bar chart of the mean BMI per diagnosis group computed in the previous cell
fig = px.bar(
    avg_bmi,
    x='Diagnosis',
    y='BMI',
    labels={'Diagnosis': 'Diagnosis (0:No, 1:Yes)', 'BMI': 'Average BMI'},
    template="plotly_dark",
    color_discrete_sequence=['#FF6B6B'],
)
fig.update_layout(title_text="Average BMI for Patients with and without Alzheimer's")
fig.show()

>**Interpretation**: The average BMI for patients with and without Alzheimer's is nearly the same, suggesting that BMI is not a distinguishing factor between these groups.

In [16]:
# How many patients have diabetes, and how does this compare to their Alzheimer's diagnosis status?
diabetes_counts = data.groupby(['Diagnosis', 'Diabetes']).size().reset_index(name='counts')
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode='group' cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
diabetes_counts['Diabetes'] = diabetes_counts['Diabetes'].astype(str)
fig = px.bar(
    diabetes_counts,
    x='Diagnosis',
    y='counts',
    color='Diabetes',
    barmode='group',
    labels={'Diagnosis': 'Diagnosis (0=No, 1=Yes)', 'Diabetes': 'Diabetes (0:No, 1:Yes)', 'counts': 'Number of Patients'},
    template="plotly_dark",
)
fig.update_layout(title="Number of Patients with Diabetes by Alzheimer's Diagnosis Status")
fig.show()

>**Interpretation**: A higher number of patients without Alzheimer's have diabetes compared to those with Alzheimer's.

In [60]:
# Display the list of numeric columns defined earlier, as a reminder
num_columns

['Age',
 'BMI',
 'AlcoholConsumption',
 'PhysicalActivity',
 'SystolicBP',
 'DiastolicBP',
 'CholesterolTotal',
 'CholesterolLDL',
 'CholesterolHDL',
 'CholesterolTriglycerides',
 'MMSE']

In [61]:
# How do Age, AlcoholConsumption, and BMI jointly relate to the Alzheimer's diagnosis?
# (The previous header mentioned MMSE, which this plot does not use.)
# Plotly Express treats a numeric `color` column as continuous, which shows a
# colour bar and ignores color_discrete_map. Plot a copy with Diagnosis cast to
# string so the two classes get the intended discrete colours.
diagnosis_as_category = data.assign(Diagnosis=data['Diagnosis'].astype(str))
fig = px.scatter_3d(
    diagnosis_as_category,
    x='Age',
    y='AlcoholConsumption',
    z='BMI',
    color='Diagnosis',
    category_orders={'Diagnosis': ['0', '1']},  # stable legend order
    labels={'Diagnosis': 'Diagnosis (0:No, 1:Yes)'},
    template="plotly_dark",
    color_discrete_map={'0': '#00CC96', '1': '#FF6B6B'},
)
fig.update_layout(title="3D Scatter Plot of Age, AlcoholConsumption, and BMI by Alzheimer's Diagnosis")
fig.show()

>**Interpretation**: No definite pattern can be observed between Age, BMI, and AlcoholConsumption.

In [51]:
# What is the correlation matrix for the dataset?
# Pairwise correlations between the numeric columns, drawn as an annotated heatmap.
corr_matrix = data[num_columns].corr()
fig = px.imshow(
    corr_matrix,
    text_auto=True,
    title='Correlation Matrix',
    template="plotly_dark",
    aspect='auto',
)
# Fixed 800x800 canvas with generous margins so the axis labels are not clipped
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
    margin={"l": 100, "r": 100, "b": 100, "t": 100, "pad": 4},
)
fig.show()

In [19]:
# How does the average MMSE score vary across different age groups?
# Ages in this dataset run from 60 to 90 inclusive. pd.cut defaults to
# right-closed bins, so bins=[60, 70, 80, 90] silently dropped every 60-year-old
# and put age 70 in the "60-69" group. Left-closed bins with an upper edge of 91
# give [60, 70), [70, 80), [80, 91), which matches the labels and keeps all rows.
age_groups = pd.cut(
    data['Age'],
    bins=[60, 70, 80, 91],
    right=False,
    labels=['60-69', '70-79', '80-90'],
)
# observed=False keeps every age band in the result and makes the grouping explicit
avg_mmse_by_age = data.groupby(age_groups, observed=False)['MMSE'].mean().reset_index()

# Bar chart with custom colors
fig = px.bar(avg_mmse_by_age,
             x='Age',
             y='MMSE',
             labels={'Age':'Age Group', 'MMSE':'Average MMSE Score'},
             template="plotly_dark",
             color='Age',  # Color bars by age group
             color_discrete_map={'60-69': '#9C27B0', '70-79': '#2196F3', '80-90': '#4CAF50'})  # Purple, Blue, Green

fig.update_layout(title="Average MMSE Score by Age Group")
fig.show()

>**Interpretation**: The average MMSE score remains relatively consistent across different age groups, with slight variations.

In [20]:
# What is the distribution of physical activity levels among patients with Alzheimer's?
# Restrict to rows with Diagnosis == 1 before plotting.
alzheimers_patients = data.loc[data['Diagnosis'] == 1]
fig = px.histogram(
    alzheimers_patients,
    x='PhysicalActivity',
    labels={'PhysicalActivity': 'Physical Activity (hours/week)'},
    template="plotly_dark",
    color_discrete_sequence=['#FF6B6B'],
)
fig.update_traces(opacity=0.75)
fig.update_layout(title_text="Distribution of Physical Activity Levels Among Patients with Alzheimer's")
fig.show()

>**Interpretation**: Physical activity levels among Alzheimer's patients are widely spread, with no significant concentration in a particular range

In [22]:
# What is the distribution of systolic blood pressure for patients with and without hypertension?
# Two overlaid histograms, one per Hypertension value.
fig = px.histogram(
    data,
    x='SystolicBP',
    color='Hypertension',
    barmode='overlay',
    labels={
        'SystolicBP': 'Systolic Blood Pressure',
        'Hypertension': 'Hypertension (0:No, 1:Yes)',
    },
    template="plotly_dark",
    color_discrete_map={0: '#00CC96', 1: '#FF6B6B'},
)
# Slight transparency so the overlapping bars of both groups stay visible
fig.update_traces(opacity=0.8)
fig.update_layout(title_text="Distribution of Systolic Blood Pressure for Patients with and without Hypertension")
fig.show()

>**Interpretation**: Patients with hypertension generally have higher systolic blood pressure compared to those without hypertension.

In [23]:
# How many patients experience memory complaints, and what is their Alzheimer's diagnosis status?
memory_complaints_counts = data.groupby(['Diagnosis', 'MemoryComplaints']).size().reset_index(name='counts')
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode='group' cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
memory_complaints_counts['MemoryComplaints'] = memory_complaints_counts['MemoryComplaints'].astype(str)
fig = px.bar(
    memory_complaints_counts,
    x='Diagnosis',
    y='counts',
    color='MemoryComplaints',
    barmode='group',
    labels={'Diagnosis': 'Diagnosis (0=No, 1=Yes)', 'MemoryComplaints': 'Memory Complaints (0=No, 1=Yes)', 'counts': 'Number of Patients'},
    template="plotly_dark",
)
fig.update_layout(title="Number of Patients with Memory Complaints by Alzheimer's Diagnosis Status")
fig.show()

>**Interpretation**: A significant number of patients with Alzheimer's have memory complaints, while fewer patients without Alzheimer's report memory complaints.

In [26]:
# What is the average cholesterol level (total, LDL, HDL) for patients with and without Alzheimer's?
cholesterol_columns = ['CholesterolTotal', 'CholesterolLDL', 'CholesterolHDL']
avg_cholesterol = data.groupby('Diagnosis', as_index=False)[cholesterol_columns].mean()
# Reshape to long form (Diagnosis, variable, value) so each cholesterol type is its own bar
avg_cholesterol_long = avg_cholesterol.melt(id_vars='Diagnosis')
fig = px.bar(
    avg_cholesterol_long,
    x='Diagnosis',
    y='value',
    color='variable',
    barmode='group',
    labels={
        'Diagnosis': 'Diagnosis (0=No, 1=Yes)',
        'value': 'Average Cholesterol Level',
        'variable': 'Cholesterol Type',
    },
    template="plotly_dark",
)
fig.update_layout(title_text="Average Cholesterol Levels (Total, LDL, HDL) by Alzheimer's Diagnosis Status")
fig.show()

>**Interpretation**: The average cholesterol levels of patients with and without Alzheimer's disease are almost the same.

In [83]:
# How many patients have behavioral symptoms, and how does this relate to their Alzheimer's diagnosis?
behavioral_symptoms_counts = data.groupby(["Diagnosis", "BehavioralProblems"]).size().reset_index(name="counts")
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode="group" cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
behavioral_symptoms_counts["BehavioralProblems"] = behavioral_symptoms_counts["BehavioralProblems"].astype(str)
fig = px.bar(
    behavioral_symptoms_counts,
    x="Diagnosis",
    y="counts",
    color="BehavioralProblems",
    barmode="group",
    labels={"Diagnosis": "Diagnosis (0=No, 1=Yes)", "BehavioralProblems": "Behavioral Symptoms (0:No, 1:Yes)", "counts": "Number of Patients"},
    template="plotly_dark",
)
fig.update_layout(title="Number of Patients with Behavioral Symptoms by Alzheimer's Diagnosis Status")
fig.show()

>**Interpretation**: A higher proportion of patients diagnosed with Alzheimer's exhibit behavioral problems compared to those without the diagnosis.

In [84]:
# What is the distribution of ADL scores among patients with and without Alzheimer's?
# One box per Diagnosis value.
fig = px.box(
    data,
    x='Diagnosis',
    y='ADL',
    labels={
        'Diagnosis': 'Diagnosis (0=No, 1=Yes)',
        'ADL': 'Activities of Daily Living Score',
    },
    template="plotly_dark",
)
fig.update_layout(title_text="Distribution of ADL Scores Among Patients with and without Alzheimer's Disease")
fig.show()

>**Interpretation**: Patients with Alzheimer's disease have lower ADL (Activities of Daily Living) scores than patients without Alzheimer's disease.

In [85]:
# How does diet quality vary among patients with different education levels?
# One box of DietQuality per EducationLevel code.
fig = px.box(
    data,
    x='EducationLevel',
    y='DietQuality',
    labels={
        'EducationLevel': 'Education Level',
        'DietQuality': 'Diet Quality Score',
    },
    template="plotly_dark",
)
fig.update_layout(title_text="Diet Quality by Education Level")
fig.show()

>**Interpretation**: Diet quality scores are fairly consistent across different education levels, indicating that education level does not significantly influence diet quality among patients.

In [86]:
# List the available column names before picking the next feature to explore
data.columns

Index(['PatientID', 'Age', 'Gender', 'Ethnicity', 'EducationLevel', 'BMI',
       'Smoking', 'AlcoholConsumption', 'PhysicalActivity', 'DietQuality',
       'SleepQuality', 'FamilyHistoryAlzheimers', 'CardiovascularDisease',
       'Diabetes', 'Depression', 'HeadInjury', 'Hypertension', 'SystolicBP',
       'DiastolicBP', 'CholesterolTotal', 'CholesterolLDL', 'CholesterolHDL',
       'CholesterolTriglycerides', 'MMSE', 'FunctionalAssessment',
       'MemoryComplaints', 'BehavioralProblems', 'ADL', 'Confusion',
       'Disorientation', 'PersonalityChanges', 'DifficultyCompletingTasks',
       'Forgetfulness', 'Diagnosis', 'DoctorInCharge'],
      dtype='object')

In [87]:
# Do personality changes correlate with the patients' Alzheimer's diagnosis?
PersonalityChanges_counts = data.groupby(["Diagnosis", "PersonalityChanges"]).size().reset_index(name="counts")
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode="group" cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
PersonalityChanges_counts["PersonalityChanges"] = PersonalityChanges_counts["PersonalityChanges"].astype(str)
fig = px.bar(
    PersonalityChanges_counts,
    x="Diagnosis",
    y="counts",
    color="PersonalityChanges",
    barmode="group",
    # "counts" label added to match the other count charts in this notebook
    labels={"Diagnosis": "Diagnosis (0=No, 1=Yes)", "PersonalityChanges": "PersonalityChanges Symptoms (0:No, 1:Yes)", "counts": "Number of Patients"},
    template="plotly_dark",
)
# Title previously lacked the word "by"
fig.update_layout(title="Number of Patients with Personality Symptoms by Alzheimer's Diagnosis Status")
fig.show()

>**Interpretation**: The personality changes are less frequently observed in patients diagnosed with Alzheimer's disease compared to those without the diagnosis.

In [16]:
# How many patients experience memory complaints, and what is their Alzheimer's diagnosis status?
# NOTE(review): this cell repeats an earlier memory-complaints cell; consider keeping only one.
memory_complaints_counts = data.groupby(['Diagnosis', 'MemoryComplaints']).size().reset_index(name='counts')
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode='group' cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
memory_complaints_counts['MemoryComplaints'] = memory_complaints_counts['MemoryComplaints'].astype(str)
fig = px.bar(
    memory_complaints_counts,
    x='Diagnosis',
    y='counts',
    color='MemoryComplaints',
    barmode='group',
    labels={'Diagnosis': 'Diagnosis (0=No, 1=Yes)', 'MemoryComplaints': 'Memory Complaints (0=No, 1=Yes)', 'counts': 'Number of Patients'},
    template="plotly_dark",
)
fig.update_layout(title="Number of Patients with Memory Complaints by Alzheimer's Diagnosis Status")
fig.show()

>**Interpretation**: A significant number of patients with Alzheimer's have memory complaints, while fewer patients without Alzheimer's report memory complaints

In [17]:
# What is the average cholesterol level (total, LDL, HDL) for patients with and without Alzheimer's?
avg_cholesterol = (
    data
    .groupby('Diagnosis')[['CholesterolTotal', 'CholesterolLDL', 'CholesterolHDL']]
    .mean()
    .reset_index()
)
# Long form (Diagnosis, variable, value) gives one grouped bar per cholesterol type
fig = px.bar(
    avg_cholesterol.melt(id_vars='Diagnosis'),
    x='Diagnosis',
    y='value',
    color='variable',
    barmode='group',
    labels={
        'Diagnosis': 'Diagnosis (0=No, 1=Yes)',
        'value': 'Average Cholesterol Level',
        'variable': 'Cholesterol Type',
    },
    template="plotly_dark",
)
fig.update_layout(title_text="Average Cholesterol Levels (Total, LDL, HDL) by Alzheimer's Diagnosis Status")
fig.show()


>**Interpretation**: Patients with Alzheimer's tend to have slightly higher average total cholesterol and LDL levels compared to those without Alzheimer's, while the HDL levels are relatively consistent across both groups.

In [18]:
# How many patients have behavioral problems, and how does this compare to their Alzheimer's diagnosis status?
behavioral_problems_counts = data.groupby(['Diagnosis', 'BehavioralProblems']).size().reset_index(name='counts')
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode='group' cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
behavioral_problems_counts['BehavioralProblems'] = behavioral_problems_counts['BehavioralProblems'].astype(str)
fig = px.bar(
    behavioral_problems_counts,
    x='Diagnosis',
    y='counts',
    color='BehavioralProblems',
    barmode='group',
    labels={'Diagnosis': 'Diagnosis (0=No, 1=Yes)', 'BehavioralProblems': 'Behavioral Problems (0=No, 1=Yes)', 'counts': 'Number of Patients'},
    template="plotly_dark",
)
fig.update_layout(title="Number of Patients with Behavioral Problems by Alzheimer's Diagnosis Status")
fig.show()

> **Interpretation**: A higher proportion of patients diagnosed with Alzheimer's exhibit behavioral problems compared to those without the diagnosis.

In [19]:
# What is the distribution of ADL scores among patients with and without Alzheimer's?
adl_axis_labels = {
    'Diagnosis': 'Diagnosis (0=No, 1=Yes)',
    'ADL': 'Activities of Daily Living Score',
}
fig = px.box(data, x='Diagnosis', y='ADL', labels=adl_axis_labels, template="plotly_dark")
fig.update_layout(title_text="Distribution of ADL Scores Among Patients with and without Alzheimer's")
fig.show()

> **Interpretation**: Patients with Alzheimer's have lower Activities of Daily Living (ADL) scores, indicating greater impairment in daily activities compared to those without Alzheimer's.

In [23]:
# How does diet quality vary among patients with different education levels?
diet_axis_labels = {
    'EducationLevel': 'Education Level',
    'DietQuality': 'Diet Quality Score',
}
fig = px.box(data, x='EducationLevel', y='DietQuality', labels=diet_axis_labels, template="plotly_dark")
fig.update_layout(title_text="Diet Quality by Education Level")
fig.show()

> **Interpretation**: Diet quality scores are fairly consistent across different education levels, indicating that education level does not significantly influence diet quality among patients.



In [24]:
# How many patients have confusion symptoms, and what is their Alzheimer's diagnosis status?
confusion_counts = data.groupby(['Diagnosis', 'Confusion']).size().reset_index(name='counts')
# Plotly Express treats a numeric `color` column as continuous (one trace plus a
# colour bar), so barmode='group' cannot place the bars side by side. Casting
# the flag to string makes it a discrete category with one trace per value.
confusion_counts['Confusion'] = confusion_counts['Confusion'].astype(str)
fig = px.bar(
    confusion_counts,
    x='Diagnosis',
    y='counts',
    color='Confusion',
    barmode='group',
    labels={'Diagnosis': 'Diagnosis (0=No, 1=Yes)', 'Confusion': 'Confusion (0=No, 1=Yes)', 'counts': 'Number of Patients'},
    template="plotly_dark",
)
fig.update_layout(title="Number of Patients with Confusion Symptoms by Alzheimer's Diagnosis Status")
fig.show()

> **Interpretation**: Confusion symptoms are more prevalent among patients diagnosed with Alzheimer's compared to those without the diagnosis.

In [25]:
# What is the average systolic and diastolic blood pressure for patients with and without Alzheimer's?
avg_bp = data.groupby('Diagnosis', as_index=False)[['SystolicBP', 'DiastolicBP']].mean()
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Average Systolic Blood Pressure', 'Average Diastolic Blood Pressure'),
)
# One bar trace per pressure column, systolic in the left panel, diastolic in the right
for panel, bp_column in enumerate(('SystolicBP', 'DiastolicBP'), start=1):
    fig.add_trace(
        go.Bar(x=avg_bp['Diagnosis'], y=avg_bp[bp_column], name=bp_column),
        row=1,
        col=panel,
    )
fig.update_layout(title_text="Average Blood Pressure by Alzheimer's Diagnosis Status", template="plotly_dark")
fig.show()

> **Interpretation**: There is no significant difference in average systolic and diastolic blood pressure between patients with and without Alzheimer's.

In [26]:
# How many patients have diabetes, hypertension, and cardiovascular disease?
# One subplot per condition, each showing how many patients have flag 0 vs 1.
diseases = ['Diabetes', 'Hypertension', 'CardiovascularDisease']
fig = make_subplots(rows=1, cols=len(diseases), subplot_titles=diseases)
for panel, disease in enumerate(diseases, start=1):
    disease_counts = data[disease].value_counts()
    bar = go.Bar(x=disease_counts.index, y=disease_counts.values, name=disease)
    fig.add_trace(bar, row=1, col=panel)
fig.update_layout(
    title_text="Number of Patients with Diabetes, Hypertension, and Cardiovascular Disease",
    template="plotly_dark",
)
fig.show()

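> **Interpretation**: Only a minority of patients have each condition: about 15% have diabetes, about 15% have hypertension, and about 14% have cardiovascular disease (see the column means in the statistical summary). The three conditions are therefore roughly equally prevalent, and most patients have none of a given condition.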