In [185]:
%matplotlib inline
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from sqlalchemy import create_engine, text
import sqlite3
from pprint import pprint


In [186]:
# Load the data into a DataFrame
raw_df = pd.read_csv("../Resources/U.S._Chronic_Disease_Indicators.csv")

sqlite_file = '../Resources/U.S._Chronic_Disease_Indicators_Avery.sqlite'  # This is the SQLite file name
conn = sqlite3.connect(sqlite_file)  # This creates or opens the SQLite file

# Write the DataFrame to the SQLite database
table_name = 'US_Chronic_Disease_Indicators'  # Name of the table in SQLite
raw_df.to_sql(table_name, conn, if_exists='replace', index=False)

# View the data
query = f"SELECT * FROM `{table_name}` LIMIT 5;"  # Query to fetch the first 5 rows
result = pd.read_sql_query(query, conn)
print("Preview of the data written to the SQLite database:")
print(result)


Preview of the data written to the SQLite database:
   YearStart  YearEnd LocationAbbr LocationDesc DataSource     Topic  \
0       2019     2019           AR     Arkansas      BRFSS  Diabetes   
1       2019     2019           ID        Idaho      BRFSS  Diabetes   
2       2019     2019           IN      Indiana      YRBSS     Sleep   
3       2019     2019           IA         Iowa       NVSS    Asthma   
4       2019     2019           IA         Iowa      BRFSS    Asthma   

                                            Question Response DataValueUnit  \
0                              Diabetes among adults     None             %   
1                              Diabetes among adults     None             %   
2    Short sleep duration among high school students     None             %   
3  Asthma mortality among all people, underlying ...     None        Number   
4                        Current asthma among adults     None             %   

      DataValueType  ...  TopicID  Quest

In [187]:
query = "SELECT * FROM US_Chronic_Disease_Indicators"
sql_df = pd.read_sql(query, conn)

sql_df.head(10)

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,DataSource,Topic,Question,Response,DataValueUnit,DataValueType,...,TopicID,QuestionID,ResponseID,DataValueTypeID,StratificationCategoryID1,StratificationID1,StratificationCategoryID2,StratificationID2,StratificationCategoryID3,StratificationID3
0,2019,2019,AR,Arkansas,BRFSS,Diabetes,Diabetes among adults,,%,Crude Prevalence,...,DIA,DIA01,,CRDPREV,SEX,SEXM,,,,
1,2019,2019,ID,Idaho,BRFSS,Diabetes,Diabetes among adults,,%,Crude Prevalence,...,DIA,DIA01,,CRDPREV,SEX,SEXM,,,,
2,2019,2019,IN,Indiana,YRBSS,Sleep,Short sleep duration among high school students,,%,Crude Prevalence,...,SLEP,SLP02,,CRDPREV,GRADE,GRD12,,,,
3,2019,2019,IA,Iowa,NVSS,Asthma,"Asthma mortality among all people, underlying ...",,Number,Number,...,AST,AST01,,NMBR,OVERALL,OVR,,,,
4,2019,2019,IA,Iowa,BRFSS,Asthma,Current asthma among adults,,%,Crude Prevalence,...,AST,AST02,,CRDPREV,AGE,AGE1844,,,,
5,2019,2019,IA,Iowa,NVSS,Diabetes,"Diabetes mortality among all people, underlyin...",,Number,Number,...,DIA,DIA03,,NMBR,AGE,AGE0_44,,,,
6,2019,2019,IA,Iowa,BRFSS,Health Status,Recent activity limitation among adults,,Number,Crude Mean,...,HEA,HEA04,,CRDMEAN,SEX,SEXF,,,,
7,2019,2019,IA,Iowa,BRFSS,Mental Health,Depression among adults,,%,Crude Prevalence,...,MEN,MEN02,,CRDPREV,RACE,MRC,,,,
8,2019,2019,KS,Kansas,NVSS,Diabetes,"Diabetes mortality among all people, underlyin...",,Number,Number,...,DIA,DIA03,,NMBR,AGE,AGE4564,,,,
9,2019,2019,LA,Louisiana,BRFSS,Mental Health,Depression among adults,,%,Crude Prevalence,...,MEN,MEN02,,CRDPREV,RACE,HIS,,,,


In [188]:
# Close the connection
conn.close()

In [189]:
# Drop null and unnecessary columns from the df
columns_to_drop = ["YearStart","LocationAbbr","Response","DataValueAlt","DataValueFootnoteSymbol",
                   "StratificationCategory2","Stratification2","StratificationCategoryID1","DataValueTypeID","QuestionID","ResponseID",
                   "StratificationCategory3","Stratification3","StratificationCategoryID2","StratificationID2","StratificationCategoryID3",
                   "StratificationID3","LocationID","LowConfidenceLimit","HighConfidenceLimit","DataValueFootnote","DataSource",
                   "StratificationCategory1",]
dropped_df = sql_df.drop(columns_to_drop, axis=1)

dropped_df.head(10)

Unnamed: 0,YearEnd,LocationDesc,Topic,Question,DataValueUnit,DataValueType,DataValue,Stratification1,Geolocation,TopicID,StratificationID1
0,2019,Arkansas,Diabetes,Diabetes among adults,%,Crude Prevalence,13.6,Male,POINT (-92.27449074299966 34.74865012400045),DIA,SEXM
1,2019,Idaho,Diabetes,Diabetes among adults,%,Crude Prevalence,10.6,Male,POINT (-114.3637300419997 43.682630005000476),DIA,SEXM
2,2019,Indiana,Sleep,Short sleep duration among high school students,%,Crude Prevalence,,Grade 12,POINT (-86.14996019399968 39.766910452000445),SLEP,GRD12
3,2019,Iowa,Asthma,"Asthma mortality among all people, underlying ...",Number,Number,54.0,Overall,POINT (-93.81649055599968 42.46940091300047),AST,OVR
4,2019,Iowa,Asthma,Current asthma among adults,%,Crude Prevalence,10.3,Age 18-44,POINT (-93.81649055599968 42.46940091300047),AST,AGE1844
5,2019,Iowa,Diabetes,"Diabetes mortality among all people, underlyin...",Number,Number,54.0,Age 0-44,POINT (-93.81649055599968 42.46940091300047),DIA,AGE0_44
6,2019,Iowa,Health Status,Recent activity limitation among adults,Number,Crude Mean,2.3,Female,POINT (-93.81649055599968 42.46940091300047),HEA,SEXF
7,2019,Iowa,Mental Health,Depression among adults,%,Crude Prevalence,31.0,"Multiracial, non-Hispanic",POINT (-93.81649055599968 42.46940091300047),MEN,MRC
8,2019,Kansas,Diabetes,"Diabetes mortality among all people, underlyin...",Number,Number,524.0,Age 45-64,POINT (-98.20078122699965 38.34774030000045),DIA,AGE4564
9,2019,Louisiana,Mental Health,Depression among adults,%,Crude Prevalence,29.3,Hispanic,POINT (-92.44568007099969 31.31266064400046),MEN,HIS


In [190]:
# Rename columns for better readability
renamed_df = dropped_df.rename(columns = {"YearEnd": "Year Recorded", "LocationDesc": "State", 
                                          "Stratification1": "Stratification", "StratificationID1": "StratID"})
# Sort the df by year oldest to newest and by state in alphabetical order
cleaned_df = renamed_df.sort_values(by=["Year Recorded", "State"], ascending=[True, True])
cleaned_df.reset_index(drop=True, inplace=True)

# Remove the word POINT from the Geolocation column so it just has latitude and longitude coordinates
cleaned_df["Geolocation"] = cleaned_df["Geolocation"].str.replace("POINT", "", regex=False)

# Split the Latitude and Longitude coordinates into two columns
cleaned_df[['Latitude', 'Longitude']] = cleaned_df['Geolocation'].str.extract(r'\(([^ ]+) ([^ ]+)\)')

cleaned_df.head(10)

Unnamed: 0,Year Recorded,State,Topic,Question,DataValueUnit,DataValueType,DataValue,Stratification,Geolocation,TopicID,StratID,Latitude,Longitude
0,2019,Alabama,Cancer,"Cervical cancer mortality among all females, u...","per 100,000",Age-adjusted Rate,,Male,(-86.63186076199969 32.84057112200048),CAN,SEXM,-86.63186076199969,32.84057112200048
1,2019,Alabama,Cancer,"Invasive cancer (all sites combined), incidence","per 100,000",Age-adjusted Rate,406.3,Female,(-86.63186076199969 32.84057112200048),CAN,SEXF,-86.63186076199969,32.84057112200048
2,2019,Alabama,Cancer,"Prostate cancer mortality among all males, und...","per 100,000",Crude Rate,,"American Indian or Alaska Native, non-Hispanic",(-86.63186076199969 32.84057112200048),CAN,AIAN,-86.63186076199969,32.84057112200048
3,2019,Alabama,Cancer,Colon and rectum (colorectal) cancer mortality...,"per 100,000",Age-adjusted Rate,15.2,Overall,(-86.63186076199969 32.84057112200048),CAN,OVR,-86.63186076199969,32.84057112200048
4,2019,Alabama,Cancer,Lung and bronchial cancer mortality among all ...,Number,Number,2349.0,"White, non-Hispanic",(-86.63186076199969 32.84057112200048),CAN,WHT,-86.63186076199969,32.84057112200048
5,2019,Alabama,Cancer,"Prostate cancer mortality among all males, und...",Number,Number,,Female,(-86.63186076199969 32.84057112200048),CAN,SEXF,-86.63186076199969,32.84057112200048
6,2019,Alabama,Cancer,Colon and rectum (colorectal) cancer mortality...,"per 100,000",Crude Rate,,"American Indian or Alaska Native, non-Hispanic",(-86.63186076199969 32.84057112200048),CAN,AIAN,-86.63186076199969,32.84057112200048
7,2019,Alabama,Cancer,"Invasive cancer (all sites combined), incidence","per 100,000",Age-adjusted Rate,452.3,Overall,(-86.63186076199969 32.84057112200048),CAN,OVR,-86.63186076199969,32.84057112200048
8,2019,Alabama,Cancer,Colon and rectum (colorectal) cancer mortality...,"per 100,000",Age-adjusted Rate,19.9,"Black, non-Hispanic",(-86.63186076199969 32.84057112200048),CAN,BLK,-86.63186076199969,32.84057112200048
9,2019,Alabama,Cancer,"Cervical cancer mortality among all females, u...","per 100,000",Crude Rate,,"Asian or Pacific Islander, non-Hispanic",(-86.63186076199969 32.84057112200048),CAN,API,-86.63186076199969,32.84057112200048


In [191]:
# Filter data by year, state and Topic
groupby_df = cleaned_df.groupby(["Topic"]).size().reset_index(name='Topic Count per all regions')

groupby_df

Unnamed: 0,Topic,Topic Count per all regions
0,Alcohol,25321
1,Arthritis,18645
2,Asthma,10511
3,Cancer,22358
4,Cardiovascular Disease,30709
5,Chronic Kidney Disease,104
6,Chronic Obstructive Pulmonary Disease,26951
7,Cognitive Health and Caregiving,9904
8,Diabetes,17318
9,Disability,5060


In [192]:
# Filter data by my question related to Cardio Vascular Disease
cardio_df = cleaned_df[cleaned_df['Topic'].str.contains("Cardiovascular Disease", na=False)]

cardio_df.head(10)


Unnamed: 0,Year Recorded,State,Topic,Question,DataValueUnit,DataValueType,DataValue,Stratification,Geolocation,TopicID,StratID,Latitude,Longitude
172,2019,Alabama,Cardiovascular Disease,Hospitalization for heart failure as principal...,"cases per 1,000",Crude Rate,71.43,"American Indian or Alaska Native, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,AIAN,-86.63186076199969,32.84057112200048
194,2019,Alabama,Cardiovascular Disease,Cerebrovascular disease (stroke) mortality amo...,"cases per 100,000",Crude Rate,304.4,Age >=65,(-86.63186076199969 32.84057112200048),CVD,AGE65P,-86.63186076199969,32.84057112200048
199,2019,Alabama,Cardiovascular Disease,Taking medicine for high cholesterol among adults,%,Crude Prevalence,,"Hawaiian or Pacific Islander, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,HAPI,-86.63186076199969,32.84057112200048
201,2019,Alabama,Cardiovascular Disease,Hospitalization for heart failure as principal...,"cases per 1,000",Age-adjusted Rate,,Hispanic,(-86.63186076199969 32.84057112200048),CVD,HIS,-86.63186076199969,32.84057112200048
206,2019,Alabama,Cardiovascular Disease,Diseases of the heart mortality among all peop...,"cases per 100,000",Age-adjusted Rate,219.0,"White, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,WHT,-86.63186076199969,32.84057112200048
211,2019,Alabama,Cardiovascular Disease,Taking medicine for high cholesterol among adults,%,Crude Prevalence,54.0,Age >=65,(-86.63186076199969 32.84057112200048),CVD,AGE65P,-86.63186076199969,32.84057112200048
216,2019,Alabama,Cardiovascular Disease,High cholesterol among adults who have been sc...,%,Crude Prevalence,31.3,"American Indian or Alaska Native, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,AIAN,-86.63186076199969,32.84057112200048
222,2019,Alabama,Cardiovascular Disease,Diseases of the heart mortality among all peop...,"cases per 100,000",Crude Rate,238.4,"Black, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,BLK,-86.63186076199969,32.84057112200048
224,2019,Alabama,Cardiovascular Disease,Coronary heart disease mortality among all peo...,Number,Number,3795.0,"White, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,WHT,-86.63186076199969,32.84057112200048
228,2019,Alabama,Cardiovascular Disease,Cerebrovascular disease (stroke) mortality amo...,Number,Number,,"Asian, non-Hispanic",(-86.63186076199969 32.84057112200048),CVD,ASN,-86.63186076199969,32.84057112200048


In [193]:
# Show all unique questions
print(len(cardio_df), 'Records')
print(cardio_df["Question"].unique())

30709 Records
['Hospitalization for heart failure as principal diagnosis, Medicare-beneficiaries aged 65 years and older'
 'Cerebrovascular disease (stroke) mortality among all people, underlying cause'
 'Taking medicine for high cholesterol among adults'
 'Diseases of the heart mortality among all people, underlying cause'
 'High cholesterol among adults who have been screened'
 'Coronary heart disease mortality among all people, underlying cause'
 'High blood pressure among adults'
 'Taking medicine to control high blood pressure among adults with high blood pressure']


In [194]:
# Show all Stratifications
print(cardio_df["Stratification"].unique())

['American Indian or Alaska Native, non-Hispanic' 'Age >=65'
 'Hawaiian or Pacific Islander, non-Hispanic' 'Hispanic'
 'White, non-Hispanic' 'Black, non-Hispanic' 'Asian, non-Hispanic' 'Male'
 'Age 45-64' 'Female' 'Age 0-44' 'Multiracial, non-Hispanic' 'Overall'
 'Age 18-44' 'Asian or Pacific Islander, non-Hispanic']


In [195]:
# Create filtered df for data with age groups
age_group_df = cardio_df[cardio_df['Stratification'].isin(['Age >=65', 'Age 45-64', 'Age 18-44', 'Age 0-44'])]
print(len(age_group_df),'Records')
age_group_df.head(10)

4125 Records


Unnamed: 0,Year Recorded,State,Topic,Question,DataValueUnit,DataValueType,DataValue,Stratification,Geolocation,TopicID,StratID,Latitude,Longitude
194,2019,Alabama,Cardiovascular Disease,Cerebrovascular disease (stroke) mortality amo...,"cases per 100,000",Crude Rate,304.4,Age >=65,(-86.63186076199969 32.84057112200048),CVD,AGE65P,-86.63186076199969,32.84057112200048
211,2019,Alabama,Cardiovascular Disease,Taking medicine for high cholesterol among adults,%,Crude Prevalence,54.0,Age >=65,(-86.63186076199969 32.84057112200048),CVD,AGE65P,-86.63186076199969,32.84057112200048
273,2019,Alabama,Cardiovascular Disease,Taking medicine to control high blood pressure...,%,Crude Prevalence,86.8,Age 45-64,(-86.63186076199969 32.84057112200048),CVD,AGE4564,-86.63186076199969,32.84057112200048
313,2019,Alabama,Cardiovascular Disease,Coronary heart disease mortality among all peo...,Number,Number,123.0,Age 0-44,(-86.63186076199969 32.84057112200048),CVD,AGE0_44,-86.63186076199969,32.84057112200048
369,2019,Alabama,Cardiovascular Disease,Coronary heart disease mortality among all peo...,"cases per 100,000",Crude Rate,434.6,Age >=65,(-86.63186076199969 32.84057112200048),CVD,AGE65P,-86.63186076199969,32.84057112200048
479,2019,Alabama,Cardiovascular Disease,Coronary heart disease mortality among all peo...,"cases per 100,000",Crude Rate,86.0,Age 45-64,(-86.63186076199969 32.84057112200048),CVD,AGE4564,-86.63186076199969,32.84057112200048
515,2019,Alabama,Cardiovascular Disease,Taking medicine for high cholesterol among adults,%,Crude Prevalence,18.5,Age 18-44,(-86.63186076199969 32.84057112200048),CVD,AGE1844,-86.63186076199969,32.84057112200048
525,2019,Alabama,Cardiovascular Disease,High blood pressure among adults,%,Crude Prevalence,20.8,Age 18-44,(-86.63186076199969 32.84057112200048),CVD,AGE1844,-86.63186076199969,32.84057112200048
531,2019,Alabama,Cardiovascular Disease,Diseases of the heart mortality among all peop...,Number,Number,2751.0,Age 45-64,(-86.63186076199969 32.84057112200048),CVD,AGE4564,-86.63186076199969,32.84057112200048
551,2019,Alabama,Cardiovascular Disease,High cholesterol among adults who have been sc...,%,Crude Prevalence,43.8,Age 45-64,(-86.63186076199969 32.84057112200048),CVD,AGE4564,-86.63186076199969,32.84057112200048


In [196]:
result = age_group_df.groupby(['State', 'Stratification'])['DataValue'].sum().unstack(fill_value=0)
result_list = result.values.tolist()
print(result)
#pprint(result_list)
# Use these reults to insert a new "properties" feature in geojson_data.js
# call it "ageGroup":[list,of,ages]

Stratification        Age 0-44  Age 18-44  Age 45-64   Age >=65
State                                                          
Alabama                 2436.7      223.5    15891.2    58650.5
Alaska                   178.8      140.0     2405.1     7347.5
Arizona                 1468.7      166.4    12160.0    68090.7
Arkansas                1470.4      201.8    11648.7    41986.9
California              6880.9      157.3    59112.3   302223.7
Colorado                 928.8      146.7     7572.6    38506.8
Connecticut              543.4      179.8     6154.0    35656.6
Delaware                 195.9      178.6     2862.3    15606.5
District of Columbia     136.0      158.5     3016.3    11436.3
Florida                 4720.0       87.4    41029.4   243425.0
Georgia                 3485.3      197.4    24637.6    80593.8
Guam                       0.0      182.3      418.9      490.3
Hawaii                   280.1      159.3     3343.9    16461.6
Idaho                    342.4      159.

In [197]:
# Create filtered df for Male and Female data
gender_df = cardio_df[cardio_df['Stratification'].isin(['Male', 'Female'])]
print(len(gender_df),'Records')
gender_df.head(10)

5504 Records


Unnamed: 0,Year Recorded,State,Topic,Question,DataValueUnit,DataValueType,DataValue,Stratification,Geolocation,TopicID,StratID,Latitude,Longitude
256,2019,Alabama,Cardiovascular Disease,Taking medicine for high cholesterol among adults,%,Age-adjusted Prevalence,31.9,Male,(-86.63186076199969 32.84057112200048),CVD,SEXM,-86.63186076199969,32.84057112200048
296,2019,Alabama,Cardiovascular Disease,Hospitalization for heart failure as principal...,"cases per 1,000",Age-adjusted Rate,29.23,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
306,2019,Alabama,Cardiovascular Disease,High blood pressure among adults,%,Crude Prevalence,40.8,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
312,2019,Alabama,Cardiovascular Disease,High cholesterol among adults who have been sc...,%,Age-adjusted Prevalence,31.9,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
336,2019,Alabama,Cardiovascular Disease,Cerebrovascular disease (stroke) mortality amo...,"cases per 100,000",Age-adjusted Rate,48.7,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
434,2019,Alabama,Cardiovascular Disease,Taking medicine to control high blood pressure...,%,Crude Prevalence,85.2,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
438,2019,Alabama,Cardiovascular Disease,Coronary heart disease mortality among all peo...,"cases per 100,000",Age-adjusted Rate,55.7,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
444,2019,Alabama,Cardiovascular Disease,Taking medicine for high cholesterol among adults,%,Crude Prevalence,37.4,Female,(-86.63186076199969 32.84057112200048),CVD,SEXF,-86.63186076199969,32.84057112200048
570,2019,Alabama,Cardiovascular Disease,High blood pressure among adults,%,Crude Prevalence,44.3,Male,(-86.63186076199969 32.84057112200048),CVD,SEXM,-86.63186076199969,32.84057112200048
653,2019,Alabama,Cardiovascular Disease,Coronary heart disease mortality among all peo...,"cases per 100,000",Crude Rate,122.8,Male,(-86.63186076199969 32.84057112200048),CVD,SEXM,-86.63186076199969,32.84057112200048


In [198]:
result = gender_df.groupby(['State', 'Stratification'])['DataValue'].sum().unstack(fill_value=0)
result_list = result.values.tolist()
print(result)
#pprint(result_list)
# Use these reults to insert a new "properties" feature in geojson_data.js
# call it "gender":[list,of,genders]

Stratification            Female        Male
State                                       
Alabama                 53857.19    58978.40
Alaska                   4588.46     6487.64
Arizona                 51549.80    64472.83
Arkansas                35590.71    40739.18
California             254420.11   293659.12
Colorado                30893.49    35338.23
Connecticut             34362.43    35693.88
Delaware                11815.38    13749.41
District of Columbia     8129.41     8416.17
Florida                217581.94   247476.27
Georgia                 83706.18    91328.92
Guam                      658.40      665.80
Hawaii                  11387.26    14250.50
Idaho                   12338.68    15428.10
Illinois               120286.68   124612.57
Indiana                 64143.70    69352.61
Iowa                    30185.90    33982.96
Kansas                  26033.85    29709.92
Kentucky                48693.73    53593.33
Louisiana               49349.43    54346.82
Maine     

In [199]:
pprint(result_list)

[[53857.19, 58978.4],
 [4588.46, 6487.64],
 [51549.8, 64472.83],
 [35590.71, 40739.18],
 [254420.11, 293659.12],
 [30893.49, 35338.23],
 [34362.43, 35693.88],
 [11815.38, 13749.41],
 [8129.41, 8416.17],
 [217581.94, 247476.27],
 [83706.18000000001, 91328.92],
 [658.4, 665.8],
 [11387.26, 14250.5],
 [12338.68, 15428.1],
 [120286.68000000001, 124612.57],
 [64143.7, 69352.61],
 [30185.9, 33982.96],
 [26033.85, 29709.92],
 [48693.73, 53593.33],
 [49349.43, 54346.82],
 [14259.16, 16791.72],
 [51980.23, 53552.02],
 [60918.74, 62695.96],
 [114453.46, 121977.41],
 [39195.76, 42752.38],
 [35046.9, 39394.36],
 [65499.64, 71261.9],
 [9672.33, 13043.4],
 [15564.64, 17083.79],
 [25082.47, 33671.67],
 [13297.01, 16124.62],
 [84914.37, 87356.92],
 [17508.79, 21300.55],
 [194875.54, 195128.02],
 [92209.19, 98600.41],
 [7374.62, 9143.96],
 [132282.17, 139875.74],
 [43027.13, 47611.66],
 [30815.04, 35753.46],
 [146539.29, 151569.34],
 [814.0, 796.9],
 [13719.34, 14310.5],
 [48421.81, 55274.64],
 [8919.8

In [200]:
# Save the dataset as a CSV file
cardio_df.to_csv('../Resources/AJ_cardio_dataframe.csv', index=False)