In [2]:
#Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import scipy.stats as stats


In [3]:
# Load the heart disease rates-and-trends extract into a DataFrame.
# The file name says 2009-2019, but the preview below also contains rows for
# 1999, so the year range is narrowed explicitly in a later cell.
Heart_disease_df = pd.read_csv(
    "Resources/Rates_and_Trends_in_Heart_Disease_2009_2019.csv"
).astype({"Year": float})  # Year is stored as float64 throughout the notebook
Heart_disease_df.head()

Unnamed: 0,Year,LocationAbbr,LocationDesc,GeographicLevel,DataSource,Class,Topic,Data_Value,Data_Value_Unit,Data_Value_Type,...,Data_Value_Footnote,Confidence_limit_Low,Confidence_limit_High,StratificationCategory1,Stratification1,StratificationCategory2,Stratification2,StratificationCategory3,Stratification3,LocationID
0,1999.0,AL,Autauga,County,NVSS,Cardiovascular Diseases,All heart disease,,"per 100,000","Age-Standardized, Spatiotemporally Smoothed Rate",...,Value suppressed,,,Age group,Ages 35-64 years,Race,American Indian/Alaska Native,Sex,Overall,1001
1,2013.0,AL,Autauga,County,NVSS,Cardiovascular Diseases,All heart disease,,"per 100,000","Age-Standardized, Spatiotemporally Smoothed Rate",...,Value suppressed,,,Age group,Ages 35-64 years,Race,American Indian/Alaska Native,Sex,Overall,1001
2,2014.0,AL,Autauga,County,NVSS,Cardiovascular Diseases,All heart disease,,"per 100,000","Age-Standardized, Spatiotemporally Smoothed Rate",...,Value suppressed,,,Age group,Ages 35-64 years,Race,American Indian/Alaska Native,Sex,Overall,1001
3,2005.0,AL,Autauga,County,NVSS,Cardiovascular Diseases,All heart disease,,"per 100,000","Age-Standardized, Spatiotemporally Smoothed Rate",...,Value suppressed,,,Age group,Ages 35-64 years,Race,American Indian/Alaska Native,Sex,Overall,1001
4,2012.0,AL,Autauga,County,NVSS,Cardiovascular Diseases,All heart disease,,"per 100,000","Age-Standardized, Spatiotemporally Smoothed Rate",...,Value suppressed,,,Age group,Ages 35-64 years,Race,American Indian/Alaska Native,Sex,Overall,1001


In [5]:
# Keep only the descriptive columns used by the following cells.
# NOTE(review): the numeric "Data_Value" column is not part of this selection,
# only its unit label ("Data_Value_Unit") -- confirm that this is intended.
relevant_columns = [
    "Year",
    "LocationAbbr",
    "LocationDesc",
    "Topic",
    "Data_Value_Unit",
    "Stratification1",
    "Stratification2",
    "Stratification3",
    "LocationID",
]
Clean_Heart_disease_df = Heart_disease_df[relevant_columns]

# Discard every row that has a missing value in any of the selected columns
# (dropna defaults: row-wise, drop when any value is null).
Clean_Heart_disease_null = Clean_Heart_disease_df.dropna()
Clean_Heart_disease_null.head()

Unnamed: 0,Year,LocationAbbr,LocationDesc,Topic,Data_Value_Unit,Stratification1,Stratification2,Stratification3,LocationID
0,1999.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
1,2013.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
2,2014.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
3,2005.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
4,2012.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001


In [6]:
# Print a concise summary of Clean_Heart_disease_null: index range, per-column
# non-null counts, dtypes and memory usage. Used here as a sanity check that no
# null values remain after the dropna step.
Clean_Heart_disease_null.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 9 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   Year             1048575 non-null  float64
 1   LocationAbbr     1048575 non-null  object 
 2   LocationDesc     1048575 non-null  object 
 3   Topic            1048575 non-null  object 
 4   Data_Value_Unit  1048575 non-null  object 
 5   Stratification1  1048575 non-null  object 
 6   Stratification2  1048575 non-null  object 
 7   Stratification3  1048575 non-null  object 
 8   LocationID       1048575 non-null  int64  
dtypes: float64(1), int64(1), object(7)
memory usage: 72.0+ MB


In [12]:
# Replace the raw dataset column names with reader-friendly labels.
# NOTE(review): "Data_Value_Unit" holds the unit text ("per 100,000"), not a
# numeric rate, so the label "Rate per 100,000 population" may mislead readers;
# it is kept unchanged because later cells may rely on it -- confirm.
readable_column_names = {
    "LocationAbbr": "US States",
    "LocationDesc": "US County",
    "Topic": "Heart Disease Type",
    "Data_Value_Unit": "Rate per 100,000 population",
    "Stratification1": "Age range",
    "Stratification2": "Ethnicity",
    "Stratification3": "Gender",
}
Heart_disease_rename = Clean_Heart_disease_null.rename(columns=readable_column_names)
Heart_disease_rename.head()

Unnamed: 0,Year,US States,US County,Heart Disease Type,"Rate per 100,000 population",Age range,Ethnicity,Gender,LocationID
0,1999.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
1,2013.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
2,2014.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
3,2005.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
4,2012.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001


In [16]:
# Restrict the data to the study window, 2009 through 2019 (both ends included).
year_in_range = Heart_disease_rename["Year"].between(2009, 2019)
filtered_Heart_disease_rename = Heart_disease_rename[year_in_range]
filtered_Heart_disease_rename.head(20)

Unnamed: 0,Year,US States,US County,Heart Disease Type,"Rate per 100,000 population",Age range,Ethnicity,Gender,LocationID
1,2013.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
2,2014.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
4,2012.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
5,2010.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
6,2009.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
7,2011.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
9,2019.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
10,2018.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
12,2016.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
13,2015.0,AL,Autauga,All heart disease,"per 100,000",Ages 35-64 years,American Indian/Alaska Native,Overall,1001
