In [8]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

In [9]:
# Load the 2015 dataset from CSV. low_memory=False asks pandas to parse the
# file in one pass instead of in chunks.
df_2015 = pd.read_csv(
    'csv/2015_data.csv',
    low_memory=False,
)

In [10]:
# Cast the coded columns used by the filter to str so they can be compared
# with the string codes looked up in update_visualization.
for _column in ('resident_status', 'education_2003_revision', 'marital_status', 'sex'):
    df_2015[_column] = df_2015[_column].astype(str)

# Age selector: whole numbers from 0 to 100, initially 30.
age_slider = widgets.IntSlider(description='Age:', value=30, min=0, max=100, step=1)

# Resident-status selector.
resident_status_dropdown = widgets.Dropdown(
    description='Resident Status:',
    value='RESIDENTS',
    options=[
        'RESIDENTS',
        'INTRASTATE NONRESIDENTS',
        'INTERSTATE NONRESIDENTS',
        'FOREIGN RESIDENTS',
    ],
)

# Education selector, initially 'Unknown'.
education_dropdown = widgets.Dropdown(
    description='Education:',
    value='Unknown',
    options=[
        '8th grade or less',
        '9 - 12th grade, no diploma',
        'high school graduate or GED completed',
        'some college credit, but no degree',
        'Associate degree',
        'Bachelor’s degree',
        'Master’s degree',
        'Doctorate or professional degree',
        'Unknown',
    ],
)

# Marital-status selector, initially 'Marital Status unknown'.
marital_status_dropdown = widgets.Dropdown(
    description='Marital Status:',
    value='Marital Status unknown',
    options=[
        'Never married, single',
        'Married',
        'Widowed',
        'Divorced',
        'Marital Status unknown',
    ],
)

# Gender selector.
gender_dropdown = widgets.Dropdown(
    description='Gender:',
    value='Male',
    options=['Male', 'Female'],
)

# Container that update_visualization draws into.
output = widgets.Output()

# Show the controls followed by the (initially empty) output area.
display(
    age_slider,
    resident_status_dropdown,
    education_dropdown,
    marital_status_dropdown,
    gender_dropdown,
    output,
)

def top10percentages(arr, k=10):
    """Return the ``k`` largest values of a 1-D array and their positions.

    Parameters
    ----------
    arr : array-like
        One-dimensional numeric data.
    k : int, optional
        Maximum number of entries to return (default 10). When ``arr`` has
        fewer than ``k`` elements, all of them are returned instead of
        raising.

    Returns
    -------
    top_numbers : numpy.ndarray
        The selected values, sorted in descending order.
    top_indices : numpy.ndarray
        Positions of those values in ``arr``, in the same order.
    """
    arr = np.asarray(arr)

    # Never ask for more entries than exist; argpartition rejects an
    # out-of-range kth, which is what happened for arrays shorter than 10.
    k = min(k, len(arr))
    if k <= 0:
        return arr[:0], np.empty(0, dtype=np.intp)

    # Partial selection of the k largest entries (unordered at this point).
    top_indices = np.argpartition(arr, -k)[-k:]
    top_numbers = arr[top_indices]

    # Order the selection from largest to smallest.
    order = np.argsort(top_numbers)[::-1]
    return top_numbers[order], top_indices[order]

def update_visualization(change):
    """Redraw the cause-of-death pie chart for the current widget selections.

    Intended as an ``observe`` callback. Reads the current values of the age
    slider and of the resident-status, education, marital-status and gender
    dropdowns, filters ``df_2015`` to the matching rows and, inside the
    ``output`` widget, draws a pie chart of the most frequent
    ``358_cause_recode`` values (at most ten) as percentages of the matching
    rows. Any remaining share is drawn as a single 'Other' wedge so the
    printed percentages refer to all matching rows.

    Parameters
    ----------
    change : dict-like
        Change notification supplied by ``observe``; it is not inspected.
    """
    # Dropdown label -> code as compared against the stringified columns.
    gender_code = {
        'Male': 'M',
        'Female': 'F'
    }
    resident_status_code = {
        'RESIDENTS': '1',
        'INTRASTATE NONRESIDENTS': '2',
        'INTERSTATE NONRESIDENTS': '3',
        'FOREIGN RESIDENTS': '4'
    }
    education_code = {
        '8th grade or less': '1.0',
        '9 - 12th grade, no diploma': '2.0',
        'high school graduate or GED completed': '3.0',
        'some college credit, but no degree': '4.0',
        'Associate degree': '5.0',
        'Bachelor’s degree': '6.0',
        'Master’s degree': '7.0',
        'Doctorate or professional degree': '8.0',
        'Unknown': '9.0'
    }
    marital_status_code = {
        'Never married, single': 'S',
        'Married': 'M',
        'Widowed': 'W',
        'Divorced': 'D',
        'Marital Status unknown': 'U'
    }

    with output:
        # wait=True postpones clearing until new output arrives (less flicker).
        clear_output(wait=True)

        age = age_slider.value
        resident_status = resident_status_dropdown.value
        education = education_dropdown.value
        marital_status = marital_status_dropdown.value
        gender = gender_dropdown.value

        # NOTE(review): detail_age is matched numerically only; if the data
        # also records the unit of detail_age, it is not checked here - confirm.
        selection = (
            (df_2015['detail_age'] == int(age)) &
            (df_2015['resident_status'] == resident_status_code[resident_status]) &
            (df_2015['education_2003_revision'] == education_code[education]) &
            (df_2015['marital_status'] == marital_status_code[marital_status]) &
            (df_2015['sex'] == gender_code[gender])
        )
        cause_codes = df_2015.loc[selection, '358_cause_recode'].to_numpy()
        total_deaths = cause_codes.size

        if total_deaths == 0:
            print("No data available for the selected criteria.")
            return

        # Vectorised tally of each cause code. minlength keeps the historical
        # 456-slot layout (so the top-10 helper always gets >= 10 entries)
        # while still growing if a larger code appears.
        cause_counts = np.bincount(cause_codes.astype(np.intp), minlength=456)
        cause_percentages = cause_counts / total_deaths * 100

        top_numbers, top_indices = top10percentages(cause_percentages)

        # Drop causes that never occur so no empty 0% wedges are drawn.
        observed = top_numbers > 0
        slice_values = top_numbers[observed]
        slice_labels = [str(code) for code in top_indices[observed]]

        # Pie wedges are normalised to their own sum; adding the remainder
        # keeps every printed percentage relative to all matching rows.
        remainder = 100.0 - slice_values.sum()
        if remainder > 1e-9:
            slice_values = np.append(slice_values, remainder)
            slice_labels.append('Other')

        fig, ax = plt.subplots(figsize=(8, 8))
        ax.pie(slice_values, labels=slice_labels, autopct='%1.1f%%')
        ax.set_title('Cause of Death Percentages')
        ax.axis('equal')
        plt.show()


# Redraw the chart whenever the value of any control changes.
for _control in (
    age_slider,
    resident_status_dropdown,
    education_dropdown,
    marital_status_dropdown,
    gender_dropdown,
):
    _control.observe(update_visualization, names='value')

IntSlider(value=30, description='Age:')

Dropdown(description='Resident Status:', options=('RESIDENTS', 'INTRASTATE NONRESIDENTS', 'INTERSTATE NONRESID…

Dropdown(description='Education:', index=8, options=('8th grade or less', '9 - 12th grade, no diploma', 'high …

Dropdown(description='Marital Status:', index=4, options=('Never married, single', 'Married', 'Widowed', 'Divo…

Dropdown(description='Gender:', options=('Male', 'Female'), value='Male')

Output()

In [11]:
# Cast the coded columns used by the filter to str so they can be compared
# with the string codes offered by the dropdowns below.
for _column in ('hispanic_originrace_recode', 'education_2003_revision', 'marital_status', 'sex'):
    df_2015[_column] = df_2015[_column].astype(str)

# NOTE: this cell rebinds age_slider, education_dropdown,
# marital_status_dropdown, gender_dropdown and output. Callbacks defined in
# earlier cells look these names up when they run, so widgets rendered by
# earlier cells will read the objects created here once this cell has run.

# Age selector: whole numbers from 0 to 100, initially 30.
age_slider = widgets.IntSlider(description='Age:', value=30, min=0, max=100, step=1)

# Hispanic-origin selector (raw codes '1'-'9'), initially '9'.
hispanic_origin_dropdown = widgets.Dropdown(
    description='Hispanic origin:',
    value='9',
    options=['1', '2', '3', '4', '5', '6', '7', '8', '9'],
)

# Education selector (raw codes '1.0'-'9.0'), initially '9.0'.
education_dropdown = widgets.Dropdown(
    description='Education:',
    value='9.0',
    options=['1.0', '2.0', '3.0', '4.0', '5.0', '6.0', '7.0', '8.0', '9.0'],
)

# Marital-status selector (raw codes), initially 'U'.
marital_status_dropdown = widgets.Dropdown(
    description='Marital Status:',
    value='U',
    options=['S', 'M', 'W', 'D', 'U'],
)

# Gender selector (raw codes).
gender_dropdown = widgets.Dropdown(
    description='Gender:',
    value='M',
    options=['M', 'F'],
)

# Container that update_visualization draws into.
output = widgets.Output()

# Show the controls followed by the (initially empty) output area.
display(
    age_slider,
    hispanic_origin_dropdown,
    education_dropdown,
    marital_status_dropdown,
    gender_dropdown,
    output,
)

def top10percentages(arr, k=10):
    """Return the ``k`` largest values of a 1-D array and their positions.

    Parameters
    ----------
    arr : array-like
        One-dimensional numeric data.
    k : int, optional
        Maximum number of entries to return (default 10). When ``arr`` has
        fewer than ``k`` elements, all of them are returned instead of
        raising.

    Returns
    -------
    top_numbers : numpy.ndarray
        The selected values, sorted in descending order.
    top_indices : numpy.ndarray
        Positions of those values in ``arr``, in the same order.
    """
    arr = np.asarray(arr)

    # Never ask for more entries than exist; argpartition rejects an
    # out-of-range kth, which is what happened for arrays shorter than 10.
    k = min(k, len(arr))
    if k <= 0:
        return arr[:0], np.empty(0, dtype=np.intp)

    # Partial selection of the k largest entries (unordered at this point).
    top_indices = np.argpartition(arr, -k)[-k:]
    top_numbers = arr[top_indices]

    # Order the selection from largest to smallest.
    order = np.argsort(top_numbers)[::-1]
    return top_numbers[order], top_indices[order]

def update_visualization(change):
    """Redraw the cause-of-death pie chart for the current widget selections.

    Intended as an ``observe`` callback. Reads the current values of the age
    slider and of the Hispanic-origin, education, marital-status and gender
    dropdowns (which already hold raw codes), filters ``df_2015`` to the
    matching rows and, inside the ``output`` widget, draws a pie chart of the
    most frequent ``358_cause_recode`` values (at most ten) as percentages of
    the matching rows. Any remaining share is drawn as a single 'Other' wedge
    so the printed percentages refer to all matching rows.

    Parameters
    ----------
    change : dict-like
        Change notification supplied by ``observe``; it is not inspected.
    """
    with output:
        # wait=True postpones clearing until new output arrives (less flicker).
        clear_output(wait=True)

        age = age_slider.value
        hispanic_origin = hispanic_origin_dropdown.value
        education = education_dropdown.value
        marital_status = marital_status_dropdown.value
        gender = gender_dropdown.value

        # NOTE(review): detail_age is matched numerically only; if the data
        # also records the unit of detail_age, it is not checked here - confirm.
        selection = (
            (df_2015['detail_age'] == int(age)) &
            (df_2015['hispanic_originrace_recode'] == hispanic_origin) &
            (df_2015['education_2003_revision'] == education) &
            (df_2015['marital_status'] == marital_status) &
            (df_2015['sex'] == gender)
        )
        cause_codes = df_2015.loc[selection, '358_cause_recode'].to_numpy()
        total_deaths = cause_codes.size

        if total_deaths == 0:
            print("No data available for the selected criteria.")
            return

        # Vectorised tally of each cause code. minlength keeps the historical
        # 456-slot layout (so the top-10 helper always gets >= 10 entries)
        # while still growing if a larger code appears.
        cause_counts = np.bincount(cause_codes.astype(np.intp), minlength=456)
        cause_percentages = cause_counts / total_deaths * 100

        top_numbers, top_indices = top10percentages(cause_percentages)

        # Drop causes that never occur so no empty 0% wedges are drawn.
        observed = top_numbers > 0
        slice_values = top_numbers[observed]
        slice_labels = [str(code) for code in top_indices[observed]]

        # Pie wedges are normalised to their own sum; adding the remainder
        # keeps every printed percentage relative to all matching rows.
        remainder = 100.0 - slice_values.sum()
        if remainder > 1e-9:
            slice_values = np.append(slice_values, remainder)
            slice_labels.append('Other')

        fig, ax = plt.subplots(figsize=(8, 8))
        ax.pie(slice_values, labels=slice_labels, autopct='%1.1f%%')
        ax.set_title('Cause of Death Percentages')
        ax.axis('equal')
        plt.show()


# Redraw the chart whenever the value of any control shown by this cell
# changes. The Hispanic-origin dropdown is observed here; previously the
# resident-status dropdown (not part of this cell's interface) was wired up
# instead, so changing the Hispanic origin never refreshed the chart.
age_slider.observe(update_visualization, 'value')
hispanic_origin_dropdown.observe(update_visualization, 'value')
education_dropdown.observe(update_visualization, 'value')
marital_status_dropdown.observe(update_visualization, 'value')
gender_dropdown.observe(update_visualization, 'value')

IntSlider(value=30, description='Age:')

Dropdown(description='Hispanic origin:', index=8, options=('1', '2', '3', '4', '5', '6', '7', '8', '9'), value…

Dropdown(description='Education:', index=8, options=('1.0', '2.0', '3.0', '4.0', '5.0', '6.0', '7.0', '8.0', '…

Dropdown(description='Marital Status:', index=4, options=('S', 'M', 'W', 'D', 'U'), value='U')

Dropdown(description='Gender:', options=('M', 'F'), value='M')

Output()

In [12]:
# Cast the coded columns to str so they can be compared with string codes.
for _column in ('education_2003_revision', 'race', 'sex'):
    df_2015[_column] = df_2015[_column].astype(str)

# NOTE: this cell rebinds age_slider, gender_dropdown and output. Callbacks
# defined in earlier cells look these names up when they run, so widgets
# rendered by earlier cells will read the objects created here once this
# cell has run.

# Age selector: whole numbers from 0 to 100, initially 0.
age_slider = widgets.IntSlider(description='Age:', value=0, min=0, max=100, step=1)

# Race selector (human-readable labels), initially 'Black'.
race_dropdown = widgets.Dropdown(
    description='Race:',
    value='Black',
    options=[
        'White', 'Black', 'Korean', 'Vietnamese', 'Indian',
        'Native American', 'Hawaiian', 'Chinese', 'Japanese',
        'other Asian or Pacific Islander', 'Filipino', 'Samoan', 'Guamanian',
    ],
)

# Gender selector (raw codes).
gender_dropdown = widgets.Dropdown(
    description='Gender:',
    value='M',
    options=['M', 'F'],
)

# Container that update_visualization draws into.
output = widgets.Output()

# Show the controls followed by the (initially empty) output area.
display(
    age_slider,
    race_dropdown,
    gender_dropdown,
    output,
)

def top10percentages(arr, k=10):
    """Return the ``k`` largest values of a 1-D array and their positions.

    Parameters
    ----------
    arr : array-like
        One-dimensional numeric data.
    k : int, optional
        Maximum number of entries to return (default 10). When ``arr`` has
        fewer than ``k`` elements, all of them are returned instead of
        raising.

    Returns
    -------
    top_numbers : numpy.ndarray
        The selected values, sorted in descending order.
    top_indices : numpy.ndarray
        Positions of those values in ``arr``, in the same order.
    """
    arr = np.asarray(arr)

    # Never ask for more entries than exist; argpartition rejects an
    # out-of-range kth, which is what happened for arrays shorter than 10.
    k = min(k, len(arr))
    if k <= 0:
        return arr[:0], np.empty(0, dtype=np.intp)

    # Partial selection of the k largest entries (unordered at this point).
    top_indices = np.argpartition(arr, -k)[-k:]
    top_numbers = arr[top_indices]

    # Order the selection from largest to smallest.
    order = np.argsort(top_numbers)[::-1]
    return top_numbers[order], top_indices[order]

def update_visualization(change):
    """Redraw the cause-of-death pie chart for the current widget selections.

    Intended as an ``observe`` callback. Reads the current values of the age
    slider and of the race and gender dropdowns (the controls displayed by
    this cell), filters ``df_2015`` to the matching rows and, inside the
    ``output`` widget, draws a pie chart of the most frequent
    ``358_cause_recode`` values (at most ten) as percentages of the matching
    rows. Any remaining share is drawn as a single 'Other' wedge so the
    printed percentages refer to all matching rows.

    Previously this function read the resident-status, education and
    marital-status dropdowns, none of which this cell displays, and ignored
    the race dropdown entirely.

    Parameters
    ----------
    change : dict-like
        Change notification supplied by ``observe``; it is not inspected.
    """
    # Race label -> code(s) as they appear in the stringified 'race' column.
    # NOTE(review): these codes are assumed from the NCHS mortality race
    # coding and assume the column held plain integers before astype(str);
    # 'Indian' is taken to mean Asian Indian because 'Native American' is
    # listed separately - verify all of this against the dataset's codebook.
    race_code = {
        'White': ['1'],
        'Black': ['2'],
        'Native American': ['3'],
        'Chinese': ['4'],
        'Japanese': ['5'],
        'Hawaiian': ['6'],
        'Filipino': ['7'],
        'Indian': ['18'],
        'Korean': ['28'],
        'Samoan': ['38'],
        'Vietnamese': ['48'],
        'Guamanian': ['58'],
        'other Asian or Pacific Islander': ['68', '78'],
    }

    with output:
        # wait=True postpones clearing until new output arrives (less flicker).
        clear_output(wait=True)

        age = age_slider.value
        race = race_dropdown.value
        gender = gender_dropdown.value

        # NOTE(review): detail_age is matched numerically only; if the data
        # also records the unit of detail_age, it is not checked here - confirm.
        selection = (
            (df_2015['detail_age'] == int(age)) &
            (df_2015['race'].isin(race_code[race])) &
            (df_2015['sex'] == gender)
        )
        cause_codes = df_2015.loc[selection, '358_cause_recode'].to_numpy()
        total_deaths = cause_codes.size

        if total_deaths == 0:
            print("No data available for the selected criteria.")
            return

        # Vectorised tally of each cause code. minlength keeps the historical
        # 456-slot layout (so the top-10 helper always gets >= 10 entries)
        # while still growing if a larger code appears.
        cause_counts = np.bincount(cause_codes.astype(np.intp), minlength=456)
        cause_percentages = cause_counts / total_deaths * 100

        top_numbers, top_indices = top10percentages(cause_percentages)

        # Drop causes that never occur so no empty 0% wedges are drawn.
        observed = top_numbers > 0
        slice_values = top_numbers[observed]
        slice_labels = [str(code) for code in top_indices[observed]]

        # Pie wedges are normalised to their own sum; adding the remainder
        # keeps every printed percentage relative to all matching rows.
        remainder = 100.0 - slice_values.sum()
        if remainder > 1e-9:
            slice_values = np.append(slice_values, remainder)
            slice_labels.append('Other')

        fig, ax = plt.subplots(figsize=(8, 8))
        ax.pie(slice_values, labels=slice_labels, autopct='%1.1f%%')
        ax.set_title('Cause of Death Percentages')
        ax.axis('equal')
        plt.show()


# Redraw the chart whenever the value of any control changes.
for _control in (age_slider, race_dropdown, gender_dropdown):
    _control.observe(update_visualization, names='value')

IntSlider(value=0, description='Age:')

Dropdown(description='Race:', index=1, options=('White', 'Black', 'Korean', 'Vietnamese', 'Indian', 'Native Am…

Dropdown(description='Gender:', options=('M', 'F'), value='M')

Output()