In [1]:
import plotly.express as px
import pandas as pd
import plotly.io as pio

# Select the Plotly renderer used by every fig.show() call below.
# The notebook is run in VS Code, hence "notebook_connected".
pio.renderers.default = "notebook_connected"

# Read the cleaned dataset (path is relative to this notebook's folder)
# and preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="../data/cleaned_data_week1_2.csv")
df.head(n=5)

Unnamed: 0,avganncount,avgdeathsperyear,target_deathrate,incidencerate,medincome,popest2015,povertypercent,binnedinc,medianage,medianagemale,...,pctempprivcoverage,pctpubliccoverage,pctpubliccoveragealone,pctwhite,pctblack,pctasian,birthrate,avghouseholdsize,county,state
0,1397.0,469,164.9,489.8,61898,260131,11.2,"(61494.5, 125635]",39.3,36.9,...,41.6,32.9,14.0,81.780529,2.594728,4.821857,6.118831,2.54,Kitsap County,Washington
1,173.0,70,161.3,411.6,48127,43269,18.6,"(48021.6, 51046.4]",33.0,32.2,...,43.6,31.1,15.3,89.228509,0.969102,2.246233,4.333096,2.34,Kittitas County,Washington
2,102.0,50,174.7,349.7,49348,21026,14.6,"(48021.6, 51046.4]",45.0,44.0,...,34.9,42.1,21.1,90.92219,0.739673,0.465898,3.729488,2.62,Klickitat County,Washington
3,427.0,202,194.8,430.4,44243,75882,17.1,"(42724.4, 45201]",42.8,42.2,...,35.0,45.3,25.0,91.744686,0.782626,1.161359,4.603841,2.52,Lewis County,Washington
4,57.0,26,144.4,350.1,49955,10321,12.5,"(48021.6, 51046.4]",48.3,47.8,...,35.1,44.0,22.7,94.104024,0.270192,0.66583,6.796657,2.34,Lincoln County,Washington


In [2]:
# County-level crude death rate per 100k residents.
# Kept on `df` so county-level cells can keep using the column.
df["death_rate"] = df["avgdeathsperyear"] / df["popest2015"] * 100000

# Full state name -> two-letter postal code (50 states + DC), as required by
# Plotly's locationmode="USA-states".
us_state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI',
    'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME',
    'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
    'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE',
    'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM',
    'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}

# Map full state names to abbreviations (names missing from the dict become NaN)
df["state_abbr"] = df["state"].map(us_state_abbrev)

# groupby() drops NaN keys by default, so unmapped states would silently
# vanish from the state-level table. Surface them instead of hiding them.
unmapped_states = sorted(
    df.loc[df["state_abbr"].isna(), "state"].dropna().astype(str).unique()
)
if unmapped_states:
    print(f"WARNING: no abbreviation for {unmapped_states}; these rows are excluded from state_df")

# State death rate = total deaths / total population * 100k.
# Averaging the county-level rates would give a tiny county the same weight
# as a large one, so the rate is rebuilt from state totals. Only rows with
# both inputs present contribute, keeping numerator and denominator in sync.
rate_inputs = df.dropna(subset=["avgdeathsperyear", "popest2015"])
state_totals = rate_inputs.groupby("state_abbr")[["avgdeathsperyear", "popest2015"]].sum()
state_death_rate = state_totals["avgdeathsperyear"] / state_totals["popest2015"] * 100000

# The other two columns remain plain means across each state's counties.
# Column names and order match the previous table:
#   state_abbr, avgdeathsperyear, death_rate, povertypercent
# NOTE(review): the data also has a `target_deathrate` column; presumably a
# published mortality rate -- confirm whether it should be mapped instead.
state_df = (
    df.groupby("state_abbr")[["avgdeathsperyear", "povertypercent"]]
    .mean()
    .assign(death_rate=state_death_rate)  # aligned on the state_abbr index
    [["avgdeathsperyear", "death_rate", "povertypercent"]]
    .reset_index()
)

In [3]:
# US choropleth: one polygon per state, shaded by the state-level death_rate.
# `state_abbr` holds two-letter codes, which is what "USA-states" expects.
fig = px.choropleth(
    data_frame=state_df,
    title="Interactive: Cancer Mortality Rate by State",
    scope="usa",
    locationmode="USA-states",
    locations="state_abbr",
    color="death_rate",
    color_continuous_scale="Reds",
    labels={"death_rate": "Deaths per 100K"},  # legend / hover caption
)
fig.show()

In [4]:
# Scatter of state-level poverty against deaths per year, one point per state,
# with an ordinary-least-squares trend line.
# Both plotted columns are means across each state's counties (see how
# state_df is built), so the axis labels say so instead of showing the raw
# column identifiers.
# NOTE(review): y is a mean of raw county death counts, not a rate, so it
# scales with county population size. Consider y='death_rate' if the goal is
# to relate poverty to mortality -- confirm intent before changing.
fig = px.scatter(
    state_df,
    x='povertypercent',
    y='avgdeathsperyear',
    hover_name='state_abbr',
    trendline='ols',  # Plotly fits this via statsmodels, which must be installed
    labels={
        'povertypercent': 'Mean county poverty rate (%)',
        'avgdeathsperyear': 'Mean county deaths per year',
    },
    title='Interactive: Poverty vs. Cancer Deaths by State'
)
fig.show()