In [1]:
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# State-wise ADHD prevalence, to be charted with Altair
import altair as alt

# Lookup table from numeric state FIPS code to two-letter postal abbreviation.
# 51 entries (the 50 states plus DC); codes 3, 7, 14, 43 and 52 have no entry.
state_abbr = {
    1: 'AL', 2: 'AK', 4: 'AZ', 5: 'AR', 6: 'CA', 8: 'CO',
    9: 'CT', 10: 'DE', 11: 'DC', 12: 'FL', 13: 'GA', 15: 'HI',
    16: 'ID', 17: 'IL', 18: 'IN', 19: 'IA', 20: 'KS', 21: 'KY',
    22: 'LA', 23: 'ME', 24: 'MD', 25: 'MA', 26: 'MI', 27: 'MN',
    28: 'MS', 29: 'MO', 30: 'MT', 31: 'NE', 32: 'NV', 33: 'NH',
    34: 'NJ', 35: 'NM', 36: 'NY', 37: 'NC', 38: 'ND', 39: 'OH',
    40: 'OK', 41: 'OR', 42: 'PA', 44: 'RI', 45: 'SC', 46: 'SD',
    47: 'TN', 48: 'TX', 49: 'UT', 50: 'VT', 51: 'VA', 53: 'WA',
    54: 'WV', 55: 'WI', 56: 'WY',
}

# Read the prevalence table and attach the abbreviation matching each row's
# FIPSST code; a code that is absent from state_abbr yields NaN in 'state'.
adhd_data = pd.read_csv('State_distribution_df.csv').assign(
    state=lambda frame: frame['FIPSST'].map(state_abbr)
)

adhd_data


Unnamed: 0.1,Unnamed: 0,Year,FIPSST,ADHD_Diagnosis,percentage,state
0,1,2016,1,Yes,12.804878,AL
1,3,2016,2,Yes,6.854345,AK
2,5,2016,4,Yes,9.070295,AZ
3,7,2016,5,Yes,12.515188,AR
4,9,2016,6,Yes,6.538049,CA
...,...,...,...,...,...,...
352,705,2022,51,Yes,10.661765,VA
353,707,2022,53,Yes,9.644670,WA
354,709,2022,54,Yes,14.558473,WV
355,711,2022,55,Yes,8.695652,WI


In [3]:
# The map cell joins each data row to its state geometry with lookup='id',
# so the FIPS column has to be called 'id'. Rebinding the name (instead of
# inplace=True) leaves the frame displayed by the previous cell untouched and
# keeps this cell safe to re-run.
adhd_data = adhd_data.rename(columns={'FIPSST': 'id'})

In [4]:
import altair as alt
from vega_datasets import data

# Point selection holding the state clicked on the map. With empty=False an
# untouched selection matches nothing, so anything filtered by it starts empty.
click = alt.selection_point(fields=['id'], empty=False)

# Slider-bound point selection over the 'Year' column.
# Plain ints keep the slider bounds and initial value free of numpy scalar
# types (assumes 'Year' holds whole-number years with no missing values).
first_year = int(adhd_data["Year"].min())
last_year = int(adhd_data["Year"].max())
year_slider = alt.binding_range(name="Select a year:", min=first_year, max=last_year, step=1)
# Seed the selection with the latest year. Without an initial value the
# selection is empty until the slider is moved, and an empty selection
# (default empty=True) lets every year through the filter, so each state
# would be drawn once per year on top of itself.
selected_year = alt.selection_point(
    fields=["Year"],
    bind=year_slider,
    value=[{"Year": last_year}],
)

# Use the vega_datasets to load the US states geography
states = alt.topo_feature(data.us_10m.url, 'states')
source = adhd_data

# Fixed scale domain, taken over all rows of adhd_data so that the scale does
# not change when the displayed year changes
min_percentage = adhd_data['percentage'].min()
max_percentage = adhd_data['percentage'].max()
 
# Choropleth of prevalence per state for the year picked on the slider.

# Colour scale pinned to the min/max computed over the whole table.
prevalence_color = alt.Color(
    'percentage:Q',
    scale=alt.Scale(domain=[min_percentage, max_percentage], scheme='blueorange'),
    title="ADHD Prevalence (%)",
)

map_tooltip = [
    alt.Tooltip("state:N", title="State"),
    alt.Tooltip("percentage:Q", title="ADHD Prevalence (%)", format=".2f"),
]

# State outlines, matched to data rows through the shared 'id' key.
state_shapes = alt.LookupData(data=states, key='id')

map_chart = (
    alt.Chart(source)
    .mark_geoshape()
    .encode(shape='geo:G', color=prevalence_color, tooltip=map_tooltip)
    .transform_lookup(lookup='id', from_=state_shapes, as_='geo')
    .transform_filter(selected_year)
    .properties(width=800, height=500, title="ADHD Diagnosis Prevalence by State")
    .add_params(selected_year, click)
    .project(type="albersUsa")
)



# Bar chart showing prevalence over the years for the state clicked on the map.
# It is filtered by `click`, so it stays empty until a state is clicked.
bar_chart = alt.Chart(source).mark_bar().encode(
    x='Year:O',
    # Bars start at zero so their length is proportional to the value; the
    # upper bound stays fixed at the overall maximum so that different states
    # are drawn on the same scale. (A domain starting at min_percentage would
    # truncate the baseline and give the lowest value a zero-height bar.)
    y=alt.Y('percentage:Q', scale=alt.Scale(domain=[0, max_percentage]), axis=alt.Axis(title='Percentage')),
    color='Year:O',
    column='state:N',
    tooltip=[
        alt.Tooltip('Year:O', title='Year'),
        alt.Tooltip('percentage:Q', title='ADHD Prevalence (%)', format='.2f')
    ]
).transform_filter(
    click
).properties(
    width=100,
    height=200,
    title="ADHD Diagnosis Prevalence Over Years"
)

map_chart & bar_chart  # Display the charts vertically stacked

