In [36]:
import polars as pl
import plotly.graph_objects as go 
from dash import Dash, dcc
import dash_ag_grid as dag

#-------------------------------------------------------------------------------
#  In this data set, 91.4% of the entries are from the United States and 7.5%
#  are listed as "No state or country". The second most identified country
#  is England, with 0.22% of the dataset's entries. This visualization will
#  focus on the USA only. A dashboard similar to the code sample will have a
#  dash_ag_grid table, a graph_objects figure with 4 histogram overlays for the
#  east, central, mountain, and pacific regions, and a choropleth map showing
#  the number of entries by American state.

#-------------------------------------------------------------------------------

# The URL is written as adjacent string literals (joined at compile time) so
# that each line stays within the PEP-8 length limit.
source_file = (
    'https://raw.githubusercontent.com/plotly/Figure-Friday/refs/'
    'heads/main/2025/week-4/Post45_NEAData_Final.csv'
)

# Load the NEA grant data and derive two columns:
#   WRITERS_AGE  nea_grant_year minus birth_year
#   COUNTRY_PCT  percentage of all rows that share the row's country value
df = (
    pl.read_csv(source_file)
    .with_columns(
        WRITERS_AGE=pl.col('nea_grant_year') - pl.col('birth_year')
    )
    # 1-based row number, added as a column named 'index'
    .with_row_index(offset=1)
    .with_columns(
        # pl.len() counts every row, nulls included: the total does not
        # depend on the row-index offset, and rows with a missing country
        # get their real share rather than 0.
        COUNTRY_PCT=(
            100 * pl.len().over('country') / pl.len()
        ).cast(pl.Float32)
    )
)

# df = pl.read_csv("https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2025/week-4/Post45_NEAData_Final.csv")
# df['age of writer'] = df.nea_grant_year - df.birth_year
#fig = px.histogram(df, x='age of writer')
#fig.show()
# Per-country share of entries: keep one row per country, reduce it to the
# country name and its percentage, and list the largest shares first.
# Left as a bare expression so the notebook renders it as the cell output.
(
    df
    .unique(subset='country')
    .select('country', 'COUNTRY_PCT')
    .sort(by='COUNTRY_PCT', descending=True)
)


country,COUNTRY_PCT
str,f32
"""USA""",91.390015
"""No state or country listed""",7.476383
"""England""",0.215924
"""Canada""",0.161943
"""France""",0.134953
…,…
"""Mexico""",0.026991
"""USA / Israel""",0.026991
"""China""",0.026991
"""Turkey""",0.026991
