In [24]:
import plotly.express as px
import plotly.graph_objects as go
import polars as pl
import pyarrow.parquet as pq

## Target tables

In [25]:
# Load the APT103 target table: pyarrow reads the parquet file and polars
# wraps the resulting Arrow table.
dat = pl.from_arrow(pq.read_table("./apt103.parquet"))

# Store tract codes as strings (they are matched against string tract lists
# later on) and derive percent_active = target / population * 100.
dat = dat.with_columns(
    pl.col("tractcode").cast(pl.Utf8),
    percent_active=pl.col("target") / pl.col("population") * 100,
)
dat.head(5)

tractcode,population,target,cnamelong,percent_active
str,i64,f64,str,f64
"""16029960100""",4386,806.0,"""Caribou County…",18.376653
"""16031950100""",2879,443.0,"""Cassia County""",15.387287
"""16055000500""",14213,112.5,"""Kootenai Count…",0.791529
"""16027021700""",11701,212.5,"""Canyon County""",1.816084
"""16067970400""",4494,325.5,"""Minidoka Count…",7.242991


In [26]:
# (rows, columns) of the loaded frame.
dat.shape

(298, 5)

In [27]:
# Summary statistics for the target column only.
dat.select("target").describe()

describe,target
str,f64
"""count""",298.0
"""null_count""",0.0
"""mean""",340.682886
"""std""",403.380767
"""min""",0.0
"""25%""",74.0
"""50%""",206.0
"""75%""",433.0
"""max""",3127.0


In [28]:
# Tract codes (as strings, matching the tractcode column) for the Rexburg
# area; in the data these rows all fall in Madison County.
rexburg_tracts = [
    "16065950100",
    "16065950200",
    "16065950400",
    "16065950301",
    "16065950500",
    "16065950302",
]

# Tract codes for the "courd" comparison area (Kootenai County rows in the
# data; presumably Coeur d'Alene -- confirm).
courd_tracts = [
    "16055000402",
    "16055000401",
    "16055001200",
    "16055000900",
]

In [29]:
# Rows for the Rexburg tracts, reduced to the tract code, county name and
# target columns.
rexburg = (
    dat
    .filter(pl.col("tractcode").is_in(rexburg_tracts))
    .select("tractcode", "cnamelong", "target")
)
rexburg

tractcode,cnamelong,target
str,str,f64
"""16065950500""","""Madison County…",1248.5
"""16065950100""","""Madison County…",1427.5
"""16065950302""","""Madison County…",3127.0
"""16065950200""","""Madison County…",820.0
"""16065950301""","""Madison County…",1392.0
"""16065950400""","""Madison County…",961.0


In [30]:
# Every column of dat, restricted to the tracts listed in courd_tracts.
courd = dat.filter(
    pl.col("tractcode").is_in(courd_tracts)
)
courd

tractcode,population,target,cnamelong,percent_active
str,i64,f64,str,f64
"""16055001200""",6535,34.0,"""Kootenai Count…",0.520275
"""16055000402""",8894,66.0,"""Kootenai Count…",0.742073
"""16055000401""",5808,70.0,"""Kootenai Count…",1.205234
"""16055000900""",5978,71.0,"""Kootenai Count…",1.187688


## Histograms of the target and percent active

In [34]:
# Flag the Rexburg tracts so they can be coloured separately in the charts.
dat = dat.with_columns(madison_county=pl.col("tractcode").is_in(rexburg_tracts))

# Sum of target for each tract code, coloured by the madison_county flag.
fig = px.histogram(dat, x="tractcode", y="target", color="madison_county")
fig.update_layout(title="Count of Active Members per Idaho Tract")
# Tract codes are all-digit strings; Plotly's axis auto-detection treats such
# strings as numbers and would bin neighbouring tracts together. Forcing a
# category axis keeps one bar per tract, as the title promises.
fig.update_xaxes(type="category")
fig.show()

In [32]:
# Per-tract percent_active, coloured by the madison_county flag that the
# previous cell adds to dat.
fig = px.histogram(dat, x="tractcode", y="percent_active", color="madison_county")
fig.update_layout(title="Percent of Active Members per Idaho Tract")
# Tract codes are all-digit strings; without an explicit category axis Plotly
# reads them as numbers and sums the percentages of every tract that lands in
# the same numeric bin, which is not a meaningful quantity.
fig.update_xaxes(type="category")
fig.show()

## Apt103 vs Rubber Duckies

In [33]:
# Load the Rubber Duckies targets and cast the join key to string so it
# matches dat's tractcode column.
rd_dat = pl.from_arrow(pq.read_table("./data/rubber_duckies_target.parquet"))
rd_dat = rd_dat.with_columns(pl.col("tractcode").cast(pl.Utf8))

# Attach the Rubber Duckies targets to dat by tract, express them as a
# percentage of population, drop the right-hand copy of the county name that
# the join produces, and keep only the Rexburg and courd tracts.
rd_dat = (
    dat.join(rd_dat, how="left", on="tractcode")
    .with_columns(
        rd_percent_active=pl.col("rubber_duckies_target") / pl.col("population") * 100
    )
    .drop("cnamelong_right")
    .filter(pl.col("tractcode").is_in(rexburg_tracts + courd_tracts))
)
rd_dat


# One trace per team: each sums that team's target for every tract code in
# the Rexburg/courd subset.
fig = go.Figure()
fig.add_trace(
    go.Histogram(histfunc="sum", x=rd_dat["tractcode"], y=rd_dat["target"], name="APT103")
)
fig.add_trace(
    go.Histogram(
        histfunc="sum",
        x=rd_dat["tractcode"],
        y=rd_dat["rubber_duckies_target"],
        name="Rubber Duckies",
    )
)
fig.update_layout(
    title="APT103 vs Rubber Duckies Target From Rexburg/Courd Tracts",
    xaxis_title="Tract code",
    yaxis_title="Target",
)
# Tract codes are all-digit strings; Plotly would otherwise auto-type the axis
# as numeric and bin nearby tracts together. A category axis gives every tract
# its own pair of bars.
fig.update_xaxes(type="category")
fig.show()


### Differences

Across the tracts in Madison County and Kootenai County, there are some subtle differences between the two teams' targets.  The largest difference is in a Madison County tract of about 1000 people.  For most other tracts, the Rubber Duckies' target is slightly above Apt103's.