In [14]:
import ibis
from ibis import _
from alaska_legislative_api import _parse

# Keep interactive table reprs compact: cap how deep nested types are rendered
# and how many elements of a collection value are shown.
interactive_repr = ibis.options.repr.interactive
interactive_repr.max_depth = 3
interactive_repr.max_length = 10

In [102]:
# Display the members table.
# NOTE(review): `parsed` is not assigned in any visible cell (only `_parse` is
# imported) — confirm where it is created so Restart & Run All works.
parsed.members

In [103]:
# Display the votes table.
parsed.votes

In [110]:
# Display the sessions table.
parsed.sessions

In [193]:
# Show the 10 most frequent raw StatusDate values to see which formats occur.
parsed.bills.StatusDate.topk(10)

In [190]:
# Derive a numeric StatusMonth from the first three characters of StatusDate.
# No else_ branch is supplied, so prefixes that are not a month abbreviation
# are left without a match (an integer fallback from StatusDate[5:7] was
# considered but is not applied).
month_abbrs = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
month_branches = [(abbr, number) for number, abbr in enumerate(month_abbrs, start=1)]

b = parsed.bills.mutate(
    StatusMonth=_.StatusDate[0:3].cases(*month_branches),
    # NOTE(review): StatusDay is currently just a copy of StatusDate.
    StatusDay=_.StatusDate,
)

# For rows where a month was recognised, count rows per
# (month number, three-letter prefix) and list them in month order.
month_counts = (
    b.filter(_.StatusMonth.notnull())
    .group_by("StatusMonth", _.StatusDate[:3])
    .agg(n=_.count())
)
month_counts.order_by("StatusMonth")

In [177]:
# Split bills on the 4th character of StatusDate: a space there marks the
# "weird" format, anything else the "real" one.
fourth_char = _.StatusDate[3]
with_real_dates = b.filter(fourth_char != " ")
with_weird_dates = b.filter(fourth_char == " ")

# Most common 4-character prefixes among the weird dates, sorted by prefix.
weird_prefix_counts = with_weird_dates.StatusDate[:4].name("n").topk(40)
weird_prefix_counts.order_by("n").preview(max_rows=40)

In [185]:
# Weird-date rows whose last two StatusDate characters parse to the integer 2.
ends_in_two = _.StatusDate.right(2).cast(int) == 2
with_weird_dates.filter(ends_in_two)

In [145]:
# Per-session row counts for the real-date and weird-date subsets, shown as a pair.
tuple(
    subset.Session.value_counts()
    for subset in (with_real_dates, with_weird_dates)
)

(┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓
 ┃[1m [0m[1mSession[0m[1m [0m┃[1m [0m[1mSession_count[0m[1m [0m┃
 ┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩
 │ [2mint16[0m   │ [2mint64[0m         │
 ├─────────┼───────────────┤
 │      [1;36m14[0m │          [1;36m1461[0m │
 │      [1;36m30[0m │           [1;36m787[0m │
 │      [1;36m28[0m │           [1;36m749[0m │
 │      [1;36m34[0m │           [1;36m210[0m │
 │      [1;36m29[0m │           [1;36m759[0m │
 │      [1;36m18[0m │          [1;36m1135[0m │
 │      [1;36m26[0m │           [1;36m911[0m │
 │      [1;36m31[0m │           [1;36m674[0m │
 │      [1;36m22[0m │          [1;36m1122[0m │
 │      [1;36m32[0m │           [1;36m831[0m │
 │       [2m…[0m │             [2m…[0m │
 └─────────┴───────────────┘,
 ┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓
 ┃[1m [0m[1mSession[0m[1m [0m┃[1m [0m[1mSession_count[0m[1m [0m┃
 ┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩
 │ [2mint16[0m   │ [2mint64[0m         │
 ├─────────┼──────────────

In [174]:
# Show only the sessions numbered 12 and above.
parsed.sessions.filter(_.Session >= 12)

In [161]:
# Distribution of characters 5-6 of StatusDate among the weird dates, sorted by value.
weird_chars_5_7 = with_weird_dates.StatusDate[5:7].name("n")
weird_chars_5_7.topk(40).order_by("n").preview(max_rows=40)

In [162]:
# Distribution of characters 8-9 of StatusDate among the weird dates, sorted by value.
weird_chars_8_10 = with_weird_dates.StatusDate[8:10].name("n")
weird_chars_8_10.topk(40).order_by("n").preview(max_rows=40)

In [None]:
# Distribution of characters 5-6 of StatusDate cast to int, over ALL bills.
# NOTE(review): this cell has no execution count; confirm the cast succeeds
# for every StatusDate format before relying on it.
status_5_7_as_int = parsed.bills.StatusDate[5:7].cast(int).name("n")
status_5_7_as_int.topk(40).order_by("n").preview(max_rows=40)

In [169]:
# Count how often each distinct Flag1 value occurs in bills.
parsed.bills.Flag1.value_counts()

In [96]:
# Write the sessions table to "sessions.parquet" (a relative path).
parsed.sessions.to_parquet("sessions.parquet")

In [95]:
# A member Code does NOT identify a single person:
# e.g. MIL stands for Mike Miller, Charisse Millett, etc.
# For each Code, count the distinct FormalName values and gather the distinct
# (FirstName, LastName) pairs behind it, most ambiguous codes first.
name_pair = ibis.struct(
    {
        "FirstName": _.FirstName,
        "LastName": _.LastName,
    }
)
names_per_code = parsed.members.group_by("Code").agg(
    n_formal=_.FormalName.nunique(),
    infos=name_pair.collect().unique(),
)
names_per_code.order_by(_.n_formal.desc())

In [5]:
# CSV exports of the spreadsheet: note the /pub path ending and the
# output=csv query param; the gid value selects which tab is exported.
# https://docs.google.com/spreadsheets/d/1kErTlfIW_5F5MmlvBohTPtpPW0uXoNeq_neXrVHwhE8/edit?gid=0#gid=0
_PUBLISHED_CSV_PREFIX = (
    "https://docs.google.com/spreadsheets/d/e/"
    "2PACX-1vRBkr9cSna3m4_64VgdGN3PIP9BgFw4wLi3k0dQn5peGY-I3kqAPY8r77xHKl-KHm0rTuJVMy3I8Qml"
    "/pub?single=true&output=csv&gid="
)
URL_PEOPLE = _PUBLISHED_CSV_PREFIX + "925126040"
URL_MEMBERS_1_TO_9 = _PUBLISHED_CSV_PREFIX + "49484443"
URL_MEMBERS_10_PLUS = _PUBLISHED_CSV_PREFIX + "0"

people = ibis.read_csv(URL_PEOPLE)
people

In [6]:
# Read the members sheet for sessions 10+ and cache it so later cells do not
# re-read the CSV.
members_10_plus = ibis.read_csv(URL_MEMBERS_10_PLUS).cache()
members_10_plus

In [7]:
# Reduce the sheet to the columns needed to map (Session, Code) -> PersonID.
lookup_columns = ("Session", "Code", "PersonID")
members_10_plus_lookup = members_10_plus.select(*lookup_columns)
members_10_plus_lookup

In [22]:
# Probe the lookup for session 11, code "And" (no trailing space here).
is_session_11_and = (_.Session == 11) & (_.Code == "And")
members_10_plus_lookup.filter(is_session_11_and)

In [195]:
# Attach PersonID to every member row via (Session, Code).
# The neighbouring probe cells match the same member as "And" in the lookup
# but as "And " (trailing space) in parsed.members, so an exact-equality join
# on the raw Code would silently miss those rows. Strip whitespace on both
# sides first; this is a no-op for codes that are already clean.
members_stripped = parsed.members.mutate(Code=_.Code.strip())
lookup_stripped = members_10_plus_lookup.mutate(Code=_.Code.strip())
members_stripped.left_join(lookup_stripped, ["Session", "Code"])

In [24]:
# Probe parsed.members for session 11, code "And " (note the trailing space).
is_session_11_and_padded = (_.Session == 11) & (_.Code == "And ")
parsed.members.filter(is_session_11_and_padded)

In [None]:
# The (FirstName, LastName) pair is ALSO not a unique identifier for a person:
# Senator Albert Adams went by "Albert" for sessions 12 to 20,
# and then switched to "Al" for session 21.

# In addition, (LastName,Session) is not a unique identifier.
# The 10th legislature had `W. E. "Brad" Bradley` and `Bob Bradley`

In [None]:
# Count rows per (FirstName, LastName) pair and list the sessions each pair
# appears in, most frequent pairs first.
# Reads from `parsed.members`: a bare `members` name is never assigned in the
# visible notebook, so the previous form could not run on a fresh kernel.
parsed.members.group_by("FirstName", "LastName").agg(
    n=_.count(), sessions=_.Session.collect().sort()
).order_by(_.n.desc())

In [13]:
# Load every scraped bill JSON file into one cached table, print its row
# count, then display it.
bills_glob = "data/scraped_raw/bills/*.json"
bills = ibis.read_json(bills_glob).cache()
n_bills = bills.count().execute()
print(n_bills)
bills

24275


In [30]:
# Intended column name -> type mapping for the scraped votes.
# NOTE(review): this mapping is not passed to any call in the visible cell;
# confirm whether it should be applied when the votes are read.
vote_schema = dict(
    Vote="string",
    VoteNum="string",
    Bill="string",
    Member="string",
    MemberParty="string",
    MemberChamber="string",
    MemberName="string",
    Title="string",
    VoteDate="date",
    Session="int64",
)

# Load every scraped vote JSON file into one cached table, print its row
# count, then display it.
votes_glob = "data/scraped_raw/votes/*.json"
votes = ibis.read_json(votes_glob).cache()
n_votes = votes.count().execute()
print(n_votes)
votes

640343
