## Analyze appeals data from the Michigan Department of Licensing and Regulatory Affairs

*Note:* Michigan only provided partial data for 2011 and 2021, so those years are excluded from some parts of the analysis.

In [1]:
import pandas as pd

In [2]:
# Load the 2011-2015 spreadsheet. The first two rows are skipped, every
# column gets an explicit name, and the "blank*" placeholder columns are
# discarded straight after loading.
df_raw = (
    pd.read_excel(
        "../../data/states/michigan/Copy_of_Expunction_FOIA_2011_-_2015(1).xlsx",
        skiprows=2,
        names=[
            "blank",
            "blank2",
            "blank3",
            "received",
            "received_rehearing",
            "scheduled",
            "held",
            "decided",
            "decided_rehearing",
            "pending",
            "blank4",
        ],
    )
    .drop(columns=["blank", "blank2", "blank3", "blank4"])
)

df_raw.head(10)

Unnamed: 0,received,received_rehearing,scheduled,held,decided,decided_rehearing,pending
0,,,,,,,
1,,,,,,,
2,86,1,179,73,104,5,64
3,,,,,,,
4,,,,,,,
5,,,,,,,
6,Received -\n Original\n Appeals,Received -\n Rehearing /\n Recon / Remand,Hearings\n Scheduled,Hearings\n Held,Decisions Issued -\n Original Appeals,Decisions Issued -\n Rehearing / Recon\n / Remand,Pending
7,,,,,,,
8,234,27,500,265,286,26,86
9,,,,,,,


In [3]:
# Keep only the rows at positions 2, 8, 14, 20 and 26 of the raw sheet
# (every sixth row, starting at 2) and label them, in that order, with the
# years 2015 down to 2011.
df1 = df_raw.iloc[list(range(2, 27, 6))].assign(
    year=list(range(2015, 2010, -1))
)

df1

Unnamed: 0,received,received_rehearing,scheduled,held,decided,decided_rehearing,pending,year
2,86,1,179,73,104,5,64,2015
8,234,27,500,265,286,26,86,2014
14,327,17,565,302,353,12,142,2013
20,335,1,463,263,250,1,158,2012
26,55,0,6,2,3,0,52,2011


In [4]:
# Load the second spreadsheet (2015-2021). The first two rows are skipped,
# columns are named explicitly, and the leading "blank" placeholder column
# is discarded.
df2_raw = (
    pd.read_excel(
        "../../data/states/michigan/Copy_of_Expunction_FOIA_2015_-_2021(1).xlsx",
        skiprows=2,
        names=[
            "blank",
            "received",
            "received_rehearing",
            "scheduled",
            "held",
            "decided",
            "decided_rehearing",
            "pending",
        ],
    )
    .drop(columns=["blank"])
)

df2_raw.head(10)

Unnamed: 0,received,received_rehearing,scheduled,held,decided,decided_rehearing,pending
0,497,13,745,544,381,18,715
1,,,,,,,
2,,,,,,,
3,Original Appeal Received,Remand/REH\nReceived,Hearings Scheduled,Hearings Held,Original Appeal Closed,Remands/REH Closed,Pending
4,821,18,789,617,427,13,604
5,821,18,789,617,427,13,604
6,,,,,,,
7,,,,,,,
8,Original Appeal Received,Remand/REH\nReceived,Hearings Scheduled,Hearings Held,Original Appeal Closed,Remands/REH Closed,Pending
9,692,12,1155,725,746,14,205


In [5]:
# Keep only the rows at positions 0, 5, ..., 30 of the raw sheet (every
# fifth row) and label them, in that order, with the years 2021 down to 2015.
df2 = df2_raw.iloc[list(range(0, 31, 5))].assign(
    year=list(range(2021, 2014, -1))
)

In [6]:
# Stack the yearly rows from both spreadsheets, order them by year and
# renumber the rows from zero (the old row labels are discarded).
exp_raw = (
    pd.concat([df1, df2])
    .sort_values("year")
    .reset_index(drop=True)
)

exp_raw

Unnamed: 0,received,received_rehearing,scheduled,held,decided,decided_rehearing,pending,year
0,55,0,6,2,3,0,52,2011
1,335,1,463,263,250,1,158,2012
2,327,17,565,302,353,12,142,2013
3,234,27,500,265,286,26,86,2014
4,86,1,179,73,104,5,64,2015
5,226,0,119,55,67,0,159,2015
6,390,4,712,428,391,1,161,2016
7,735,9,1120,746,600,6,299,2017
8,802,14,1323,806,838,16,261,2018
9,692,12,1155,725,746,14,205,2019


In [7]:
# 2015 appears once in each spreadsheet, so collapse to one row per year by
# summing every column within a year.
# NOTE(review): this also sums `pending` for the two 2015 rows; if pending
# is a point-in-time count rather than a flow, the 2015 value may be
# overstated — confirm.
exp = (
    exp_raw
    .groupby("year")
    .sum()
    .reset_index()
)
exp

Unnamed: 0,year,received,received_rehearing,scheduled,held,decided,decided_rehearing,pending
0,2011,55,0,6,2,3,0,52
1,2012,335,1,463,263,250,1,158
2,2013,327,17,565,302,353,12,142
3,2014,234,27,500,265,286,26,86
4,2015,312,1,298,128,171,5,223
5,2016,390,4,712,428,391,1,161
6,2017,735,9,1120,746,600,6,299
7,2018,802,14,1323,806,838,16,261
8,2019,692,12,1155,725,746,14,205
9,2020,821,18,789,617,427,13,604


In [8]:
# Sum of the `received` column over every year in `exp` (no years filtered out)
exp.loc[:, "received"].sum()

5200

In [9]:
# Sum of the `held` column over every year in `exp` (no years filtered out)
exp.loc[:, "held"].sum()

4826

In [10]:
# Average of `received` per year over 2012-2020 only.
# Rows are selected by year instead of by hard-coded row labels, so the
# result does not depend on how `exp` happens to be ordered or indexed.
(
    exp
    .loc[
        lambda x:
        (x["year"] > 2011) &
        (x["year"] < 2021)
    ]
    ["received"]
    .mean()
)

516.4444444444445

In [11]:
# Average of `held` per year over 2012-2020 only.
# Rows are selected by year instead of by hard-coded row labels, so the
# result does not depend on how `exp` happens to be ordered or indexed.
(
    exp
    .loc[
        lambda x:
        (x["year"] > 2011) &
        (x["year"] < 2021)
    ]
    ["held"]
    .mean()
)

475.55555555555554

In [12]:
# helper: one-column summary table (total and averages) for a frame
def summarize(f):
    """Return a single-column table summarizing `received` and `held`.

    `f` must provide `received` and `held` columns. The result has one row
    per statistic ("received" total, "average received", "average held")
    and a single column whose label is the empty string.
    """
    received = f["received"]
    stats = {
        "received": received.sum(),
        "average received": received.mean(),
        "average held": f["held"].mean(),
    }
    # Build a one-row frame, then flip it so each statistic becomes a row.
    single_row = pd.DataFrame(stats, index=[""])
    return single_row.T

In [13]:
# Summary table restricted to years after 2011 and before 2021, so that
# incomplete years do not distort the totals and averages.
summarize(
    exp[(exp["year"] > 2011) & (exp["year"] < 2021)]
)

Unnamed: 0,Unnamed: 1
received,4648.0
average received,516.444444
average held,475.555556


In [14]:
# Per-year table with an extra column: hearings held divided by hearings
# scheduled.
# NOTE(review): the column is called `percent_decided` but it is computed
# from `held` (not `decided`) and is a fraction, not a percentage — confirm
# which was intended.
exp.assign(
    percent_decided=lambda f: f["held"] / f["scheduled"]
)

Unnamed: 0,year,received,received_rehearing,scheduled,held,decided,decided_rehearing,pending,percent_decided
0,2011,55,0,6,2,3,0,52,0.333333
1,2012,335,1,463,263,250,1,158,0.568035
2,2013,327,17,565,302,353,12,142,0.534513
3,2014,234,27,500,265,286,26,86,0.53
4,2015,312,1,298,128,171,5,223,0.42953
5,2016,390,4,712,428,391,1,161,0.601124
6,2017,735,9,1120,746,600,6,299,0.666071
7,2018,802,14,1323,806,838,16,261,0.609221
8,2019,692,12,1155,725,746,14,205,0.627706
9,2020,821,18,789,617,427,13,604,0.782003


In [15]:
# Read the yearly registry figures produced elsewhere in the project
reg = pd.read_csv(
    "../../outputs/MI_yearly_subs.csv",
)
reg

Unnamed: 0,year,subs
0,2010,4878
1,2011,5432
2,2012,6323
3,2013,7394
4,2014,7112
5,2015,8537
6,2016,9832
7,2017,11581
8,2018,13910
9,2019,17240


In [16]:
# Join the appeals counts to the yearly registry figures to get, per year,
# `received` divided by `subs`. Only years after 2011 and before 2021 are
# used; the join keeps years present in both tables.
merged = (
    pd.merge(
        exp[(exp["year"] > 2011) & (exp["year"] < 2021)],
        reg,
        how="inner",
        on="year",
    )
    .assign(percent_appealing=lambda f: f["received"] / f["subs"])
    .loc[:, ["year", "received", "subs", "percent_appealing"]]
)

merged

Unnamed: 0,year,received,subs,percent_appealing
0,2012,335,6323,0.052981
1,2013,327,7394,0.044225
2,2014,234,7112,0.032902
3,2015,312,8537,0.036547
4,2016,390,9832,0.039666
5,2017,735,11581,0.063466
6,2018,802,13910,0.057656
7,2019,692,17240,0.040139
8,2020,821,16458,0.049885


In [17]:
# Column-wise averages of the merged table, shown as a one-column frame.
# Every column is averaged, including `year`.
(
    merged
    .mean()
    .to_frame(name="Average 2012-2020")
)

Unnamed: 0,Average 2012-2020
year,2016.0
received,516.444444
subs,10931.888889
percent_appealing,0.046385


---

---

---