## Analyze appeals data from the New Jersey Department of Children and Families

In [1]:
import pandas as pd

In [2]:
# Replacement column names for the transmitted-appeals sheet.
cols = [
    "allegation",
    "affirmed_counsel",    # ALJ affirmed; appellant had counsel
    "overturned_counsel",  # ALJ modified, but the agency overruled; counsel
    "affirmed_self",       # sub affirmed and ALJ affirmed; pro se
    "overturned_self",     # ALJ modified, but shot down by the agency
    "sub_total_affirmed",  # all affirmed rulings
    "mod_counsel",         # finding modified; counsel
    "mod_self",            # finding modified; self-represented
    "mod_total",           # total modified
    "total_appeals",
]

In [3]:
# Raw frame of transmitted cases: first sheet of the workbook, columns A-J,
# with the sheet's own header rows skipped and our column names applied.
rdf = pd.read_excel(
    io = "../../data/states/new_jersey/Appeal Outcomes - 2010 to 2020 - Final.xlsx",
    sheet_name = 0,
    usecols = "A:J",
    skiprows = 4,
    names = cols,
)

rdf

Unnamed: 0,allegation,affirmed_counsel,overturned_counsel,affirmed_self,overturned_self,sub_total_affirmed,mod_counsel,mod_self,mod_total,total_appeals
0,Abuse,2,1,2,,5,0,1,1,6
1,Abuse and Neglect,3,,5,1,9,1,1,2,11
2,Educational Neglect,,,,,0,,,0,0
3,Emotional & Physical Abuse,,,,,0,,,0,0
4,Emotional Abuse,2,,2,,4,1,,1,5
...,...,...,...,...,...,...,...,...,...,...
230,Sexual Abuse,5,,5,,10,7,,7,17
231,Sexual Abuse & Neglect,,,,,0,1,,1,1
232,No Data Available,,,,,0,,,0,0
233,Total,6,0,14,0,20,22,4,26,46


In [4]:
# The sheet stacks one table per year; carve each one out by its row range.
# Bounds are index labels and `.loc` slices include both endpoints.
# NOTE(review): 2016 spans 15 rows while every other year spans 14 - confirm
# against the workbook that this is intentional.
years = {
    label: rdf.loc[first:last]
    for label, (first, last) in {
        "2010": (0, 13),
        "2011": (23, 36),
        "2012": (44, 57),
        "2013": (65, 78),
        "2014": (87, 100),
        "2015": (109, 122),
        "2016": (131, 145),
        "2017": (153, 166),
        "2018": (176, 189),
        "2019": (197, 210),
        "2020": (219, 232),
    }.items()
}

In [5]:
# add the year and fix the dtypes
def add_year(item):
    """Tag one year's table with its year and make the value columns numeric.

    Parameters
    ----------
    item : sequence
        A ``(year, table)`` pair such as one entry of ``dict.items()``.
        ``table`` is a DataFrame whose first column holds labels and whose
        remaining columns hold values to be converted.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input table is not modified) with a trailing
        ``year`` column.  Every column after the first, ``year`` included,
        is passed through ``pd.to_numeric``, so a year given as ``"2010"``
        comes back as the number 2010.

    Raises
    ------
    ValueError
        If a value in one of the converted columns cannot be parsed as a number.
    """
    df = item[1].assign(
        year = item[0],
    )

    # Convert whole columns at once.  The previous cell-by-cell `applymap`
    # is deprecated in recent pandas, and writing the result back through
    # `df.iloc[:, 1:] = ...` can leave the columns as object dtype.
    numeric = df.iloc[:, 1:].apply(pd.to_numeric)

    # Re-attach the untouched label column in front of the numeric columns.
    return pd.concat([df.iloc[:, :1], numeric], axis = 1)

# Stack every year's table into one frame, each tagged with its year.
adf = pd.concat(list(map(add_year, years.items())))

# preview of all transmitted appeals
adf.head()

Unnamed: 0,allegation,affirmed_counsel,overturned_counsel,affirmed_self,overturned_self,sub_total_affirmed,mod_counsel,mod_self,mod_total,total_appeals,year
0,Abuse,2.0,1.0,2.0,,5.0,0.0,1.0,1.0,6.0,2010
1,Abuse and Neglect,3.0,,5.0,1.0,9.0,1.0,1.0,2.0,11.0,2010
2,Educational Neglect,,,,,0.0,,,0.0,0.0,2010
3,Emotional & Physical Abuse,,,,,0.0,,,0.0,0.0,2010
4,Emotional Abuse,2.0,,2.0,,4.0,1.0,,1.0,5.0,2010


In [6]:
# Second sheet of the workbook holds the not-transmitted numbers; only
# columns D and G are read, renamed to `year` and `count`.
nps = pd.read_excel(
    io = "../../data/states/new_jersey/Appeal Outcomes - 2010 to 2020 - Final.xlsx",
    sheet_name = 1,
    usecols = "D,G",
    skiprows = 4,
    names = ["year", "count"],
).dropna()  # discard rows where either value is missing

nps

Unnamed: 0,year,count
0,2010.0,1276.0
1,2011.0,1317.0
2,2012.0,1336.0
3,2013.0,1158.0
4,2014.0,846.0
5,2015.0,661.0
6,2016.0,592.0
7,2017.0,499.0
8,2018.0,493.0
9,2019.0,518.0


In [7]:
# appeal outcomes in non transmitted cases (3rd sheet)

# Column names for the outcomes sheet.  They live under their own name so the
# `cols` list used when loading the transmitted sheet is not overwritten;
# rebinding `cols` here made that earlier cell pick up the wrong names when
# re-run after this one.  A `_nc` suffix marks the "no counsel" counterpart.
oc_cols = [
    "allegation",
    "affirmed", # agency affirms original decision, appellant has counsel
    "affirmed_nc", # agency affirms, no counsel
    "withdrawn", # agency affirms b/c appellant withdraws, counsel
    "withdrawn_nc", # agency affirms, no counsel
    "review", # agency affirms through written record review, counsel
    "review_nc", # agency affirms, no counsel
    "pre_reviewed", # agency affirms through previous written review, counsel
    "pre_reviewed_nc", # agency affirms, no counsel
    "forum", # case is in another forum, agency affirms, counsel
    "forum_nc", # agency affirms, no counsel
    "decided", # case was already decided by judge, agency affirms, counsel
    "decided_nc", # agency affirms, no counsel
    "modified", # agency reverses, counsel
    "modified_nc", # agency reverses, no counsel
    "no_perp", # appellant isn't the perpetrator, agency affirms, counsel
    "no_perp_nc", # agency affirms, no counsel
    "moot_nc", # agency affirms, no counsel
    "total_appeals"
]

# outcomes: third sheet, columns B-T; blank cells become 0 in every column,
# the allegation column included
oc = (
    pd.read_excel(
        "../../data/states/new_jersey/Appeal Outcomes - 2010 to 2020 - Final.xlsx",
        names = oc_cols,
        usecols = "B:T",
        sheet_name = 2,
        skiprows = 5
    ).fillna(0)
)

oc.head()

Unnamed: 0,allegation,affirmed,affirmed_nc,withdrawn,withdrawn_nc,review,review_nc,pre_reviewed,pre_reviewed_nc,forum,forum_nc,decided,decided_nc,modified,modified_nc,no_perp,no_perp_nc,moot_nc,total_appeals
0,Abuse,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Abuse and Neglect,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Educational Neglect,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Emotional & PhysicalAbuse,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Emotional Abuse,0,0,0,0,0,0,0,0,2,0,0,0,1,3,0,0,0,6


In [8]:
# Carve each year's table out of the outcomes sheet by row range (inclusive
# index labels); the file lists the years in descending order.
# NOTE(review): 2018 and 2017 span 17 rows while the other years span 16 -
# confirm against the workbook that this is intentional.
yearoc = {
    label: oc.loc[first:last]
    for label, (first, last) in {
        "2020": (0, 15),
        "2019": (25, 40),
        "2018": (49, 65),
        "2017": (74, 90),
        "2016": (101, 116),
        "2015": (125, 140),
        "2014": (151, 166),
        "2013": (176, 191),
        "2012": (202, 217),
        "2011": (227, 242),
        "2010": (251, 266),
    }.items()
}

# stack all outcome tables into one frame, each tagged with its year
aoc = pd.concat(list(map(add_year, yearoc.items())))

aoc.tail()

Unnamed: 0,allegation,affirmed,affirmed_nc,withdrawn,withdrawn_nc,review,review_nc,pre_reviewed,pre_reviewed_nc,forum,forum_nc,decided,decided_nc,modified,modified_nc,no_perp,no_perp_nc,moot_nc,total_appeals,year
262,Physical & Emotional Abuse & Neglect,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2010
263,Physical & Sexual Abuse,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,2010
264,Sexual Abuse,1,0,2,0,0,1,0,0,41,56,5,10,9,24,0,0,0,149,2010
265,Sexual Abuse & Neglect,0,0,0,0,0,0,0,0,0,2,0,1,0,1,0,0,0,4,2010
266,No Data Available,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2010


## Summary tables

In [9]:
# Totals over every transmitted appeal, as a one-row frame, followed by
# success rates.  The `percent_*` columns are plain ratios (0-1), not x100.
tr_sum = (
    pd.Series(
        {
            "total": adf["total_appeals"].sum(),
            "appellant_success": adf["mod_total"].sum(),
            "with_counsel": adf[["affirmed_counsel", "overturned_counsel", "mod_counsel"]].sum().sum(),
            "no_counsel": adf[["affirmed_self", "overturned_self", "mod_self"]].sum().sum(),
            "no_counsel_success": adf["mod_self"].sum(),
            "counsel_success": adf["mod_counsel"].sum(),
        }
    )
    .to_frame("Transmitted Appeals")
    .T
    .assign(
        percent_success = lambda row: row["appellant_success"].div(row["total"]),
        percent_counsel_success = lambda row: row["counsel_success"].div(row["with_counsel"]),
        percent_nocounsel_success = lambda row: row["no_counsel_success"].div(row["no_counsel"]),
    )
)

tr_sum.T

Unnamed: 0,Transmitted Appeals
total,2131.0
appellant_success,820.0
with_counsel,773.0
no_counsel,1359.0
no_counsel_success,407.0
counsel_success,414.0
percent_success,0.384796
percent_counsel_success,0.535576
percent_nocounsel_success,0.299485


In [10]:
# Summary of every non-transmitted outcome, technical disqualifications
# included.  The no-counsel figures are derived as "everything minus counsel".
ntr = (
    aoc
    .pipe(
        lambda outcomes: pd.Series(
            {
                "total": outcomes["total_appeals"].sum(),
                "appellant_success": outcomes["modified"].sum() + outcomes["modified_nc"].sum(),
                # every outcome column for appellants who had counsel
                "with_counsel": sum(
                    outcomes[column].sum()
                    for column in (
                        "affirmed",
                        "withdrawn",
                        "review",
                        "pre_reviewed",
                        "forum",
                        "decided",
                        "modified",
                        "no_perp",
                    )
                ),
                "counsel_success": outcomes["modified"].sum(),
            }
        )
    )
    .to_frame("Non Transmitted")
    .T
    .assign(
        percent_success = lambda row: row["appellant_success"].div(row["total"]),
        percent_counsel_success = lambda row: row["counsel_success"].div(row["with_counsel"]),
        percent_nocounsel_success = lambda row: (
            (row["appellant_success"] - row["counsel_success"])
            .div(row["total"] - row["with_counsel"])
        ),
    )
)

ntr.T

Unnamed: 0,Non Transmitted
total,9004.0
appellant_success,3679.0
with_counsel,1484.0
counsel_success,421.0
percent_success,0.408596
percent_counsel_success,0.283693
percent_nocounsel_success,0.433245


In [11]:
# Non-transmitted appeals without the technical disqualifications: only the
# affirmed / withdrawn / review / modified columns are kept, which leaves out
# appeals previously reviewed, previously decided, pending in another forum
# and moot (the `no_perp` columns are left out as well).
no_disq = (
    aoc
    .loc[
        :,
        [
            "allegation",
            "affirmed",
            "affirmed_nc",
            "withdrawn",
            "withdrawn_nc",
            "review",
            "review_nc",
            "modified",
            "modified_nc",
        ],
    ]
    .pipe(
        lambda kept: pd.Series(
            {
                # everything that was kept except the label column
                "total": kept.drop(columns = "allegation").sum().sum(),
                "appellant_success": kept["modified"].sum() + kept["modified_nc"].sum(),
                "with_counsel": sum(
                    kept[column].sum()
                    for column in ("affirmed", "withdrawn", "review", "modified")
                ),
                "no_counsel": kept[["affirmed_nc", "withdrawn_nc", "review_nc", "modified_nc"]].sum().sum(),
                "no_counsel_success": kept["modified_nc"].sum(),
                "counsel_success": kept["modified"].sum(),
            }
        )
    )
    .to_frame("Not Transmitted")
    .T
    .assign(
        percent_success = lambda row: row["appellant_success"].div(row["total"]),
        percent_counsel_success = lambda row: row["counsel_success"].div(row["with_counsel"]),
        percent_nocounsel_success = lambda row: row["no_counsel_success"].div(row["no_counsel"]),
    )
)

no_disq.T

Unnamed: 0,Not Transmitted
total,4559.0
appellant_success,3679.0
with_counsel,496.0
no_counsel,4063.0
no_counsel_success,3258.0
counsel_success,421.0
percent_success,0.806975
percent_counsel_success,0.84879
percent_nocounsel_success,0.801871


In [12]:
# Stack the not-transmitted and transmitted summaries into one two-row frame.
# Only the first six columns (the raw counts) are taken from each, which
# leaves the percentage columns behind.
combined = pd.concat(
    [summary.iloc[:, :6] for summary in (no_disq, tr_sum)]
)

combined

Unnamed: 0,total,appellant_success,with_counsel,no_counsel,no_counsel_success,counsel_success
Not Transmitted,4559.0,3679.0,496.0,4063.0,3258.0,421.0
Transmitted Appeals,2131.0,820.0,773.0,1359.0,407.0,414.0


In [13]:
# Share of appeals not prosecuted, and how those appeals fared.
# Values are ratios (0-1) in a single "Percent" column.
(
    combined
    .pipe(
        lambda c: pd.Series(
            {
                # not prosecuted, out of all appeals
                "Not prosecuted": (
                    c.loc["Not Transmitted", "total"] / c["total"].sum()
                ),
                # success rate within the not-prosecuted appeals
                "Success of not prosecuted": (
                    c.loc["Not Transmitted", "appellant_success"]
                    / c.loc["Not Transmitted", "total"]
                ),
                # not-prosecuted successes, out of all appeals
                "Success not prosecuted out of all": (
                    c.loc["Not Transmitted", "appellant_success"] / c["total"].sum()
                ),
            },
            name = "Percent",
        ).to_frame()
    )
)

Unnamed: 0,Percent
Not prosecuted,0.681465
Success of not prosecuted,0.806975
Success not prosecuted out of all,0.549925


In [14]:
# Add the transmitted and not-transmitted counts together, then derive the
# overall rates (ratios, 0-1) from the summed counts.
combined_sum = (
    combined.sum().to_frame("Combined").T
    .assign(
        percent_success = lambda row: row["appellant_success"].div(row["total"]),
        percent_counsel = lambda row: row["with_counsel"].div(row["total"]),
        percent_counsel_success = lambda row: row["counsel_success"].div(row["with_counsel"]),
        percent_nocounsel_success = lambda row: row["no_counsel_success"].div(row["no_counsel"]),
    )
)

combined_sum.T

Unnamed: 0,Combined
total,6690.0
appellant_success,4499.0
with_counsel,1269.0
no_counsel,5422.0
no_counsel_success,3665.0
counsel_success,835.0
percent_success,0.672496
percent_counsel,0.189686
percent_counsel_success,0.657998
percent_nocounsel_success,0.67595


## By Year

In [15]:
# all transmitted appeals by year
def _transmitted_totals(grp):
    """Collapse one year's transmitted rows into a Series of summed counts."""
    counsel_cols = ["affirmed_counsel", "overturned_counsel", "mod_counsel"]
    self_cols = ["affirmed_self", "overturned_self", "mod_self"]
    return pd.Series(
        {
            "total": grp["total_appeals"].sum(),
            "appellant_success": grp["mod_total"].sum(),
            "with_counsel": grp[counsel_cols].sum().sum(),
            "no_counsel": grp[self_cols].sum().sum(),
            "no_counsel_success": grp["mod_self"].sum(),
            "counsel_success": grp["mod_counsel"].sum(),
        }
    )


tr_byyear = adf.groupby("year").apply(_transmitted_totals)

tr_byyear

Unnamed: 0_level_0,total,appellant_success,with_counsel,no_counsel,no_counsel_success,counsel_success
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010,399.0,110.0,106.0,293.0,65.0,45.0
2011,361.0,124.0,125.0,236.0,61.0,63.0
2012,343.0,107.0,126.0,218.0,61.0,47.0
2013,145.0,94.0,67.0,78.0,47.0,47.0
2014,149.0,53.0,45.0,104.0,35.0,18.0
2015,150.0,61.0,45.0,105.0,36.0,25.0
2016,159.0,53.0,52.0,107.0,25.0,28.0
2017,136.0,60.0,53.0,83.0,30.0,30.0
2018,153.0,84.0,73.0,80.0,34.0,50.0
2019,90.0,48.0,53.0,37.0,9.0,39.0


In [16]:
# all non transmitted by year, minus disqualifications
def _not_transmitted_totals(grp):
    """Collapse one year's non-transmitted rows into a Series of summed counts."""
    return pd.Series(
        {
            "total": grp[
                [
                    "affirmed",
                    "affirmed_nc",
                    "withdrawn",
                    "withdrawn_nc",
                    "review",
                    "review_nc",
                    "modified",
                    "modified_nc",
                ]
            ].sum().sum(),
            "appellant_success": grp["modified"].sum() + grp["modified_nc"].sum(),
            "with_counsel": sum(
                grp[column].sum()
                for column in ("affirmed", "withdrawn", "review", "modified")
            ),
            "no_counsel": grp[
                ["affirmed_nc", "withdrawn_nc", "review_nc", "modified_nc"]
            ].sum().sum(),
            "no_counsel_success": grp["modified_nc"].sum(),
            "counsel_success": grp["modified"].sum(),
        }
    )


ntr_byyear = (
    aoc
    # keep only the outcome columns that are not technical disqualifications
    .loc[
        :,
        [
            "allegation",
            "affirmed",
            "affirmed_nc",
            "withdrawn",
            "withdrawn_nc",
            "review",
            "review_nc",
            "modified",
            "modified_nc",
            "year",
        ],
    ]
    .groupby("year")
    .apply(_not_transmitted_totals)
)

ntr_byyear

Unnamed: 0_level_0,total,appellant_success,with_counsel,no_counsel,no_counsel_success,counsel_success
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010,602,496,82,520,426,70
2011,662,505,91,571,431,74
2012,662,539,52,610,501,38
2013,573,474,70,503,410,64
2014,420,354,43,377,320,34
2015,324,285,38,286,248,37
2016,300,235,30,270,206,29
2017,271,209,25,246,189,20
2018,264,200,24,240,181,19
2019,303,241,26,277,219,22


In [17]:
# Yearly stats for all appeals: stack the transmitted and not-transmitted
# yearly tables, add the two rows of each year together, then derive rates.
all_byyear = (
    pd.concat([tr_byyear, ntr_byyear])
    .reset_index()
    .groupby("year")
    .sum()
    # rates are ratios (0-1) computed from the summed counts
    .assign(
        percent_counsel_success = lambda yr: yr["counsel_success"].div(yr["with_counsel"]),
        percent_no_counsel_success = lambda yr: yr["no_counsel_success"].div(yr["no_counsel"]),
        percent_no_counsel = lambda yr: yr["no_counsel"].div(yr["total"]),
        percent_appellant_success = lambda yr: yr["appellant_success"].div(yr["total"]),
        percent_with_counsel = lambda yr: yr["with_counsel"].div(yr["total"]),
    )
)

all_byyear

Unnamed: 0_level_0,total,appellant_success,with_counsel,no_counsel,no_counsel_success,counsel_success,percent_counsel_success,percent_no_counsel_success,percent_no_counsel,percent_appellant_success,percent_with_counsel
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010,1001.0,606.0,188.0,813.0,491.0,115.0,0.611702,0.603936,0.812188,0.605395,0.187812
2011,1023.0,629.0,216.0,807.0,492.0,137.0,0.634259,0.609665,0.788856,0.614858,0.211144
2012,1005.0,646.0,178.0,828.0,562.0,85.0,0.477528,0.678744,0.823881,0.642786,0.177114
2013,718.0,568.0,137.0,581.0,457.0,111.0,0.810219,0.786575,0.809192,0.791086,0.190808
2014,569.0,407.0,88.0,481.0,355.0,52.0,0.590909,0.738046,0.845343,0.71529,0.154657
2015,474.0,346.0,83.0,391.0,284.0,62.0,0.746988,0.726343,0.824895,0.729958,0.175105
2016,459.0,288.0,82.0,377.0,231.0,57.0,0.695122,0.612732,0.821351,0.627451,0.178649
2017,407.0,269.0,78.0,329.0,219.0,50.0,0.641026,0.665653,0.808354,0.660934,0.191646
2018,417.0,284.0,97.0,320.0,215.0,69.0,0.71134,0.671875,0.767386,0.681055,0.232614
2019,393.0,289.0,79.0,314.0,228.0,61.0,0.772152,0.726115,0.798982,0.735369,0.201018


## Compare to substantiations

In [18]:
# Yearly table written by another step of the project (columns `year` and
# `value`); presumably substantiation counts per year - confirm upstream.
subs = pd.read_csv(filepath_or_buffer = "../../outputs/NJ_yearly_subs.csv")
subs

Unnamed: 0,year,value
0,2015,3500.0
1,2016,3102.0
2,2017,2765.0
3,2018,2678.0
4,2019,2188.0
5,2020,1475.0


In [19]:
# Appeals per year relative to the yearly `value` from `subs`.
# `year` is the index of `all_byyear` and a column of `subs`; merge's default
# inner join keeps only the years present in both.
(
    all_byyear
    .loc[:, ["total"]]
    .merge(subs, on = "year")
    .assign(
        percent_appealing = lambda merged: merged["total"].div(merged["value"])
    )
)

Unnamed: 0,year,total,value,percent_appealing
0,2015,474.0,3500.0,0.135429
1,2016,459.0,3102.0,0.147969
2,2017,407.0,2765.0,0.147197
3,2018,417.0,2678.0,0.155713
4,2019,393.0,2188.0,0.179616
5,2020,224.0,1475.0,0.151864


---

---

---