## Analyze appeals data from the Arizona Department of Child Safety

In [1]:
import pandas as pd

In [2]:
# Load appeals 2016 through 9/3/2021
df = (
    pd.read_csv(
        "../../data/states/arizona/DCS Listing(REVISED)(Final) (1a).csv",
        index_col = 0
    )
    .assign(
        Vacate = lambda f: f["Vacate"].str.upper().str.strip(),
        Finding = lambda f: f["Finding"].str.upper().str.strip(),
        Lawyer = lambda f: f["Lawyer (Y/N)"].str.upper().str.strip(),
        Decision = lambda f: f["DCS Decision"].str.upper().str.strip()
    )
    .dropna(
        subset = [
            # OAH says records without dates are not expunction hearings
            "Request for Hearing", 
        ]
    )
)

df.head()

Unnamed: 0,Matter ID,Client Sort,Request for Hearing,Vacate,Hearing date,Finding,Lawyer (Y/N),DCS Decision,Appeal?,Lawyer,Decision
1,16C-817531-DCS,Department of Child Safety,1/6/2016,DCS,,,,,,,
2,16C-831210-DCS,Department of Child Safety,1/5/2016,,2/17/2016,UNSUBSTANTIATED,N,DCS rejects,UNK,N,DCS REJECTS
3,16C-859400-DCS,Department of Child Safety,1/5/2016,SETTLE,,,,,,,
4,16C-814334-DCS,Department of Child Safety,1/6/2016,FTA,,,,,,,
5,16C-815333-DCS,Department of Child Safety,1/6/2016,DCS,,,,,,,


In [3]:
df["Vacate"].unique()

array(['DCS', nan, 'SETTLE', 'FTA', 'WITHDRAWN', 'HEARING',
       'MATTER NO LONGER HAS VACATE ORDER', 'VACATE', 'WITHDRAW', 'VACO',
       'CASE PENDING'], dtype=object)

In [4]:
# 2015 is same data in a different set
df2 = (
    pd.read_csv(
        "../../data/states/arizona/DCS Listing 2015 (Final).csv",
        index_col = None,
    )
    .drop( columns = ["Unnamed: 2", "Unnamed: 3"])
)

df2.columns = df.columns[:-2]

df2 = (
    df2
    .assign(
        Vacate = lambda f: f["Vacate"].str.upper().str.strip(),
        Finding = lambda f: f["Finding"].str.upper().str.strip(),
        Lawyer = lambda f: f["Lawyer (Y/N)"].str.upper().str.strip(),
        Decision = lambda f: f["DCS Decision"].str.upper().str.strip()
    )
    .dropna(
        subset = [
            "Request for Hearing", # OAH says these are not expunction hearings
        ]
    )
)

df2.tail(3)

Unnamed: 0,Matter ID,Client Sort,Request for Hearing,Vacate,Hearing date,Finding,Lawyer (Y/N),DCS Decision,Appeal?,Lawyer,Decision
262,15C-803341-DCS,Department of Child Safety,1/7/2015,HEARING,2/13/2015,SUBSTANTIATED(ABUSE),Y,AACCEPEDIT,UNK,Y,AACCEPEDIT
263,15C-733294B-DCS,Department of Child Safety,1/7/2015,HEARING,5/7/2015,SUBSTANTIATED(NEGLECT),N,AACCEPEDIT,UNK,N,AACCEPEDIT
264,15C-796590-DCS,Department of Child Safety,1/7/2015,WITHDRAWN,,,,,,,


In [5]:
# combine years, add year column
df_all = (
    pd.concat([df, df2])
    .assign(
        year = lambda f: f["Request for Hearing"].str.slice(-4),
    ))

df_all.head(3)

Unnamed: 0,Matter ID,Client Sort,Request for Hearing,Vacate,Hearing date,Finding,Lawyer (Y/N),DCS Decision,Appeal?,Lawyer,Decision,year
1,16C-817531-DCS,Department of Child Safety,1/6/2016,DCS,,,,,,,,2016
2,16C-831210-DCS,Department of Child Safety,1/5/2016,,2/17/2016,UNSUBSTANTIATED,N,DCS rejects,UNK,N,DCS REJECTS,2016
3,16C-859400-DCS,Department of Child Safety,1/5/2016,SETTLE,,,,,,,,2016


## Subset

In [6]:
# convenience function for counting cases
def cases(df):
    return df["Matter ID"].nunique()

df_all.pipe(cases)

1599

In [7]:
# remove cases that never went through
subsetting = [
    "WITHDRAWN", 
    "WITHDRAW",
    "CASE PENDING"
]

completed = (
    df_all
    .loc[
        lambda x: 
        ~x["Vacate"].isin(subsetting) &
        ~x["Hearing date"].str.contains("Pending", case = False, na = False)
    ]
)

completed.pipe(cases)

1398

## Hearings

In [8]:
# All cases that actually went to hearings
def filter_hearing(df):
    return df.loc[ lambda x: ~x["Finding"].isna() ]

hearing = df_all.pipe(filter_hearing)

hearing.pipe(cases)

411

In [9]:
# percent going to hearing
hearing.pipe(cases)/df_all.pipe(cases)

0.2570356472795497

In [10]:
# yearly hearing or not
yearly_hearing = (    
    df_all
    .groupby("year")
    .apply( lambda grp: pd.Series({
        # total requests
        "requests": grp.pipe(cases),
        "hearing": grp.pipe(filter_hearing).pipe(cases)
    }))
    .assign(
        percent_hearing = lambda f: f["hearing"]/f["requests"]
    )
)

yearly_hearing

Unnamed: 0_level_0,requests,hearing,percent_hearing
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,245,73,0.297959
2016,238,36,0.151261
2017,158,39,0.246835
2018,345,90,0.26087
2019,289,61,0.211073
2020,192,81,0.421875
2021,132,31,0.234848


In [11]:
yearly_hearing.mean().to_frame("")

Unnamed: 0,Unnamed: 1
requests,228.428571
hearing,58.714286
percent_hearing,0.260674


## Hearing results

In [12]:
# hearing outcomes
sorted(list(hearing["Finding"].unique()))

['SUB(ABUSE)',
 'SUB(NEGLECT)',
 'SUBSTANTIATE (NEGLECT)',
 'SUBSTANTIATE(ABUSE AND NEGLECT)',
 'SUBSTANTIATE(ABUSE)',
 'SUBSTANTIATE(ABUSE); UNSUBSTANTIATE(NEGLECT)',
 'SUBSTANTIATE(MODIFED)(ABUSE)',
 'SUBSTANTIATE(MODIFED)(NEGLECT)',
 'SUBSTANTIATE(MODIFICATION)(NEGLECT)',
 'SUBSTANTIATE(MODIFIED)(ABUSE)',
 'SUBSTANTIATE(MODIFIED)(NEGLECT)',
 'SUBSTANTIATE(NEGELCT)',
 'SUBSTANTIATE(NEGLECT & ABUSE)',
 'SUBSTANTIATE(NEGLECT(',
 'SUBSTANTIATE(NEGLECT)',
 'SUBSTANTIATE9NEGELCT)',
 'SUBSTANTIATE9NEGLECT)',
 'SUBSTANTIATED',
 'SUBSTANTIATED (ABUSE)',
 'SUBSTANTIATED (NEGLECT & ABUSE)',
 'SUBSTANTIATED (NEGLECT)',
 'SUBSTANTIATED(ABUSE',
 'SUBSTANTIATED(ABUSE)',
 'SUBSTANTIATED(NEGLECT)',
 'SUSTANTIATE(NEGLECT)',
 'UNSUB(ABUSE)',
 'UNSUB(NEGELCT)',
 'UNSUB(NEGLECT)',
 'UNSUBSTANTIATE (NEGLECT)',
 'UNSUBSTANTIATE(ABUSE)',
 'UNSUBSTANTIATE(NEGLECT)',
 'UNSUBSTANTIATED',
 'UNSUBSTANTIATED  (NEGLECT)',
 'UNSUBSTANTIATED (ABUSE)',
 'UNSUBSTANTIATED (NEGLECT)',
 'UNSUBSTANTIATED(ABUSE)',
 'UNSUB

In [13]:
# filter to find appellant success in hearing
def filter_hearing_success(df):
    return (
        df
        .pipe(filter_hearing)
        .loc[ 
            lambda x: 
            x["Finding"].str.contains("UNSUB", na = False)  &
            # filter out the only one that has a sub/unsub combo
            ~x["Finding"].str.contains("^SUBSTANTIATE\(ABUSE\)")
        ]
    )

# success in hearing
shearing = hearing.pipe(filter_hearing_success)

shearing["Finding"].value_counts()

UNSUBSTANTIATED(NEGLECT)      26
UNSUBSTANTIATED(ABUSE)        24
UNSUBSTANTIATE(NEGLECT)       17
UNSUBSTANTIATE(ABUSE)         14
UNSUBSTANTIATED (ABUSE)        8
UNSUBSTANTIATED (NEGLECT)      7
UNSUB(NEGLECT)                 6
UNSUB(ABUSE)                   4
UNSUBSTANTIATED                1
UNSUBSTANTIATED  (NEGLECT)     1
UNSUB(NEGELCT)                 1
UNSUBSTANTIATE (NEGLECT)       1
Name: Finding, dtype: int64

In [14]:
# filter only to appellants with lawyers
def filter_lawyer(df):
    return df.loc[ lambda x: x["Lawyer"] == "Y" ]

In [15]:
# stats for hearings
(
    pd
    .DataFrame(
        {
            "All hearings": completed.pipe(filter_hearing).pipe(cases),
            "Won in hearing": completed.pipe(filter_hearing_success).pipe(cases),
            "Had lawyers": completed.pipe(filter_hearing).pipe(filter_lawyer).pipe(cases),
        }, index = ["Cases"])
    .T
    .assign(
        Percent = lambda f: f["Cases"]/hearing.pipe(cases)
    )
)

Unnamed: 0,Cases,Percent
All hearings,411,1.0
Won in hearing,110,0.26764
Had lawyers,79,0.192214


In [16]:
# overall stats for lawyers
(
    pd
    .DataFrame(
        {
            "Had lawyer": (
                completed
                .pipe(filter_hearing)
                .pipe(filter_lawyer)
                .pipe(cases)
            ),
            "Lawyer win": (
                completed
                .pipe(filter_hearing_success)
                .pipe(filter_lawyer)
                .pipe(cases)),
        }, index = ["Cases"]

    )
    .assign(
        percent_lawyer_win = lambda f: f["Lawyer win"]/f["Had lawyer"]
    )
)

Unnamed: 0,Had lawyer,Lawyer win,percent_lawyer_win
Cases,79,35,0.443038


In [17]:
# yearly hearing outcomes
# reshape main frame for merging
yearly_hearing_outcomes = (
    completed
    .pipe(filter_hearing)
    .groupby("year")
    .apply( 
        lambda grp: pd.Series(
            {
                "All hearings": grp.pipe(cases),
                "Won in hearings": grp.pipe(filter_hearing_success).pipe(cases),
                "Had Lawyers": grp.pipe(filter_lawyer).pipe(cases)
            }
        )
    )
    .assign(
        percent_won = lambda f: f["Won in hearings"]/f["All hearings"],
        percent_lawyers = lambda f: f["Had Lawyers"]/f["All hearings"]
    )
)

yearly_hearing_outcomes

Unnamed: 0_level_0,All hearings,Won in hearings,Had Lawyers,percent_won,percent_lawyers
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015,73,21,11,0.287671,0.150685
2016,36,9,2,0.25,0.055556
2017,39,11,14,0.282051,0.358974
2018,90,32,20,0.355556,0.222222
2019,61,20,18,0.327869,0.295082
2020,81,12,8,0.148148,0.098765
2021,31,5,6,0.16129,0.193548


In [18]:
yearly_hearing_outcomes.mean().to_frame("")

Unnamed: 0,Unnamed: 1
All hearings,58.714286
Won in hearings,15.714286
Had Lawyers,11.285714
percent_won,0.258941
percent_lawyers,0.196405


In [19]:
# lawyer success rate across years
yearly_lawyer_success = (
    completed
    .groupby("year")
    .apply( 
        lambda grp: pd.Series(
            {
                "lawyers": grp.pipe(filter_lawyer).pipe(cases),
                "lawyer success": (
                    grp.pipe(filter_lawyer)
                    .pipe(filter_hearing_success)
                    .pipe(cases)
                )
            }
        )
    )
    .assign(
        percent_lawyers_success = lambda f: f["lawyer success"]/f["lawyers"]
    )
)

yearly_lawyer_success

Unnamed: 0_level_0,lawyers,lawyer success,percent_lawyers_success
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,11,4,0.363636
2016,2,0,0.0
2017,14,5,0.357143
2018,20,10,0.5
2019,18,11,0.611111
2020,9,4,0.444444
2021,6,1,0.166667


In [20]:
yearly_lawyer_success.mean()

lawyers                    11.428571
lawyer success              5.000000
percent_lawyers_success     0.349000
dtype: float64

## Other outcomes

In [21]:
## Outcomes: vacated, settled, fail to appear
def filter_fta(df):
    return df.loc[ 
        lambda x: x["Vacate"] == "FTA"
    ]

def filter_settled(df):
    return df.loc[
          lambda x: x["Vacate"].isin(["SETTLE"])
    ]

def filter_dismissed(df):
    return df.loc[
        lambda x: x["Vacate"].isin(["DCS", "VACATE", "VACO"])
    ]

(
    pd
    .DataFrame(
        {
            "Failed to appear": completed.pipe(filter_fta).pipe(cases),
            "Settled (tends to be good)": completed.pipe(filter_settled).pipe(cases),
            "Dismissed": completed.pipe(filter_dismissed).pipe(cases),
        },
        index = ["cases"]
    )
    .T
    .assign(
        percent = lambda f: f["cases"]/(completed.pipe(cases))
    )
)

Unnamed: 0,cases,percent
Failed to appear,361,0.258226
Settled (tends to be good),485,0.346924
Dismissed,138,0.098712


In [22]:
# yearly other outcomes
yearly_other_outcomes = (
    # using df all because calculating the incompletes too
    df_all
    .groupby("year")
    .apply( 
        lambda grp: pd.Series(
            {
                "All requests": grp.pipe(cases),
                "Failed to appear": grp.pipe(filter_fta).pipe(cases),
                "Settled": grp.pipe(filter_settled).pipe(cases),
                "Dismissed": grp.pipe(filter_dismissed).pipe(cases),
            }
        )
    )
    .assign(
        settled_or_dismissed = lambda f: f["Settled"] + f["Dismissed"],
        percent_settled_or_dismissed = lambda f: f["settled_or_dismissed"]/f["All requests"],
        percent_fta = lambda f: f["Failed to appear"]/f["All requests"],
        percent_settled = lambda f: f["Settled"]/f["All requests"],
        percent_dismissed = lambda f: f["Dismissed"]/f["All requests"]
    )
)

yearly_other_outcomes

Unnamed: 0_level_0,All requests,Failed to appear,Settled,Dismissed,settled_or_dismissed,percent_settled_or_dismissed,percent_fta,percent_settled,percent_dismissed
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015,245,64,64,15,79,0.322449,0.261224,0.261224,0.061224
2016,238,52,102,25,127,0.533613,0.218487,0.428571,0.105042
2017,158,35,47,15,62,0.392405,0.221519,0.297468,0.094937
2018,345,82,104,44,148,0.428986,0.237681,0.301449,0.127536
2019,289,75,103,22,125,0.432526,0.259516,0.356401,0.076125
2020,192,34,49,16,65,0.338542,0.177083,0.255208,0.083333
2021,132,19,16,1,17,0.128788,0.143939,0.121212,0.007576


In [23]:
yearly_other_outcomes.mean().to_frame("")

Unnamed: 0,Unnamed: 1
All requests,228.428571
Failed to appear,51.571429
Settled,69.285714
Dismissed,19.714286
settled_or_dismissed,89.0
percent_settled_or_dismissed,0.368187
percent_fta,0.217064
percent_settled,0.288791
percent_dismissed,0.079396


In [24]:
completed["Vacate"].unique()

array(['DCS', nan, 'SETTLE', 'FTA', 'HEARING',
       'MATTER NO LONGER HAS VACATE ORDER', 'VACATE', 'VACO'],
      dtype=object)

In [25]:
# yearly completion outcomes
yearly_completion_outcomes = (
    completed
    .loc[
        lambda x: x["Vacate"] != "FTA"
    ]
    .groupby("year")
    .apply( lambda grp: 
        pd.Series(
        {
            "All": grp.pipe(cases),
            "Won": grp.pipe(filter_hearing_success).pipe(cases),
            "Won or settled": (
                grp.pipe(filter_hearing_success).pipe(cases) +
                grp.pipe(filter_settled).pipe(cases)
            )
        })
    )
    .assign(
        percent_won_settled = lambda f: f["Won or settled"]/f["All"]
    )
)
yearly_completion_outcomes

Unnamed: 0_level_0,All,Won,Won or settled,percent_won_settled
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,152,21,85,0.559211
2016,164,9,111,0.676829
2017,102,11,58,0.568627
2018,238,32,136,0.571429
2019,187,20,123,0.657754
2020,146,12,61,0.417808
2021,48,5,21,0.4375


In [26]:
yearly_completion_outcomes.mean().to_frame("")

Unnamed: 0,Unnamed: 1
All,148.142857
Won,15.714286
Won or settled,85.0
percent_won_settled,0.555594


## DCS decisions

In [27]:
def filter_accepted(df): 
    return (
        df
        .dropna( subset = ["Decision"])
        .loc[ 
            lambda x: 
            x["Decision"].str.contains("ACCEPT|CERT|AACCEPEDIT|Adopted", case = False)
        ]
    )

# Get all accepted decisions
completed.pipe(filter_accepted)["Decision"].value_counts()

CERT                                 153
ALJ CERT                             140
AACCEPEDIT                            26
ACCEPT                                25
ACCEPT W/ MINOR TEXT CORRECTIONS       3
ADOPTED W/ MINOR TEXT CORRECTIONS      1
ADOPTED W/ MODIFICATIONS               1
DCS ACCEPTS W/MODIFCATIONS             1
ALJCERT                                1
AGENCY ACCEPTED DEC.                   1
Name: Decision, dtype: int64

In [28]:
# Percent of won cases accepted
(
    completed
    .pipe(filter_hearing_success)
    .pipe(filter_accepted)
    .pipe(cases)/(
        completed
        .pipe(filter_hearing_success)
        .pipe(cases)
    )
)


0.6818181818181818

## Compare to substantiations

In [29]:
subs = (
    pd
    .read_csv("../../outputs/AZ_yearly_subs.csv")
    .groupby("year")
    .sum()
    .reset_index()
    .assign(
        year = lambda f: f["year"].astype(str)
    )
)

subs

Unnamed: 0,year,Count
0,2010,9954.0
1,2011,11659.0
2,2012,14452.0
3,2013,15640.0
4,2014,18137.0
5,2015,18664.0
6,2016,16886.0
7,2017,15934.0
8,2018,15949.0
9,2019,14441.0


In [30]:
# reshape main frame for merging
appeals_yearly = (
    df_all
    .assign(
        year = lambda f: f["Request for Hearing"].str.slice(-4),
    )
    .groupby("year")
    ["Matter ID"]
    .nunique()
    .to_frame("appellants")
    .reset_index()
)

appeals_yearly

Unnamed: 0,year,appellants
0,2015,245
1,2016,238
2,2017,158
3,2018,345
4,2019,289
5,2020,192
6,2021,132


In [31]:
# get percent of perps who appeal
with_subs = (
    appeals_yearly
    .merge(
        subs,
        on = "year"
    )
    .assign(
        percent = lambda f: round((f["appellants"]/f["Count"]), 3)
    )
    .sort_values("year")
    .loc[ lambda x: x["year"] != "2021" ] # substantiation data for 2021 is incomplete
)

with_subs

Unnamed: 0,year,appellants,Count,percent
0,2015,245,18664.0,0.013
1,2016,238,16886.0,0.014
2,2017,158,15934.0,0.01
3,2018,345,15949.0,0.022
4,2019,289,14441.0,0.02
5,2020,192,9038.0,0.021


In [32]:
# mean percent 2015-2020
with_subs[["appellants", "Count", "percent"]].mean().to_frame("")

Unnamed: 0,Unnamed: 1
appellants,244.5
Count,15152.0
percent,0.016667


---

---

---