In [1]:
import pandas as pd
import numpy as np 

import seaborn as sns
import matplotlib.pyplot as plt

import geopandas as gpd 

In [2]:
df_risk = pd.read_csv('../data/bdaic created tables/df_high_risk.csv')

In [3]:
df_risk.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Oppositional(Non-compliance with Authority),Relationship Permanence,Social Functioning,Independent Living Skills,Living Situation,Physical Abuse,Runaway*,Youth Residential Stability,County,Region
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,0,0,0,0.0,0,0,0,0.0,Madison,West
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0,0,0,0.0,0,0,0,0.0,Sullivan,East
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0,0,0,0.0,0,0,0,0.0,Warren,Middle
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,0,1,1,1.0,0,0,0,1.0,Hamilton,East
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,1,0,0,0.0,0,0,0,0.0,Davidson,Middle


In [4]:
df_risk.drop(columns=['Region'], inplace=True)

In [5]:
df_risk_buckets = pd.read_csv('tn_high_risk_buckets_by_NAME.csv')

In [6]:
df_risk_buckets.head()

Unnamed: 0,GEOID,NAME,Region,Mental_Health_Behavioral_Impulse_Control,Mental_Health_Cultural_Spiritual_and_Engagement_Factors,Mental_Health_Mood_Emotional_Regulation,Mental_Health_Physical_Medical,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,47065,Hamilton,East,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
1,47115,Marion,East,0.0,1.0,,0.0,0.0,0.0,0.0,,1.0,1.0,1.0,0.0,1.0,0.0
2,47185,White,Middle,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0
3,47129,Morgan,East,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
4,47013,Campbell,East,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0


## Merge

In [7]:
# Light normalization: strip, upper, remove " COUNTY" suffix, collapse spaces
def normalize_county(s: pd.Series) -> pd.Series:
    s = s.astype(str).str.strip().str.upper()
    s = s.str.replace(r"\s+COUNTY$", "", regex=True)  # drop trailing " COUNTY"
    s = s.str.replace(r"\s+", " ", regex=True)        # collapse spaces
    return s

df_risk = df_risk.copy()
df_risk_buckets = df_risk_buckets.copy()
df_risk["__key"] = normalize_county(df_risk["County"])
df_risk_buckets["__key"] = normalize_county(df_risk_buckets["NAME"])

# Merge (choose how='left'/'inner' as needed)
merged = df_risk.merge(df_risk_buckets, on="__key", how="left", suffixes=("_left", "_right"))

# Optional: keep original names and drop the helper key
merged = merged.drop(columns=["__key"])

# Quick check: which df1 rows didn’t find a match in df2
unmatched = merged[df_risk_buckets.columns.difference(["__key"]).to_list()[0]].isna()
unmatched_rows = merged[merged["NAME"].isna()]

In [8]:
merged.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [9]:
unmatched_rows.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks


In [10]:
df_risk_both = merged.copy()

In [11]:
df_risk_both.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [12]:
{"County": "County" in df_risk_both.columns, "NAME": "NAME" in df_risk_both.columns}

{'County': True, 'NAME': True}

In [13]:
len(df_risk_both)

5667

## Add correct regions

In [14]:
region = pd.read_csv('regions corrected.csv')

In [15]:
region.head()

Unnamed: 0.1,Unnamed: 0,County Name,Region
0,0,Obion County,West
1,1,Weakley County,West
2,2,Dyer County,West
3,3,Gibson County,West
4,4,Crockett County,West


In [16]:
def normalize(s):
    return (s.astype(str)
              .str.strip().str.upper()
              .str.replace(r"\s+COUNTY$", "", regex=True)
              .str.replace(r"\s+", " ", regex=True))

df_risks_region = df_risk_both.copy()
region = region.copy()
df_risks_region["__key"] = normalize(df_risks_region["County"])
region["__key"] = normalize(region["County Name"])  # or whatever the column is

merged_2 = df_risks_region.merge(region[["__key", "Region"]], on="__key", how="left").drop(columns="__key")

In [18]:
print("left cols:", df_risks_region.columns.tolist()[:10], "…")
print("right cols:", region.columns.tolist()[:10], "…")
print("merged shape:", merged_2.shape)

left cols: ['PERSON ID', 'CURRENT AGE', 'COMMITMENT COUNTY', 'RESPONSIBLE COUNTY', 'GENDER', 'REMOVAL ZIP CODE', 'PLACEMENT ZIP CODE', 'LOCATION BEGIN DATE', 'LOCATION END DATE', 'Decision-Making (Judgement)'] …
right cols: ['Unnamed: 0', 'County Name', 'Region', '__key'] …
merged shape: (5667, 70)


In [19]:
merged_2.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks,Region_y
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,Mid West
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,Northeast
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,TN Valley
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,TN Valley
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,Mid-State


In [20]:
len(merged_2)

5667

In [21]:
merged_2.columns

Index(['PERSON ID', 'CURRENT AGE', 'COMMITMENT COUNTY', 'RESPONSIBLE COUNTY',
       'GENDER', 'REMOVAL ZIP CODE', 'PLACEMENT ZIP CODE',
       'LOCATION BEGIN DATE', 'LOCATION END DATE',
       'Decision-Making (Judgement)', 'Impulsivity/Hyperactivity',
       'School Behavior', 'Substance Use', 'Spiritual/Religious',
       'Traditions And Rituals', 'Youth Involvement With Care',
       'Anger Control', 'Anxiety', 'Depression',
       'Emotional And/Or Physical Dysregulation', 'Numbing',
       'Medical/Physical', 'Psychosis (Thought Disorder)', 'Danger To Others',
       'Intentional Misbehavior', 'Non-Suicidal Self-Injurious Behavior',
       'Other Self-Harm (Recklessness)', 'Suicide Risk', 'Sexual Aggression',
       'Sexual Development', 'Adjustment To Trauma', 'Cultural Stress',
       'Emotional Abuse', 'Environmental Influences', 'Medical Trauma',
       'Sexual Abuse', 'Attachment Difficulties', 'Community Life',
       'Family Functioning', 'Family Strengths',
       'Inter

In [22]:
df_risk_all = merged_2.drop(columns=['Region_x'])

In [23]:
df_risk_all.rename(columns={'Region_y': 'Region'}, inplace=True)

In [24]:
df_risk_all.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks,Region
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,Mid West
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,Northeast
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,TN Valley
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,TN Valley
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,Mid-State


In [None]:
df_risk_all.

In [None]:
df_risk_all = merged.drop(columns=['CURRENT AGE', 'COMMITMENT COUNTY', 'RESPONSIBLE COUNTY',
       'GENDER', 'REMOVAL ZIP CODE', 'PLACEMENT ZIP CODE',
       'LOCATION BEGIN DATE', 'LOCATION END DATE', 'Region_x'])

In [None]:
df_risk_all.head()

Unnamed: 0,PERSON ID,Decision-Making (Judgement),Impulsivity/Hyperactivity,School Behavior,Substance Use,Spiritual/Religious,Traditions And Rituals,Youth Involvement With Care,Anger Control,Anxiety,...,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks,Region_y
0,353258,0,0,0,0,0,0,0.0,0,0,...,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,Mid West
1,706512,0,0,0,0,0,0,0.0,0,0,...,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,Northeast
2,1348374,0,0,0,0,0,0,0.0,0,0,...,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,TN Valley
3,2470086,1,0,0,1,1,0,0.0,1,1,...,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,TN Valley
4,3965628,1,1,0,1,0,0,0.0,0,0,...,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,Mid-State


In [None]:
df_risk_all.rename(columns={'Region_y': 'Region'}, inplace=True)

In [25]:
df_risk_all.to_csv('df_risk_fixed.csv', index=False)

## Number of Youth per Region

In [26]:
# Required inputs: merged has columns 'Region' and 'Person ID'

counts = (
    df_risk_all.groupby("Region", dropna=False)["PERSON ID"]
          .nunique()
          .reset_index(name="unique_persons")
)

total_unique = counts["unique_persons"].sum()
counts["percent"] = (counts["unique_persons"] / total_unique * 100).round(2)

# Append a total row
total_row = pd.DataFrame([{"Region": "Total", "unique_persons": total_unique, "percent": 100.0}])
counts_with_total = pd.concat([counts, total_row], ignore_index=True)

# Optional: sort by count descending, keeping Total at bottom
counts_with_total = (
    counts_with_total
      .sort_values(["Region"], ascending=[True])
      .sort_values("unique_persons", ascending=False, kind="stable")
)

counts_with_total

Unnamed: 0,Region,unique_persons,percent
6,Total,5667,100.0
2,Mid-State,1201,21.19
5,West,979,17.28
3,Northeast,943,16.64
1,Mid West,935,16.5
0,East,880,15.53
4,TN Valley,729,12.86


In [27]:
n_youth = pd.DataFrame(
    df_risk_all.groupby("Region", dropna=False)["PERSON ID"]
          .nunique()
          .reset_index(name="n_youth")
)
n_youth

Unnamed: 0,Region,n_youth
0,East,880
1,Mid West,935
2,Mid-State,1201
3,Northeast,943
4,TN Valley,729
5,West,979


In [28]:
def normalize(s):
    return (s.astype(str)
              .str.strip()
              .str.upper()
              .str.replace(r"\s+", " ", regex=True))

# Make keys
df_risk_all = df_risk_all.copy()
n_youth = n_youth.copy()
df_risk_all["__key"] = normalize(df_risk_all["Region"])
n_youth["__key"] = normalize(n_youth["Region"])

# Keep one row per normalized region with its n_youth
n_youth_keyed = n_youth[["__key", "n_youth"]].drop_duplicates("__key")

# Merge counts onto df_risk_all
df_risk_all = (
    df_risk_all.merge(n_youth_keyed, on="__key", how="left")
               .drop(columns="__key")
)

In [29]:
df_risk_all.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks,Region,n_youth
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,Mid West,935
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,Northeast,943
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,TN Valley,729
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,TN Valley,729
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,Mid-State,1201


In [None]:
df_risk_all.columns

Index(['PERSON ID', 'Decision-Making (Judgement)', 'Impulsivity/Hyperactivity',
       'School Behavior', 'Substance Use', 'Spiritual/Religious',
       'Traditions And Rituals', 'Youth Involvement With Care',
       'Anger Control', 'Anxiety', 'Depression',
       'Emotional And/Or Physical Dysregulation', 'Numbing',
       'Medical/Physical', 'Psychosis (Thought Disorder)', 'Danger To Others',
       'Intentional Misbehavior', 'Non-Suicidal Self-Injurious Behavior',
       'Other Self-Harm (Recklessness)', 'Suicide Risk', 'Sexual Aggression',
       'Sexual Development', 'Adjustment To Trauma', 'Cultural Stress',
       'Emotional Abuse', 'Environmental Influences', 'Medical Trauma',
       'Sexual Abuse', 'Attachment Difficulties', 'Community Life',
       'Family Functioning', 'Family Strengths',
       'Interpersonal/Social Connectedness', 'Natural Supports', 'Neglect',
       'Oppositional(Non-compliance with Authority)',
       'Relationship Permanence', 'Social Functioning',
  

In [30]:
df_risk_all.to_csv('df_cans_fixed.csv', index=False)

### Percents 

In [31]:
exclude = {"PERSON ID", "County", "Region", "n_youth"}
indicator_cols = [c for c in df_risk_all.columns if c not in exclude]

percent_ones_by_county = (
    (df_risk_all[indicator_cols] == 1)
      .groupby(df_risk_all["County"], dropna=False)
      .mean()
      .mul(100)
      .round(2)
      .reset_index()
)

percent_ones_by_county.head()

Unnamed: 0,County,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,Anderson,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.37,...,0.0,0.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,100.0
1,Bedford,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,...,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,0.0,0.0
2,Benton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76.92,...,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,100.0,0.0
3,Bledsoe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.67,...,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
4,Blount,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.44,...,0.0,100.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,0.0


In [None]:
exclude = {"PERSON ID", "County", "Region", "n_youth"}
indicator_cols = [c for c in df_risk_all.columns if c not in exclude]

# collapse to one record per youth per region (1 if youth ever had a 1)
per_person = (
    df_risk_all.groupby(["Region", "PERSON ID"], dropna=False)[indicator_cols]
               .max()  # or .any() for booleans
               .reset_index()
)

# percent of youth with 1 by region
percent_ones_by_region = (
    per_person[indicator_cols].eq(1)
             .groupby(per_person["Region"], dropna=False)
             .mean()
             .mul(100).round(2)
             .reset_index()
)

In [None]:
percent_ones_by_region.head()

Unnamed: 0,Region,Decision-Making (Judgement),Impulsivity/Hyperactivity,School Behavior,Substance Use,Spiritual/Religious,Traditions And Rituals,Youth Involvement With Care,Anger Control,Anxiety,...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,East,45.57,33.07,11.93,19.77,73.52,0.34,17.05,27.61,38.64,...,0.0,37.05,0.0,75.57,94.55,98.52,89.32,13.75,52.39,22.16
1,Mid West,40.32,27.59,13.8,21.93,60.86,0.11,12.83,24.28,25.24,...,0.0,23.96,0.0,68.24,95.08,100.0,70.8,4.6,38.07,3.42
2,Mid-State,50.79,28.56,16.82,26.23,71.27,0.25,13.99,28.39,21.32,...,0.0,49.29,0.0,16.99,57.04,100.0,63.03,20.48,19.98,24.15
3,Northeast,50.8,34.57,10.5,17.07,81.87,0.74,16.01,23.65,41.89,...,0.0,20.15,10.6,31.5,99.58,97.77,96.29,24.81,32.98,17.18
4,TN Valley,41.02,20.99,11.52,25.79,64.33,0.69,10.56,24.42,20.99,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84


## Adding county and stuff

In [None]:
exclude = {"PERSON ID", "County", "Region", "n_youth"}
indicator_cols = [c for c in df_risk_all.columns if c not in exclude]

# collapse to one record per youth per region (1 if youth ever had a 1)
per_person = (
    df_risk_all.groupby(["County", "PERSON ID"], dropna=False)[indicator_cols]
               .max()  # or .any() for booleans
               .reset_index()
)

# percent of youth with 1 by region
percent_ones_by_county= (
    per_person[indicator_cols].eq(1)
             .groupby(per_person["County"], dropna=False)
             .mean()
             .mul(100).round(2)
             .reset_index()
)

In [None]:
percent_ones_by_county.head()

Unnamed: 0,County,Decision-Making (Judgement),Impulsivity/Hyperactivity,School Behavior,Substance Use,Spiritual/Religious,Traditions And Rituals,Youth Involvement With Care,Anger Control,Anxiety,...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,Anderson,40.37,22.02,11.93,19.27,77.98,0.92,15.6,26.61,41.28,...,0.0,0.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,100.0
1,Bedford,50.0,32.0,12.0,18.0,60.0,0.0,8.0,22.0,22.0,...,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,0.0,0.0
2,Benton,76.92,61.54,0.0,15.38,61.54,0.0,7.69,69.23,61.54,...,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,100.0,0.0
3,Bledsoe,26.67,20.0,6.67,26.67,53.33,0.0,0.0,20.0,26.67,...,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
4,Blount,64.44,36.67,7.78,12.22,75.56,1.11,7.78,20.0,48.89,...,0.0,100.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,0.0


In [None]:
percent_ones_by_county.to_csv("percent_ones_by_county.csv")

## Region

In [None]:
# unique Region–County index
county_index = (
    df_risk_all[["Region", "County"]]
        .dropna(subset=["County"])
        .drop_duplicates()
)

# percent_ones_by_region should already have Region-level columns (incl. n_youth)
# replicate region metrics to each county
percent_ones_by_county = county_index.merge(
    percent_ones_by_region, on="Region", how="left"
)

percent_ones_by_county.head()

Unnamed: 0,Region,County,Decision-Making (Judgement),Impulsivity/Hyperactivity,School Behavior,Substance Use,Spiritual/Religious,Traditions And Rituals,Youth Involvement With Care,Anger Control,...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,Mid West,Madison,40.32,27.59,13.8,21.93,60.86,0.11,12.83,24.28,...,0.0,23.96,0.0,68.24,95.08,100.0,70.8,4.6,38.07,3.42
1,Northeast,Sullivan,50.8,34.57,10.5,17.07,81.87,0.74,16.01,23.65,...,0.0,20.15,10.6,31.5,99.58,97.77,96.29,24.81,32.98,17.18
2,TN Valley,Warren,41.02,20.99,11.52,25.79,64.33,0.69,10.56,24.42,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84
3,TN Valley,Hamilton,41.02,20.99,11.52,25.79,64.33,0.69,10.56,24.42,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84
4,Mid-State,Davidson,50.79,28.56,16.82,26.23,71.27,0.25,13.99,28.39,...,0.0,49.29,0.0,16.99,57.04,100.0,63.03,20.48,19.98,24.15


In [None]:
percent_ones_by_county.to_csv('percent_by_region.csv')

In [None]:
percent_ones_by_county.head()

Unnamed: 0,Region,County,Decision-Making (Judgement),Impulsivity/Hyperactivity,School Behavior,Substance Use,Spiritual/Religious,Traditions And Rituals,Youth Involvement With Care,Anger Control,...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,Mid West,Madison,40.32,27.59,13.8,21.93,60.86,0.11,12.83,24.28,...,0.0,23.96,0.0,68.24,95.08,100.0,70.8,4.6,38.07,3.42
1,Northeast,Sullivan,50.8,34.57,10.5,17.07,81.87,0.74,16.01,23.65,...,0.0,20.15,10.6,31.5,99.58,97.77,96.29,24.81,32.98,17.18
2,TN Valley,Warren,41.02,20.99,11.52,25.79,64.33,0.69,10.56,24.42,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84
3,TN Valley,Hamilton,41.02,20.99,11.52,25.79,64.33,0.69,10.56,24.42,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84
4,Mid-State,Davidson,50.79,28.56,16.82,26.23,71.27,0.25,13.99,28.39,...,0.0,49.29,0.0,16.99,57.04,100.0,63.03,20.48,19.98,24.15


In [None]:
percent_ones_by_county = pd.read_csv("percent_by_region.csv")

In [None]:
# Append " County" to each value, avoiding duplicates if it already ends with "County"
percent_ones_by_county['County'] = percent_ones_by_county['County'].str.replace(r'\s*County$', '', regex=True).str.strip() + ' County'

In [None]:
percent_ones_by_county.head()

Unnamed: 0.1,Unnamed: 0,Region,County,Decision-Making (Judgement),Impulsivity/Hyperactivity,School Behavior,Substance Use,Spiritual/Religious,Traditions And Rituals,Youth Involvement With Care,...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,0,Mid West,Madison County,40.32,27.59,13.8,21.93,60.86,0.11,12.83,...,0.0,23.96,0.0,68.24,95.08,100.0,70.8,4.6,38.07,3.42
1,1,Northeast,Sullivan County,50.8,34.57,10.5,17.07,81.87,0.74,16.01,...,0.0,20.15,10.6,31.5,99.58,97.77,96.29,24.81,32.98,17.18
2,2,TN Valley,Warren County,41.02,20.99,11.52,25.79,64.33,0.69,10.56,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84
3,3,TN Valley,Hamilton County,41.02,20.99,11.52,25.79,64.33,0.69,10.56,...,0.0,34.57,7.0,39.23,81.89,88.2,85.19,0.27,15.5,41.84
4,4,Mid-State,Davidson County,50.79,28.56,16.82,26.23,71.27,0.25,13.99,...,0.0,49.29,0.0,16.99,57.04,100.0,63.03,20.48,19.98,24.15


In [None]:
def normalize(s):
    return (s.astype(str)
              .str.strip().str.upper()
              .str.replace(r"\s+COUNTY$", "", regex=True)
              .str.replace(r"\s+", " ", regex=True))

# Build join key
percent_ones_by_county = percent_ones_by_county.copy()
Region = Region.copy()

percent_ones_by_county["__key"] = normalize(percent_ones_by_county["County"])
Region["__key"] = normalize(Region["County Name"])  # adjust if column differs

# If Region has duplicate counties, pick one or aggregate as needed
# Region = Region.sort_values("some_preference").drop_duplicates("__key", keep="first")
# or Region = Region.groupby("__key", as_index=False)["n_youth"].sum()

# Merge n_youth into percent_ones_by_county
percent_ones_by_county = (
    percent_ones_by_county
      .merge(Region[["__key", "n_youth"]], on="__key", how="left")
      .drop(columns="__key")
)

In [None]:
n_youth.head()

Unnamed: 0,Region,n_youth,__key
0,East,880,EAST
1,Mid West,935,MID WEST
2,Mid-State,1201,MID-STATE
3,Northeast,943,NORTHEAST
4,TN Valley,729,TN VALLEY


In [None]:
def norm_region(s):
    return (s.astype(str).str.strip().str.upper().str.replace(r"\s+", " ", regex=True))

percent_ones_by_county["__key"] = norm_region(percent_ones_by_county["Region"])
n_youth["__key"] = norm_region(n_youth["Region"])

percent_ones_by_county = (
    percent_ones_by_county
      .merge(n_youth[["__key", "n_youth"]], on="__key", how="left")
      .drop(columns="__key")
)

In [None]:
def normalize(s):
    return (s.astype(str)
              .str.strip().str.upper()
              .str.replace(r"\s+COUNTY$", "", regex=True)
              .str.replace(r"\s+", " ", regex=True))

# Build join key
percent_ones_by_county = percent_ones_by_county.copy()
n_youth = n_youth.copy()

percent_ones_by_county["__key"] = normalize(percent_ones_by_county["Region"])
n_youth["__key"] = normalize(n_youth["Region"])  # adjust if column differs

# If Region has duplicate counties, pick one or aggregate as needed
# Region = Region.sort_values("some_preference").drop_duplicates("__key", keep="first")
# or Region = Region.groupby("__key", as_index=False)["n_youth"].sum()

# Merge n_youth into percent_ones_by_county
percent_ones_by_county = (
    percent_ones_by_county
      .merge(Region[["__key", "n_youth"]], on="__key", how="left")
      .drop(columns="__key")
)

NameError: name 'Region' is not defined

In [None]:
percent_ones_by_county.to_csv("percent_by_region.csv")

In [32]:
# Calculate % of 1s out of sum of all times County is present in dataset for each PersonID

# First, let's understand the data structure
print("Data shape:", df_risk_all.shape)
print("Unique Person IDs:", df_risk_all['PERSON ID'].nunique())
print("Unique Counties:", df_risk_all['County'].nunique())

# Count how many times each County appears for each Person ID
county_person_counts = (
    df_risk_all.groupby(['County', 'PERSON ID'], dropna=False)
    .size()
    .reset_index(name='county_appearances')
)

print("\nCounty-Person ID combinations:")
print(county_person_counts.head(10))


Data shape: (5667, 70)
Unique Person IDs: 5667
Unique Counties: 95

County-Person ID combinations:
     County  PERSON ID  county_appearances
0  Anderson    4878824                   1
1  Anderson    4915834                   1
2  Anderson    5064090                   1
3  Anderson    5218392                   1
4  Anderson    5307594                   1
5  Anderson    6048532                   1
6  Anderson    6381992                   1
7  Anderson    6553852                   1
8  Anderson    6597416                   1
9  Anderson    6682206                   1


In [33]:
# Now calculate the percentage of 1s for each indicator by county
# This will be: (sum of 1s for each indicator by county) / (total county appearances by PersonID)

exclude = {"PERSON ID", "County", "Region", "n_youth", "CURRENT AGE", "COMMITMENT COUNTY", 
           "RESPONSIBLE COUNTY", "GENDER", "REMOVAL ZIP CODE", "PLACEMENT ZIP CODE",
           "LOCATION BEGIN DATE", "LOCATION END DATE", "GEOID", "NAME"}
indicator_cols = [c for c in df_risk_all.columns if c not in exclude]

print("Indicator columns:")
print(indicator_cols[:10], "...")  # Show first 10
print(f"Total indicator columns: {len(indicator_cols)}")

# Step 1: Count total appearances of each County for each Person ID
county_person_totals = (
    df_risk_all.groupby(['County', 'PERSON ID'], dropna=False)
    .size()
    .reset_index(name='total_county_appearances')
)

# Step 2: Count 1s for each indicator by County and Person ID
indicator_ones_by_county_person = (
    df_risk_all.groupby(['County', 'PERSON ID'], dropna=False)[indicator_cols]
    .sum()  # Sum of 1s for each indicator
    .reset_index()
)

# Step 3: Merge with total appearances
county_person_analysis = indicator_ones_by_county_person.merge(
    county_person_totals, on=['County', 'PERSON ID'], how='left'
)

print("\nSample of merged data:")
print(county_person_analysis.head())


Indicator columns:
['Decision-Making (Judgement)', 'Impulsivity/Hyperactivity', 'School Behavior', 'Substance Use', 'Spiritual/Religious', 'Traditions And Rituals', 'Youth Involvement With Care', 'Anger Control', 'Anxiety', 'Depression'] ...
Total indicator columns: 56

Sample of merged data:
     County  PERSON ID  Decision-Making (Judgement)  \
0  Anderson    4878824                            1   
1  Anderson    4915834                            1   
2  Anderson    5064090                            1   
3  Anderson    5218392                            0   
4  Anderson    5307594                            1   

   Impulsivity/Hyperactivity  School Behavior  Substance Use  \
0                          1                0              0   
1                          0                1              0   
2                          0                0              1   
3                          0                0              0   
4                          0                0          

In [34]:
# Step 4: Calculate percentages for each indicator
# For each indicator: (sum of 1s) / (total county appearances for that Person ID)

# Create a copy to work with
percent_calculation = county_person_analysis.copy()

# Calculate percentage for each indicator column
for col in indicator_cols:
    percent_calculation[f'{col}_percent'] = (
        percent_calculation[col] / percent_calculation['total_county_appearances'] * 100
    )

# Show sample results
print("Sample percentage calculations:")
sample_cols = ['County', 'PERSON ID', 'total_county_appearances'] + indicator_cols[:3] + [f'{col}_percent' for col in indicator_cols[:3]]
print(percent_calculation[sample_cols].head(10))


Sample percentage calculations:
     County  PERSON ID  total_county_appearances  Decision-Making (Judgement)  \
0  Anderson    4878824                         1                            1   
1  Anderson    4915834                         1                            1   
2  Anderson    5064090                         1                            1   
3  Anderson    5218392                         1                            0   
4  Anderson    5307594                         1                            1   
5  Anderson    6048532                         1                            1   
6  Anderson    6381992                         1                            0   
7  Anderson    6553852                         1                            0   
8  Anderson    6597416                         1                            1   
9  Anderson    6682206                         1                            1   

   Impulsivity/Hyperactivity  School Behavior  \
0                          

In [35]:
# Step 5: Aggregate by County to get average percentages
# This gives us the average percentage of 1s per indicator by county

percent_cols = [f'{col}_percent' for col in indicator_cols]

county_percentages = (
    percent_calculation.groupby('County', dropna=False)[percent_cols]
    .mean()
    .round(2)
    .reset_index()
)

# Rename columns to remove '_percent' suffix for cleaner output
county_percentages.columns = ['County'] + indicator_cols

print("Final result: Percentage of 1s by County (out of total county appearances per Person ID)")
print(county_percentages.head())

# Save the result
county_percentages.to_csv('percent_ones_by_county_corrected.csv', index=False)
print(f"\nSaved to: percent_ones_by_county_corrected.csv")
print(f"Shape: {county_percentages.shape}")


Final result: Percentage of 1s by County (out of total county appearances per Person ID)
     County  Decision-Making (Judgement)  Impulsivity/Hyperactivity  \
0  Anderson                        40.37                      22.02   
1   Bedford                        50.00                      32.00   
2    Benton                        76.92                      61.54   
3   Bledsoe                        26.67                      20.00   
4    Blount                        64.44                      36.67   

   School Behavior  Substance Use  Spiritual/Religious  \
0            11.93          19.27                77.98   
1            12.00          18.00                60.00   
2             0.00          15.38                61.54   
3             6.67          26.67                53.33   
4             7.78          12.22                75.56   

   Traditions And Rituals  Youth Involvement With Care  Anger Control  \
0                    0.92                        15.60        

In [36]:
# Let's also create a comparison to show the difference between the two approaches

# Original approach (from your existing code)
exclude_orig = {"PERSON ID", "County", "Region", "n_youth"}
indicator_cols_orig = [c for c in df_risk_all.columns if c not in exclude_orig]

percent_ones_by_county_original = (
    (df_risk_all[indicator_cols_orig] == 1)
      .groupby(df_risk_all["County"], dropna=False)
      .mean()
      .mul(100)
      .round(2)
      .reset_index()
)

print("COMPARISON:")
print("\n1. Original approach (mean of 1s by county):")
print(percent_ones_by_county_original[['County'] + indicator_cols_orig[:3]].head())

print("\n2. New approach (% of 1s out of total county appearances per Person ID):")
print(county_percentages[['County'] + indicator_cols[:3]].head())

print("\nKey difference:")
print("- Original: Calculates the percentage of records with value=1 for each indicator by county")
print("- New: Calculates the percentage of 1s relative to how many times each county appears for each Person ID")


COMPARISON:

1. Original approach (mean of 1s by county):
     County  CURRENT AGE  COMMITMENT COUNTY  RESPONSIBLE COUNTY
0  Anderson          0.0                0.0                 0.0
1   Bedford          0.0                0.0                 0.0
2    Benton          0.0                0.0                 0.0
3   Bledsoe          0.0                0.0                 0.0
4    Blount          0.0                0.0                 0.0

2. New approach (% of 1s out of total county appearances per Person ID):
     County  Decision-Making (Judgement)  Impulsivity/Hyperactivity  \
0  Anderson                        40.37                      22.02   
1   Bedford                        50.00                      32.00   
2    Benton                        76.92                      61.54   
3   Bledsoe                        26.67                      20.00   
4    Blount                        64.44                      36.67   

   School Behavior  
0            11.93  
1            12

In [39]:
merged.head()

Unnamed: 0,PERSON ID,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,353258,17,Madison,Madison,MALE,37040.0,37040.0,2023-06-13,2024-05-25,0,...,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
1,706512,17,Sullivan,Sullivan,FEMALE,37642.0,37642.0,2024-11-18,,0,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0
2,1348374,17,Warren,Warren,FEMALE,37110.0,37110.0,2023-06-02,2023-09-30,0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
3,2470086,18,Hamilton,Hamilton,MALE,37218.0,37218.0,2024-09-17,,1,...,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
4,3965628,21,Davidson,Davidson,MALE,37725.0,37725.0,2021-12-22,2022-08-31,1,...,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [38]:
# Save the county_percentages dataframe
# This dataframe contains the percentage of 1s out of total county appearances per Person ID

print("Saving county_percentages dataframe...")
county_percentages.to_csv('percent_ones_by_county_corrected.csv', index=False)
print("✅ Saved as: percent_ones_by_county_corrected.csv")

# Also save as Excel for easier viewing
county_percentages.to_excel('percent_ones_by_county_corrected.xlsx', index=False)
print("✅ Also saved as: percent_ones_by_county_corrected.xlsx")

print(f"\nDataframe info:")
print(f"- Shape: {county_percentages.shape}")
print(f"- Columns: {len(county_percentages.columns)}")
print(f"- Counties: {len(county_percentages['County'].unique())}")

# Show first few rows
print(f"\nFirst 5 rows:")
print(county_percentages.head())


Saving county_percentages dataframe...
✅ Saved as: percent_ones_by_county_corrected.csv
✅ Also saved as: percent_ones_by_county_corrected.xlsx

Dataframe info:
- Shape: (95, 57)
- Columns: 57
- Counties: 95

First 5 rows:
     County  Decision-Making (Judgement)  Impulsivity/Hyperactivity  \
0  Anderson                        40.37                      22.02   
1   Bedford                        50.00                      32.00   
2    Benton                        76.92                      61.54   
3   Bledsoe                        26.67                      20.00   
4    Blount                        64.44                      36.67   

   School Behavior  Substance Use  Spiritual/Religious  \
0            11.93          19.27                77.98   
1            12.00          18.00                60.00   
2             0.00          15.38                61.54   
3             6.67          26.67                53.33   
4             7.78          12.22                75.56   

  

In [37]:
percent_ones_by_county.head()

Unnamed: 0,County,CURRENT AGE,COMMITMENT COUNTY,RESPONSIBLE COUNTY,GENDER,REMOVAL ZIP CODE,PLACEMENT ZIP CODE,LOCATION BEGIN DATE,LOCATION END DATE,Decision-Making (Judgement),...,Mental_Health_Psychotic_Thought_Disturbances,Mental_Health_Risk_Safety,Mental_Health_Sexual_Developmental_Concerns,Mental_Health_Trauma_Stress_Related,Supportive_Adult_Relationships_Family_Relationships,Supportive_Adult_Relationships_Social_Relationships,Supportive_Adult_Relationships_Barriers_to_Support,Housing_Instability_Skills_Readiness,Housing_Instability_Current_Living_Situation,Housing_Instability_Barriers_Risks
0,Anderson,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.37,...,0.0,0.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,100.0
1,Bedford,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,...,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,0.0,0.0
2,Benton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76.92,...,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,100.0,0.0
3,Bledsoe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.67,...,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
4,Blount,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.44,...,0.0,100.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,0.0
