In [1]:
import pandas as pd
import numpy as np 
import re
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
from pathlib import Path
from shapely.ops import transform
from pyproj import Transformer

In [2]:
# Load the county-level table that every later cell reads and modifies as `df`.
df = pd.read_csv(filepath_or_buffer='../notebooks/County_DCS_Region.csv')

In [3]:
# Show every column label as a plain Python list (the frame is too wide to
# read all headers from the default DataFrame display).
list(df.columns)

['County_FIPS',
 'County',
 'DCS_Region',
 'n_youth',
 'Mental Health - Behavioral & Impulse Control - High Risk',
 'Mental Health - Cultural, Spiritual, and Engagement Factors - High Risk',
 'Mental Health - Mood & Emotional Regulation - High Risk',
 'Mental Health - Physical & Medical - High Risk',
 'Mental Health - Psychotic & Thought Disturbances - High Risk',
 'Mental Health - Risk & Safety - High Risk',
 'Mental Health - Sexual & Developmental Concerns - High Risk',
 'Mental Health - Trauma & Stress-Related - High Risk',
 'Supportive Adult Relationships - Family Relationships - High Risk',
 'Supportive Adult Relationships - Social Relationships - High Risk',
 'Supportive Adult Relationships - Barriers to Support - High Risk',
 'Housing Instability - Skills & Readiness - High Risk',
 'Housing Instability - Current Living Situation - High Risk',
 'Housing Instability - Barriers & Risks - High Risk',
 'Decision-Making (Judgement)',
 'Impulsivity/Hyperactivity',
 'School Behavior',
 

In [4]:
# Normalize county names to the form "<Name> County":
#   1. trim whitespace and drop an existing "County" suffix (any letter case),
#   2. append the suffix exactly once, so re-running this cell is harmless.
# Missing names are kept missing: a bare astype(str) can turn NaN into the
# text "nan", which would otherwise end up as a bogus "nan County" entry.
df['County'] = (
    (
        df['County'].astype(str).str.strip()
          .str.replace(r'\s*County$', '', regex=True, case=False)  # remove existing suffix if present
          .str.strip()
        + ' County'                                                # append once
    ).where(df['County'].notna())                                  # restore NaN where the input was NaN
)

In [5]:
# Preview the first five rows to check the normalized county names.
df.head(5)

Unnamed: 0,County_FIPS,County,DCS_Region,n_youth,Mental Health - Behavioral & Impulse Control - High Risk,"Mental Health - Cultural, Spiritual, and Engagement Factors - High Risk",Mental Health - Mood & Emotional Regulation - High Risk,Mental Health - Physical & Medical - High Risk,Mental Health - Psychotic & Thought Disturbances - High Risk,Mental Health - Risk & Safety - High Risk,...,Natural Supports,Neglect,Oppositional(Non-compliance with Authority),Relationship Permanence,Social Functioning,Independent Living Skills,Living Situation,Physical Abuse,Runaway*,Youth Residential Stability
0,47065,Hamilton County,Tennessee Valley,515,52.4272,68.932,53.7864,6.2136,1.5534,23.8835,...,74.7573,30.8738,27.1845,37.0874,34.9515,13.0097,21.7476,16.8932,10.4854,6.6019
1,47115,Marion County,Tennessee Valley,515,52.4272,68.932,53.7864,6.2136,1.5534,23.8835,...,74.7573,30.8738,27.1845,37.0874,34.9515,13.0097,21.7476,16.8932,10.4854,6.6019
2,47185,White County,Upper Cumberland,428,54.2056,68.4579,48.5981,5.8411,1.1682,13.785,...,72.8972,61.9159,12.6168,50.2336,20.0935,9.1121,18.4579,28.0374,7.0093,9.3458
3,47129,Morgan County,East,311,54.3408,76.2058,60.1286,12.8617,2.2508,25.0804,...,78.135,58.8424,24.7588,45.9807,33.119,13.8264,17.3633,21.5434,13.1833,7.074
4,47013,Campbell County,East,311,54.3408,76.2058,60.1286,12.8617,2.2508,25.0804,...,78.135,58.8424,24.7588,45.9807,33.119,13.8264,17.3633,21.5434,13.1833,7.074


In [6]:
# Keep only the last three digits of the FIPS code (47065 -> "065"), i.e. drop
# the leading 47 prefix via modulo and store the result as a zero-padded,
# 3-character string.
# pd.to_numeric makes the cell safe to re-run: once the column already holds
# strings such as "065", applying `% 1000` to it directly raises a TypeError.
df['County_FIPS'] = (
    pd.to_numeric(df['County_FIPS'])
      .mod(1000)
      .astype(int)
      .astype(str)
      .str.zfill(3)
)

In [7]:
# Preview the first five rows again to inspect the rewritten County_FIPS column.
df.head(n=5)

Unnamed: 0,County_FIPS,County,DCS_Region,n_youth,Mental Health - Behavioral & Impulse Control - High Risk,"Mental Health - Cultural, Spiritual, and Engagement Factors - High Risk",Mental Health - Mood & Emotional Regulation - High Risk,Mental Health - Physical & Medical - High Risk,Mental Health - Psychotic & Thought Disturbances - High Risk,Mental Health - Risk & Safety - High Risk,...,Natural Supports,Neglect,Oppositional(Non-compliance with Authority),Relationship Permanence,Social Functioning,Independent Living Skills,Living Situation,Physical Abuse,Runaway*,Youth Residential Stability
0,65,Hamilton County,Tennessee Valley,515,52.4272,68.932,53.7864,6.2136,1.5534,23.8835,...,74.7573,30.8738,27.1845,37.0874,34.9515,13.0097,21.7476,16.8932,10.4854,6.6019
1,115,Marion County,Tennessee Valley,515,52.4272,68.932,53.7864,6.2136,1.5534,23.8835,...,74.7573,30.8738,27.1845,37.0874,34.9515,13.0097,21.7476,16.8932,10.4854,6.6019
2,185,White County,Upper Cumberland,428,54.2056,68.4579,48.5981,5.8411,1.1682,13.785,...,72.8972,61.9159,12.6168,50.2336,20.0935,9.1121,18.4579,28.0374,7.0093,9.3458
3,129,Morgan County,East,311,54.3408,76.2058,60.1286,12.8617,2.2508,25.0804,...,78.135,58.8424,24.7588,45.9807,33.119,13.8264,17.3633,21.5434,13.1833,7.074
4,13,Campbell County,East,311,54.3408,76.2058,60.1286,12.8617,2.2508,25.0804,...,78.135,58.8424,24.7588,45.9807,33.119,13.8264,17.3633,21.5434,13.1833,7.074


In [8]:
# List the dtype of every column, e.g. to check how County_FIPS is stored
# after the conversion above.
df.dtypes

County_FIPS                                                                 object
County                                                                      object
DCS_Region                                                                  object
n_youth                                                                      int64
Mental Health - Behavioral & Impulse Control - High Risk                   float64
Mental Health - Cultural, Spiritual, and Engagement Factors - High Risk    float64
Mental Health - Mood & Emotional Regulation - High Risk                    float64
Mental Health - Physical & Medical - High Risk                             float64
Mental Health - Psychotic & Thought Disturbances - High Risk               float64
Mental Health - Risk & Safety - High Risk                                  float64
Mental Health - Sexual & Developmental Concerns - High Risk                float64
Mental Health - Trauma & Stress-Related - High Risk                        float64
Supp

In [11]:
# Cast the county code column to str. This cell only changes the dtype: no
# modulo and no zero-padding happens here.
df['County_FIPS'] = df['County_FIPS'].astype(str)

In [12]:
# Save the processed table to the current working directory. pandas' default
# index=True is spelled out here: the row index is written as an unnamed
# first column of the CSV.
df.to_csv('County_DCS_Region_Matched.csv', index=True)