In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import boto3
import io

## Waves

In [2]:
# ───────────── Settings ─────────────
BUCKET       = 'bdc-public-curated'
PREFIX       = 'ndacan/nytd/outcomes/waves_processed/'

# ───────────── Initialize S3 client & list files ─────────────
s3 = boto3.client('s3')

In [3]:
def s3_read_csv(key):
    """Load a CSV from S3 into a pandas DataFrame."""
    obj = s3.get_object(Bucket=BUCKET, Key=key)
    return pd.read_csv(io.BytesIO(obj['Body'].read()), dtype=str)

In [4]:
all_wave = s3_read_csv(f"{PREFIX}cleaned_all_waves.csv")

## Services

In [5]:
# ───────────── Settings ─────────────
bucket       = 'bdc-public-curated'
prefix       = 'ndacan/nytd/services/'

# ───────────── Initialize S3 client & list files ─────────────
s3_2 = boto3.client('s3')

In [6]:
def s3_read_csv(key):
    """Load a CSV from S3 into a pandas DataFrame."""
    obj = s3_2.get_object(Bucket=bucket, Key=key)
    return pd.read_csv(io.BytesIO(obj['Body'].read()), dtype=str)

In [7]:
services = s3_read_csv(f"{prefix}Services2023.csv")

# Merge Serices and Waves

In [8]:
wave_service = all_wave.merge(services, on='StFCID', how='left')

## AFCARS

In [9]:
# Load AFCARS_TN_processed(in).csv as a DataFrame
afcars = pd.read_csv('AFCARS_TN_processed(in).csv', dtype=str)

In [10]:
afcars.head()

Unnamed: 0,St,StFCID,FY,CLINDIS,NEGLECT,PHYABUSE,SEXABUSE,AAPARENT,DAPARENT,AACHILD,...,HOUSING,CURPLSET,PlacementInstability,LatRemLOS,DiagDis,RR_neglect,RR_abuse,RR_other,PlacementType,MonthsInCare
0,TN,TNOOOOGOGGFFMF,2014,2,0,0,0,0,1,0,...,0,8,5,280,0,0,0,1,3,9.199178645
1,TN,TNOOOOHFLIHFMH,2014,2,0,0,0,0,0,0,...,0,2,1,51,0,0,0,1,1,1.675564682
2,TN,TNOOOOIHNMLFML,2014,2,0,0,0,0,1,0,...,0,7,13,947,0,0,0,1,3,31.11293634
3,TN,TNOOOOIJHFJFML,2014,2,0,0,0,0,0,0,...,0,8,3,185,0,0,0,1,3,6.078028747
4,TN,TNOOOOIKKOFFKK,2014,2,1,0,0,0,1,0,...,1,2,1,85,0,1,0,1,1,2.792607803


In [11]:
afcars = afcars[afcars['St'] == 'TN']

# Merge wave_service with afcars

In [12]:
wave_service_afcars = wave_service.merge(afcars, on='StFCID', how='left')

In [13]:
wave_service_afcars.to_csv('wave_service_afcars.csv', index=False)

# Dependent

##### Post secondary

In [14]:
higher_education_columns = [
    'HighEdCert_w1',
    'HighEdCert_w23',
    'CurrenRoll_w1',
    'CurrenRoll_w23',
    'EducAid_w1',
    'EducAid_w23',
    'HS_or_GED_w1',
    'HS_or_GED_w23',
    'Voc_Certificate_w1',
    'Voc_Certificate_w23',
    'Voc_License_w1',
    'Voc_License_w23',
    'Assoc_Degree_w1',
    'Assoc_Degree_w23',
    'Bach_Degree_w1',
    'Bach_Degree_w23',
    'Higher_Degree_w1',
    'Higher_Degree_w23'
]

##### Full time employment

In [15]:
full_time_employment_columns = [
    'CurrFTE_w1',
    'CurrFTE_w23'
]

##### Full time employment & post secondary

In [16]:
Emplyment_or_post_secondary = [ 'HighEdCert_w1',
    'HighEdCert_w23',
    'CurrenRoll_w1',
    'CurrenRoll_w23',
    'EducAid_w1',
    'EducAid_w23',
    'HS_or_GED_w1',
    'HS_or_GED_w23',
    'Voc_Certificate_w1',
    'Voc_Certificate_w23',
    'Voc_License_w1',
    'Voc_License_w23',
    'Assoc_Degree_w1',
    'Assoc_Degree_w23',
    'Bach_Degree_w1',
    'Bach_Degree_w23',
    'Higher_Degree_w1',
    'Higher_Degree_w23', 
    'CurrFTE_w1',
    'CurrFTE_w23'
]

# Independent

In [17]:
Homeless_wave_23 = ['Homeless_w23']

# Control

In [18]:
# Flag rows where the Sex columns do not all match
wave_service_afcars['Sex_consistent'] = (
    (wave_service_afcars['Sex_x'] == wave_service_afcars['Sex_y']) 
)

# Optional: View mismatched rows
mismatches = wave_service_afcars[~wave_service_afcars['Sex_consistent']]

In [19]:
wave_service_afcars = wave_service_afcars.drop(columns=['Sex_x'])

In [42]:
wave_service_afcars = wave_service_afcars.rename(columns={'Sex_y': 'Sex'})

In [43]:
sex_columns = [ 'Sex']

In [45]:
Early_homeless = ['Homeless_w1']

In [46]:
race_ethnicity_columns = [
    'AmIAKN',       # American Indian/Alaska Native
    'Asian',
    'BlkAfrAm',     # Black or African American
    'HawaiiPI',     # Native Hawaiian or Pacific Islander
    'White',
    'RaceUnkn',     # Race Unknown
    'RaceDcln',     # Race Declined
    'HisOrgin',     # Hispanic Origin
    'Race',         # Possibly a summary or coded value
    'RaceEthn',     # Combined race/ethnicity value
    'Race_w1'       # Race at Wave 1
]

In [23]:
disability_columns = [
    'CLINDIS',       # Clinical Disability
    'DiagDis',       # Diagnosed Disability
    'SpecEdSv',      # Special Education Services
    'CHBEHPRB'       # Child Behavioral Problem
]


In [24]:
substance_abuse_columns = [
    'SubAbuse_w1',
    'SubAbuse_w23'
]


In [25]:
incarceration_columns = [
    'Incarc_w1',
    'Incarc_w23'
]

# Services

In [26]:
independent_living_service_columns = [
    'HousEdSv',     # Housing Education
    'HlthEdSv',     # Health Education (optional, could be included)
    'CareerSv',     # Career Preparation
    'EmplyTrSv',    # Employment Training
    'BudgetSv',     # Budgeting
    'FamSuppSv',    # Family Support
    'MentorSv'      # Mentoring
]

# Removal

In [27]:
removal_reason_columns = [
    'NEGLECT',
    'PHYABUSE',
    'SEXABUSE',
    'AAPARENT',     # Alleged alcohol abuse - parent
    'DAPARENT',     # Drug abuse - parent
    'AACHILD',      # Alcohol abuse - child
    'DACHILD',      # Drug abuse - child
    'CHILDIS',      # Child disability
    'CHBEHPRB',     # Child behavior problem
    'PRTSDIED',     # Parent(s) died
    'PRTSJAIL',     # Parent(s) incarcerated
    'NOCOPE',       # No coping skills
    'ABANDMNT',     # Abandonment
    'RELINQSH',     # Relinquishment
    'HOUSING'       # Housing
]


# Placement

In [28]:
# Placment type
#Find out what each variable means
placement_type_columns = [
    'PlacementType'
]

In [29]:
placement_instability_columns = [
    'PlacementInstability'  # Likely captures total placement changes
]

In [30]:
currentl_placement_setting = ['CURPLSET']

In [31]:
placement_instability_support = [
    'PlacementType',   # Can be used to track unique setting types
    'CURPLSET',        # Current placement setting
    'LatRemLOS'        # Length of stay in latest removal (can support instability interpretation)
]

In [32]:
months_in_foster_care_column = [
    'MonthsInCare'
]

# Models

# Descriptive Stats

In [47]:
# Combine all columns and remove duplicates
columns_to_describe = list(set(
    sex_columns + higher_education_columns + full_time_employment_columns +
    Emplyment_or_post_secondary + Homeless_wave_23 + race_ethnicity_columns +
    Early_homeless + disability_columns + substance_abuse_columns +
    incarceration_columns + independent_living_service_columns +
    removal_reason_columns + placement_type_columns + placement_instability_columns +
    currentl_placement_setting + placement_instability_support + months_in_foster_care_column
))

In [48]:
# Generate descriptive stats
descriptive_stats = wave_service_afcars[columns_to_describe].describe(include='all').T

In [49]:
descriptive_stats.head()

Unnamed: 0,count,unique,top,freq
CLINDIS,1253,3,2,698
PRTSDIED,1273,2,0,1263
HS_or_GED_w1,1661,2,0,1596
AmIAKN,1197,2,0,1194
Bach_Degree_w23,1661,1,0,1661


### Model 1: homelessness experience (wave 1) were associated with having post secondary eduction [control for factors prior to homeless]

### model 2: experiencing homelessness overall is associated w/ seaking out post-secondary education of a full tme job. 

## **check multicollinearity (VIF< 2)**