In [1]:
# import os
# os.chdir("birthrate_mtgp")
from jax import numpy as jnp
import numpy as np
import numpyro.distributions as dist
import jax.numpy as jnp
import numpyro
from numpyro.handlers import scope

  from .autonotebook import tqdm as notebook_tqdm


## Age

In [3]:
import pandas as pd

# File paths
path_fertility = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data with 2024.csv"
path_2024 = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/2024 Mother Age.csv"

# Step 1: Load both datasets
df_fert = pd.read_csv(path_fertility)
df_2024 = pd.read_csv(path_2024)

# Step 2: Standardize column names
df_2024.columns = df_2024.columns.str.strip().str.lower()
df_2024.rename(columns={'month': 'month', 'state': 'state', 'motherage': 'age', 'births': 'births'}, inplace=True)

# Step 3: Map age groups to match target columns
def map_age_group(age):
    if age in ['15-19', '20-24']:
        return 'births_age1524'
    elif age in ['25-29', '30-34']:
        return 'births_age2534'
    elif age in ['35-39', '40-44']:
        return 'births_age3544'
    else:
        return None

df_2024['age_group'] = df_2024['age'].apply(map_age_group)
df_2024 = df_2024.dropna(subset=['age_group'])

# Step 4: Compute bimonthly code (bmcode: 1 for Jan-Feb, ..., 6 for Nov-Dec)
df_2024['bmcode'] = ((df_2024['month'] - 1) // 2 + 1).astype(int)

# Step 5: Aggregate to state-bmcode-age_group level
df_agg = df_2024.groupby(['state', 'bmcode', 'age_group'])['births'].sum().unstack('age_group').reset_index()
df_agg['year'] = 2023

# Step 6: Reorder and match column order with original dataset
df_agg = df_agg[['state', 'year', 'bmcode', 'births_age1524', 'births_age2534', 'births_age3544']]

# Step 7: Replace 2023 rows in original data
df_fert_no2023 = df_fert[df_fert['year'] != 2023]
df_updated = pd.concat([df_fert_no2023, df_agg], ignore_index=True)

# Step 8: Save updated dataset
output_path = path_fertility.replace("fertility_data with 2024.csv", "fertility_data_updated.csv")
df_updated.to_csv(output_path, index=False)
print("✅ Updated data saved to:", output_path)


✅ Updated data saved to: /Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated.csv


## Marital Status

In [3]:
import pandas as pd

# File paths
fertility_updated_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated.csv"
marital_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/2024 Marital Status.csv"

# Step 1: Load datasets
df_fert = pd.read_csv(fertility_updated_path)
df_marital = pd.read_csv(marital_path)

# Step 2: Standardize column names
df_marital.columns = df_marital.columns.str.strip().str.lower()

# Step 3: Map marital status to correct column name
df_marital['marital_col'] = df_marital['marital_status'].map({
    'Married': 'births_married',
    'Unmarried': 'births_unmarried'
})

# Drop unknown statuses
df_marital = df_marital.dropna(subset=['marital_col'])

# Step 4: Calculate bmcode (1 to 6)
df_marital['bmcode'] = ((df_marital['month'] - 1) // 2 + 1).astype(int)

# Step 5: Pivot to wide format: one row per (state, bmcode), columns: births_married, births_unmarried
df_marital_pivot = (
    df_marital
    .groupby(['state', 'bmcode', 'marital_col'])['births']
    .sum()
    .unstack('marital_col')
    .reset_index()
)

# Add year column
df_marital_pivot['year'] = 2023

# Step 6: Keep relevant columns in order
df_marital_pivot = df_marital_pivot[['state', 'year', 'bmcode', 'births_married', 'births_unmarried']]

# Step 7: Merge with previous data
df_fert_no2023 = df_fert[df_fert['year'] != 2023]

# First merge previous age-group replacement
df_fert_2023 = df_fert[df_fert['year'] == 2023].drop(columns=['births_married', 'births_unmarried'], errors='ignore')

# Now merge with marital data
df_merged = pd.merge(
    df_fert_2023,
    df_marital_pivot,
    on=['state', 'year', 'bmcode'],
    how='left'
)

# Step 8: Combine everything
df_final = pd.concat([df_fert_no2023, df_merged], ignore_index=True)

# Step 9: Save final dataset
output_path = fertility_updated_path.replace("fertility_data_updated.csv", "fertility_data_updated_v2.csv")
df_final.to_csv(output_path, index=False)
print("✅ Final updated file saved to:", output_path)


✅ Final updated file saved to: /Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v2.csv


## Insurance

In [4]:
import pandas as pd

# File paths
fertility_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v2.csv"
insurance_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/2024 Insurance.csv"

# Step 1: Load data
df_fert = pd.read_csv(fertility_path)
df_ins = pd.read_csv(insurance_path)

# Step 2: Standardize column names
df_ins.columns = df_ins.columns.str.strip().str.lower()
if 'insurance' not in df_ins.columns:
    print("❌ Error: Could not find 'insurance' column. Found:", df_ins.columns.tolist())
    raise

# Step 3: Classify insurance types
df_ins['insurance_group'] = df_ins['insurance'].apply(
    lambda x: 'births_medicaid' if x.strip().lower() == 'medicaid' else 'births_nonmedicaid'
)

# Step 4: Compute bmcode (Jan-Feb = 1, Mar-Apr = 2, ..., Nov-Dec = 6)
df_ins['bmcode'] = ((df_ins['month'] - 1) // 2 + 1).astype(int)

# Step 5: Aggregate by state-bmcode-insurance_group
df_ins_agg = (
    df_ins
    .groupby(['state', 'bmcode', 'insurance_group'])['births']
    .sum()
    .unstack('insurance_group')
    .reset_index()
)

df_ins_agg['year'] = 2023

# Step 6: Reorder columns
df_ins_agg = df_ins_agg[['state', 'year', 'bmcode', 'births_medicaid', 'births_nonmedicaid']]

# Step 7: Separate and merge with existing 2023 data
df_fert_no2023 = df_fert[df_fert['year'] != 2023]
df_fert_2023 = df_fert[df_fert['year'] == 2023].drop(columns=['births_medicaid', 'births_nonmedicaid'], errors='ignore')

# Merge on keys
df_merged = pd.merge(df_fert_2023, df_ins_agg, on=['state', 'year', 'bmcode'], how='left')

# Step 8: Combine all rows and save
df_final = pd.concat([df_fert_no2023, df_merged], ignore_index=True)

output_path = fertility_path.replace("fertility_data_updated_v2.csv", "fertility_data_updated_v3.csv")
df_final.to_csv(output_path, index=False)
print("✅ Insurance update complete. Saved to:", output_path)


✅ Insurance update complete. Saved to: /Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v3.csv


## Total

In [6]:
import pandas as pd

# File paths
fertility_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v3.csv"
total_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/2024 Total Births.csv"

# Step 1: Load data
df_fert = pd.read_csv(fertility_path)
df_total = pd.read_csv(total_path)

# Step 2: Standardize column names
df_total.columns = df_total.columns.str.strip().str.lower()

# Step 3: Compute bimonthly code
df_total['bmcode'] = ((df_total['month'] - 1) // 2 + 1).astype(int)

# Step 4: Aggregate births by state and bmcode
df_total_agg = df_total.groupby(['state', 'bmcode'])['births'].sum().reset_index()
df_total_agg['year'] = 2023
df_total_agg.rename(columns={'births': 'births_total'}, inplace=True)

# Step 5: Prepare the 2023 portion of fertility data
df_fert_no2023 = df_fert[df_fert['year'] != 2023]
df_fert_2023 = df_fert[df_fert['year'] == 2023].drop(columns=['births_total'], errors='ignore')

# Step 6: Merge the new total births into 2023 portion
df_fert_2023_updated = pd.merge(df_fert_2023, df_total_agg, on=['state', 'year', 'bmcode'], how='left')

# Step 7: Combine and save
df_final = pd.concat([df_fert_no2023, df_fert_2023_updated], ignore_index=True)

output_path = fertility_path.replace("fertility_data_updated_v3.csv", "fertility_data_updated_v4.csv")
df_final.to_csv(output_path, index=False)
print("✅ Total births updated and saved to:", output_path)


✅ Total births updated and saved to: /Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v4.csv


## Age Group

In [12]:
import pandas as pd

# File paths
fertility_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v4.csv"
pop_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/2024 Population by Age.csv"

# Step 1: Load datasets
df_fert = pd.read_csv(fertility_path)
df_pop = pd.read_csv(pop_path)

# Step 2: Standardize column names
df_pop.columns = df_pop.columns.str.strip().str.lower()
df_pop.rename(columns={'sate': 'state', 'age group': 'age_group'}, inplace=True)

# Step 3: Map age groups to fertility categories
def map_age_group(age):
    if age in ['15-19', '20-24']:
        return 'pop_age1524'
    elif age in ['25-29', '30-34']:
        return 'pop_age2534'
    elif age in ['35-39', '40-44']:
        return 'pop_age3544'
    else:
        return None

df_pop['pop_group'] = df_pop['age_group'].apply(map_age_group)
df_pop = df_pop.dropna(subset=['pop_group'])

# Step 4: Aggregate to state + pop_group
df_pop_agg = (
    df_pop.groupby(['state', 'pop_group'])['population']
    .sum()
    .unstack('pop_group')
    .reset_index()
)
df_pop_agg['year'] = 2023  # Population for all bmcode in 2023

# Step 5: Apply this to every bmcode row in 2023 fertility data
df_fert_no2023 = df_fert[df_fert['year'] != 2023]
df_fert_2023 = df_fert[df_fert['year'] == 2023].drop(columns=['pop_age1524', 'pop_age2534', 'pop_age3544'], errors='ignore')

# Merge: many-to-one on state and year
df_fert_2023_updated = pd.merge(df_fert_2023, df_pop_agg, on=['state', 'year'], how='left')

# Step 6: Combine and save
df_final = pd.concat([df_fert_no2023, df_fert_2023_updated], ignore_index=True)

output_path = fertility_path.replace("fertility_data_updated_v4.csv", "fertility_data_updated_v5.csv")
df_final.to_csv(output_path, index=False)
print("✅ Population updated and saved to:", output_path)


✅ Population updated and saved to: /Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v5.csv


## Total

In [10]:
import pandas as pd

# File paths
fertility_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v5.csv"
pop_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/2024 Population by Age.csv"

# Step 1: Load datasets
df_fert = pd.read_csv(fertility_path)
df_pop = pd.read_csv(pop_path)

# Step 2: Standardize column names
df_pop.columns = df_pop.columns.str.strip().str.lower()
df_pop.rename(columns={'sate': 'state', 'age group': 'age_group'}, inplace=True)

# Step 3: Sum all population by state (regardless of age group)
df_total_pop = df_pop.groupby('state')['population'].sum().reset_index()
df_total_pop['year'] = 2023
df_total_pop.rename(columns={'population': 'pop_total'}, inplace=True)

# Step 4: Prepare 2023 data
df_fert_no2023 = df_fert[df_fert['year'] != 2023]
df_fert_2023 = df_fert[df_fert['year'] == 2023].drop(columns=['pop_total'], errors='ignore')

# Step 5: Merge total population to 2023 fertility rows
df_fert_2023_updated = pd.merge(df_fert_2023, df_total_pop, on=['state', 'year'], how='left')

# Step 6: Concatenate and save
df_final = pd.concat([df_fert_no2023, df_fert_2023_updated], ignore_index=True)

output_path = fertility_path.replace("fertility_data_updated_v5.csv", "fertility_data_updated_v6.csv")
df_final.to_csv(output_path, index=False)
print("✅ pop_total updated and saved to:", output_path)


✅ pop_total updated and saved to: /Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v6.csv


In [None]:
import pandas as pd

# File paths (adjust if needed)
original_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data with 2024.csv"
updated_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v6.csv"
output_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_birth_update.csv"

# Load datasets
df_original = pd.read_csv(original_path)
df_updated = pd.read_csv(updated_path)

# Only update these columns (exclude population columns)
columns_to_update = [
    'births_age1524', 'births_age2534', 'births_age3544',
    'births_married', 'births_unmarried',
    'births_medicaid', 'births_nonmedicaid',
    'births_total'
]

# Filter to 2023 rows
df_original_2023 = df_original[df_original['year'] == 2023]
df_non_2023 = df_original[df_original['year'] != 2023]
df_updated_2023 = df_updated[df_updated['year'] == 2023][['state', 'year', 'bmcode'] + columns_to_update]

# Merge updates into 2023 data
df_merged_2023 = pd.merge(df_original_2023, df_updated_2023, on=['state', 'year', 'bmcode'], how='left', suffixes=('', '_new'))

# Replace old values with updated ones
for col in columns_to_update:
    new_col = f"{col}_new"
    if new_col in df_merged_2023.columns:
        df_merged_2023[col] = df_merged_2023[new_col]
        df_merged_2023.drop(columns=[new_col], inplace=True)

# Combine and save
df_final = pd.concat([df_non_2023, df_merged_2023], ignore_index=True)
df_final.to_csv(output_path, index=False)

print(f"✅ Saved updated file as: {output_path}")


In [None]:
import pandas as pd

# File paths
original_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data with 2024.csv"
updated_path = original_path.replace("fertility_data with 2024.csv", "fertility_data_updated_v6.csv")
final_output_path = original_path.replace("fertility_data with 2024.csv", "fertility_data_fully_updated.csv")

# Load original and updated datasets
df_original = pd.read_csv(original_path)
df_updated = pd.read_csv(updated_path)

# Separate out the 2023 data in the original file
df_original_no2023 = df_original[df_original['year'] != 2023]

# Combine with the updated 2023 data
df_updated_2023 = df_updated[df_updated['year'] == 2023]

# Concatenate and save final output
df_final = pd.concat([df_original_no2023, df_updated_2023], ignore_index=True)
df_final.to_csv(final_output_path, index=False)

import ace_tools as tools; tools.display_dataframe_to_user(name="Fully Updated Fertility Data", dataframe=df_final)


## Race

In [45]:
files = [
    "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/race 2024/2024 births_hisp.csv",
    "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/race 2024/2024 births_nhblack.csv",
    "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/race 2024/2024 births_nhwhite.csv"
]

df0 = pd.concat([pd.read_csv(f) for f in files]).pivot(index=["state","month"], columns="race", values="births").reset_index()
df0.to_csv("/Users/shaokangyang/Downloads/2024_births_merged.csv", index=False)


In [46]:
df = pd.read_csv("/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_birth_update.csv")
df2023 = pd.read_csv("/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data.csv")

In [47]:
# Map month to bmonth (1=Jan/Feb ... 6=Nov/Dec)
df0["bmcode"] = ((df0["month"] - 1) // 2) + 1

# Merge on state + bmonth for year 2024
df.update(df[df.year == 2024]
    .drop(columns=["births_hisp","births_nhblack","births_nhwhite"])
    .merge(df.rename(columns={"hisp":"births_hisp","nhblack":"births_nhblack","nhwhite":"births_nhwhite"}),
           on=["state","bmcode"], how="left"))
df.loc[df.year == 2024, "births_otherraceeth"] = (df["births_total"] - df["births_hisp"] - df["births_nhblack"] - df["births_nhwhite"]) # update total births


In [43]:
df2023[['state', 'year', 'bmcode', "births_hisp","births_nhblack","births_nhwhite","births_otherraceeth", "births_total"]]

Unnamed: 0,state,year,bmcode,births_hisp,births_nhblack,births_nhwhite,births_otherraceeth,births_total
0,Alabama,2016,1,729.0,2837.0,5627.0,316.0,9509.0
1,Alabama,2016,2,733.0,2727.0,5562.0,312.0,9334.0
2,Alabama,2016,3,725.0,2841.0,5978.0,288.0,9832.0
3,Alabama,2016,4,805.0,3071.0,6113.0,364.0,10353.0
4,Alabama,2016,5,795.0,3043.0,6005.0,360.0,10203.0
...,...,...,...,...,...,...,...,...
2494,Wyoming,2023,3,150.0,,812.0,,1051.0
2495,Wyoming,2023,4,143.0,,817.0,,1065.0
2496,Wyoming,2023,5,153.0,,743.0,,986.0
2497,Wyoming,2023,6,126.0,10.0,714.0,90.0,940.0


In [20]:
import pandas as pd

# Re-define paths after kernel reset
original_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data with 2024.csv"
updated_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_updated_v6.csv"
final_output_path = "/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_fully_updated2.csv"

# Load original and updated datasets
df_original = pd.read_csv(original_path)
df_updated = pd.read_csv(updated_path)

# Columns to update
columns_to_update = [
    'births_age1524', 'births_age2534', 'births_age3544',
    'births_married', 'births_unmarried',
    'births_medicaid', 'births_nonmedicaid',
    'births_total',
    'pop_age1524', 'pop_age2534', 'pop_age3544', 'pop_total'
]

# Separate 2023 data
df_original_2023 = df_original[df_original['year'] == 2023]
df_non_2023 = df_original[df_original['year'] != 2023]
df_updated_2023 = df_updated[df_updated['year'] == 2023][['state', 'year', 'bmcode'] + columns_to_update]

# Merge while preserving other columns
df_merged_2023 = pd.merge(df_original_2023, df_updated_2023, on=['state', 'year', 'bmcode'], how='left', suffixes=('', '_new'))

# Replace updated columns
for col in columns_to_update:
    new_col = f"{col}_new"
    if new_col in df_merged_2023.columns:
        df_merged_2023[col] = df_merged_2023[new_col]
        df_merged_2023.drop(columns=[new_col], inplace=True)

# Recombine and save
df_final = pd.concat([df_non_2023, df_merged_2023], ignore_index=True)
df_final.to_csv(final_output_path, index=False)

import ace_tools as tools; tools.display_dataframe_to_user(name="Selective 2023 Update", dataframe=df_final)


ModuleNotFoundError: No module named 'ace_tools'

In [24]:
import pandas as pd

# load the "total" subgroup file
df = pd.read_csv("/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/fertility_results/2024/NB_births_total_6_through_june.csv")

# list all ypred columns
ypred_cols = [c for c in df.columns if c.startswith("ypred")]

# mean prediction for state 1, quarter 1
print("Mean ypred[1,1,1]:", df["ypred[1,1,48]"].mean())

# overall mean across all states & times
print("Overall mean prediction:", df[ypred_cols].values.mean())


Mean ypred[1,1,1]: 9336.086
Overall mean prediction: 12175.123730800653


In [4]:
df1 = pd.read_csv("/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data.csv")
df2 = pd.read_csv("/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_birth_update.csv")

In [5]:
import pandas as pd

# Example: ensure df2's policy_on, exposed_births, dobbs_code match df1
cols_to_update = ["policy_on", "exposed_births", "dobbs_code"]

df2 = df2.drop(columns=cols_to_update, errors="ignore").merge(
    df1[["state", "year", "bmcode"] + cols_to_update],
    on=["state", "year", "bmcode"],
    how="left"
)


In [11]:
df2.to_csv("/Users/shaokangyang/Library/CloudStorage/GoogleDrive-sky.ang510@gmail.com/My Drive/Code/dobbs_fertility/data/fertility_data_birth_update.csv",index=False)

In [9]:
df1.describe()

Unnamed: 0,year,bmcode,births_age1524,births_age2534,births_age3544,births_nohs,births_hs,births_somecoll,births_coll,births_married,...,pop_somecoll,pop_coll,pop_married,pop_unmarried,pop_age1524,pop_age2534,pop_age3544,policy_on,exposed_births,dobbs_code
count,2499.0,2499.0,2448.0,2448.0,2448.0,2448.0,2448.0,2448.0,2448.0,2412.0,...,2499.0,2499.0,2499.0,2499.0,2499.0,2499.0,2499.0,2448.0,2499.0,2142.0
mean,2019.591837,3.44898,2824.558007,7060.24183,2284.300245,1459.725899,3135.892565,3325.332108,4070.979575,6639.875622,...,310414.5,401369.2,491878.4,776724.2,418164.0,437986.8,412383.9,0.055556,0.039216,0.27451
std,2.355825,1.727219,3222.248529,7991.012564,3057.111689,1847.997979,3720.93413,3656.871243,4598.127448,6896.744852,...,357409.1,473773.2,561215.5,913821.6,475562.0,517402.4,479417.9,0.229108,0.194147,0.446371
min,2016.0,1.0,98.0,438.0,123.0,46.0,160.0,163.0,254.0,90.0,...,17566.65,25385.0,37917.0,56194.0,35347.0,34681.44,34057.0,0.0,0.0,0.0
25%,2018.0,2.0,833.0,1682.0,519.25,400.5,834.0,993.25,894.0,1657.75,...,87767.0,92897.0,128293.1,185581.5,107998.0,105079.0,105757.5,0.0,0.0,0.0
50%,2020.0,3.0,2004.0,4994.0,1280.5,1003.5,2023.0,2554.5,2469.0,4863.0,...,217691.0,240710.0,348965.0,504296.0,288253.0,290773.0,284884.0,0.0,0.0,0.0
75%,2022.0,5.0,3495.5,8323.25,2911.0,1675.75,3818.5,3957.0,5339.5,8415.0,...,354333.0,550604.5,646848.6,913428.0,474636.5,549328.5,486345.5,0.0,0.0,1.0
max,2024.0,6.0,21744.0,49042.0,19493.0,13024.0,21410.0,23035.0,25989.0,53083.0,...,2118518.0,2765170.0,3105761.0,5085989.0,2616379.0,2926700.0,2616635.0,1.0,1.0,1.0


In [10]:
df2.describe()

Unnamed: 0,year,bmcode,births_age1524,births_age2534,births_age3544,births_nohs,births_hs,births_somecoll,births_coll,births_married,...,pop_somecoll,pop_coll,pop_married,pop_unmarried,pop_age1524,pop_age2534,pop_age3544,policy_on,exposed_births,dobbs_code
count,2499.0,2499.0,2440.0,2440.0,2440.0,2440.0,2440.0,2440.0,2440.0,2404.0,...,2499.0,2499.0,2499.0,2499.0,2499.0,2499.0,2499.0,2448.0,2499.0,2142.0
mean,2019.591837,3.44898,2811.465574,7041.10082,2282.945082,1453.296721,3127.319262,3314.325,4057.122951,6623.8698,...,310414.5,401369.2,491878.4,776724.2,418164.0,437986.8,412383.9,0.055556,0.039216,0.27451
std,2.355825,1.727219,3196.252809,7935.949509,3046.361013,1829.517886,3699.476167,3629.111336,4570.611078,6820.825572,...,357409.1,473773.2,561215.5,913821.6,475562.0,517402.4,479417.9,0.229108,0.194147,0.446371
min,2016.0,1.0,103.0,434.0,123.0,46.0,160.0,163.0,254.0,90.0,...,17566.65,25385.0,37917.0,56194.0,35347.0,34681.44,34057.0,0.0,0.0,0.0
25%,2018.0,2.0,828.0,1671.25,513.75,398.5,831.5,980.75,893.5,1656.0,...,87767.0,92897.0,128293.1,185581.5,107998.0,105079.0,105757.5,0.0,0.0,0.0
50%,2020.0,3.0,2004.0,4995.0,1271.5,1003.5,2023.0,2554.5,2463.5,4863.0,...,217691.0,240710.0,348965.0,504296.0,288253.0,290773.0,284884.0,0.0,0.0,0.0
75%,2022.0,5.0,3495.5,8334.75,2918.0,1675.75,3818.5,3957.0,5339.5,8433.75,...,354333.0,550604.5,646848.6,913428.0,474636.5,549328.5,486345.5,0.0,0.0,1.0
max,2024.0,6.0,21744.0,49042.0,19493.0,13024.0,21410.0,23035.0,25989.0,53083.0,...,2118518.0,2765170.0,3105761.0,5085989.0,2616379.0,2926700.0,2616635.0,1.0,1.0,1.0
