# Child in Need Plan (CiNP) - Demographic Analysis

In [None]:
#import bigquery
from google.cloud import bigquery
from google.cloud import bigquery_storage
 
#other needed libraries
import os
import pandas as pd
import numpy as np
import pandas_gbq
import seaborn as sns
import matplotlib.pyplot as plt


import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides warnings that may matter (e.g. deprecations);
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Matplotlib style sheet and colour constants
plt.style.use('tableau-colorblind10')
color = '#702A7D'
grey_color = '#A9A9A9'


# Point the Google client libraries at the application-default credentials
# file, but only when the environment does not already specify credentials,
# so that a pre-configured GOOGLE_APPLICATION_CREDENTIALS is not overwritten.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/jupyter/.config/gcloud/application_default_credentials.json",
)

# Instantiate BigQuery client
client = bigquery.Client()

In [None]:
# SQL for the CiNP extract: selects person_id, YearOfBirth, PCArea,
# EthnicOrigin, StartDate and EndDate from the CiNP table (alias a) and
# LEFT JOINs the person table (alias p) on person_id to add birth_datetime
# (as DateOfBirth) and gender_source_value (as Gender). Because the join is a
# LEFT JOIN, CiNP rows without a matching person row are still returned.
cinp_query = """
SELECT
  a.person_id, a.YearOfBirth
  ,a.PCArea, a.EthnicOrigin
  ,a.StartDate, a.EndDate
  ,p.birth_datetime AS DateOfBirth
  ,p.gender_source_value AS Gender
FROM
    yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CiNP AS a
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
ON
    a.person_id = p.person_id
"""

In [None]:
# Run the query through pandas-gbq and keep the result as a DataFrame
cinp = pandas_gbq.read_gbq(cinp_query)
# Summarise the loaded columns (dtypes and non-null counts) as a quick check
cinp.info()

# Import Project functions

In [None]:
import sys
sys.path.append('../code')

from data_cleaning import fill_missing_dob, convert_dates_datatype, drop_duplicates, gender_mapping

from demographic_analysis_helpers import (
    analyse_person_ids,
    add_calculated_columns,
    plot_distributions,
    plot_age_distribution,
    plot_monthly_trends,
    plot_monthly_entries_exits,
    plot_running_total_in_intervention,
    plot_duration_in_intervention,
    plot_average_duration_by_age,
    plot_median_duration_by_age,
    plot_median_intervention_duration_over_time
)

In [None]:
# Step 1: fill missing DateOfBirth values (the helper is given the
# YearOfBirth and DateOfBirth column names)
cinp = fill_missing_dob(cinp, 'YearOfBirth', 'DateOfBirth')

# Step 2: convert the date columns' datatype
date_columns = ['StartDate', 'DateOfBirth', 'EndDate']
cinp = convert_dates_datatype(cinp, date_columns)

# Step 3: drop duplicates
# NOTE(review): the return value is discarded here, unlike the helpers above;
# confirm that drop_duplicates modifies cinp in place.
drop_duplicates(cinp)

# Step 4: clean the Gender column
# NOTE(review): return value also discarded; confirm gender_mapping works in place.
gender_mapping(cinp, 'Gender')

# Step 5: exclude the single data point from December 2021, i.e. any row whose
# EndDate compares greater than 2021-11-30. Rows for which the comparison is
# False (including a missing EndDate, presumably NaT) are kept.
ends_after_november_2021 = cinp['EndDate'] > '2021-11-30'
cinp = cinp.loc[~ends_after_november_2021]

# Step 6: add the calculated columns
cinp = add_calculated_columns(cinp)

# Summarise the cleaned DataFrame
cinp.info()

## Exploratory Analysis

In [None]:
# Plot categorical distributions for the CiNP data
# ('CiNP' presumably labels the plots; confirm in demographic_analysis_helpers)
plot_distributions(cinp, 'CiNP')

In [None]:
# Plot the age distribution of the CiNP data
plot_age_distribution(cinp, 'CiNP')

In [None]:
# Plot monthly trends (the helper is given the StartDate column name)
plot_monthly_trends(cinp, 'CiNP', 'StartDate')

In [None]:
# Plot monthly entries and exits trends
# (the helper is given the StartDate and EndDate column names)
plot_monthly_entries_exits(cinp, 'StartDate', 'EndDate', 'CiNP')

## Entries into CiNP for Each Financial Year

In [None]:
# Define financial years as (label, first day, last day), both days inclusive
financial_years = [
    ('2019/04-2020/03', '2019-04-01', '2020-03-31'),
    ('2020/04-2021/03', '2020-04-01', '2021-03-31'),
]

# Calculate entries for each financial year
entries_by_fy = []
for fy_label, start_date, end_date in financial_years:
    # Compare against the day AFTER the last day with a strict '<'. An inclusive
    # "<= 'YYYY-03-31'" on a datetime column only matches midnight on that day,
    # so entries timestamped later on 31 March would be missed.
    end_exclusive = (pd.Timestamp(end_date) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
    mask = (cinp['StartDate'] >= start_date) & (cinp['StartDate'] < end_exclusive)
    # Number of rows whose StartDate falls inside the financial year
    cinp_entries = int(mask.sum())
    entries_by_fy.append({
        'Financial Year': fy_label,
        'Total Entries': cinp_entries,
    })

# Create a DataFrame for the results
result_df = pd.DataFrame(entries_by_fy)

# Display as a formatted table (the bare expression renders in the notebook)
print("\nTotal Entries by Financial Year:")
result_df

In [None]:
# Plot demand in intervention as a running total
# (the helper is given the StartDate and EndDate column names)
plot_running_total_in_intervention(cinp, 'StartDate', 'EndDate', 'CiNP')

In [None]:
# Plot duration in intervention
# (the helper is given the StartDate and EndDate column names)
plot_duration_in_intervention(cinp, 'StartDate', 'EndDate', 'CiNP')

In [None]:
# Plot average duration in intervention by age
plot_average_duration_by_age(cinp, 'CiNP')

In [None]:
# Plot median duration in intervention by age group
plot_median_duration_by_age(cinp, 'CiNP')

In [None]:
# Plot median intervention duration over time: the helper is given the EndDate
# column, the 'num_of_days_in_intervention' column and time_freq='Y'
plot_median_intervention_duration_over_time(
    cinp,
    'CiNP',
    'EndDate',
    'num_of_days_in_intervention',
    time_freq='Y',
)