# Notebook to Export Data for Analysis

In [None]:
import os
import pandas as pd
import pandas_gbq
from google.cloud import bigquery

# Set up Google Application Credentials.
# The path is the gcloud application-default credentials file for the
# `jupyter` user; it must exist before the client below is created.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/home/jupyter/.config/gcloud/application_default_credentials.json"

# Instantiate the BigQuery client.
client = bigquery.Client()

## Load Demographic Analysis Data

In [None]:
# SQL for the demographic analysis extracts. This cell only defines query
# strings; nothing is sent to BigQuery here.
# NOTE: cpp_query, lac_query and cinp_query are rebound further down by the
# LSOA query cell, so the demographic export cell must run before that one.

# CPP data: one row per record in cb_bmbc_ChildrensSocialServices_CPP, with
# person.birth_datetime attached as DateOfBirth via a LEFT JOIN on person_id
# (rows without a matching person are kept, with a NULL DateOfBirth).
cpp_query = """
SELECT
  a.person_id, a.YearOfBirth
  ,a.PCArea, a.EthnicOrigin, a.CPP_Category
  ,a.StartDate, a.EndDate
  ,p.birth_datetime AS DateOfBirth
FROM
    yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CPP AS a
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
ON
    a.person_id = p.person_id
"""

# LAC data, read from the ..._CiC table.
# The LatestAssessment CTE numbers each person's assessment rows by StartDate
# descending and carries the factor category from cb_FactorLookup; the outer
# query keeps only rn = 1, i.e. the row with the most recent StartDate.
# If several assessment rows share that StartDate, the ORDER BY does not
# determine which of them receives rn = 1.
# PCArea_Home is aliased to PCArea so the output column matches the CPP and
# CiNP extracts.

lac_query = """
WITH LatestAssessment AS (
    SELECT
        a.person_id,
        a.StartDate,
        ROW_NUMBER() OVER (PARTITION BY a.person_id ORDER BY a.StartDate DESC) as rn,
        f.category
    FROM
        yhcr-prd-bradfor-bia-core.CB_2649.tbl_bmbc_Childrens_Social_Services_Assessments AS a
    LEFT JOIN
        yhcr-prd-bradfor-bia-core.CB_2649.cb_FactorLookup AS f
        ON a.factorid = f.factorid
)
SELECT
  a.person_id, a.YearOfBirth
  ,a.PCArea_Home AS PCArea, a.EthnicOrigin
  ,a.StartDate, a.EndDate
  ,p.birth_datetime AS DateOfBirth
  ,la.category AS Category
FROM
  yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CiC AS a

LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
ON
    a.person_id = p.person_id
LEFT JOIN
    LatestAssessment la
    ON a.person_id = la.person_id
    AND la.rn = 1;
"""
# CiNP data: same shape as the LAC query (same LatestAssessment CTE and joins),
# but reading ..._CiNP and selecting its PCArea column directly.
cinp_query = """
WITH LatestAssessment AS (
    SELECT
        a.person_id,
        a.StartDate,
        ROW_NUMBER() OVER (PARTITION BY a.person_id ORDER BY a.StartDate DESC) as rn,
        f.category
    FROM
        yhcr-prd-bradfor-bia-core.CB_2649.tbl_bmbc_Childrens_Social_Services_Assessments AS a
    LEFT JOIN
        yhcr-prd-bradfor-bia-core.CB_2649.cb_FactorLookup AS f
        ON a.factorid = f.factorid
)
SELECT
  a.person_id, a.YearOfBirth
  ,a.PCArea, a.EthnicOrigin
  ,a.StartDate, a.EndDate
  ,p.birth_datetime AS DateOfBirth
  ,la.category AS Category
FROM
  yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CiNP AS a
  
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
ON
    a.person_id = p.person_id
LEFT JOIN
    LatestAssessment la
    ON a.person_id = la.person_id
    AND la.rn = 1;
"""

## Export Demographic Analysis Data to CSV

In [None]:
# Run the three demographic queries and write each result to data/<name>.csv.
# This cell must run before the LSOA query cell further down, which rebinds
# cpp_query, lac_query and cinp_query to different SQL.
# NOTE(review): this cell writes to "data/" while the LSOA export cell writes
# to "../data/" -- confirm which location is intended.

# Create the output folder before querying, so a missing directory cannot make
# to_csv fail after the BigQuery query has already been run.
os.makedirs("data", exist_ok=True)

print("Querying CPP data...")
cpp = pandas_gbq.read_gbq(cpp_query)
# The frame is bound to `cpp`; the original referenced an undefined `cpp_df`.
print(f"Loaded {len(cpp)} rows for CPP.")

print("Saving CPP data to CSV...")
cpp.to_csv("data/cpp.csv", index=False)

print("Querying LAC data...")
# The LAC query string is named `lac_query`; no `cic_query` is defined.
lac = pandas_gbq.read_gbq(lac_query)
print(f"Loaded {len(lac)} rows for LAC.")

print("Saving LAC data to CSV...")
lac.to_csv("data/lac.csv", index=False)

print("Querying CiNP data...")
cinp = pandas_gbq.read_gbq(cinp_query)
print(f"Loaded {len(cinp)} rows for CiNP.")

print("Saving CiNP data to CSV...")
cinp.to_csv("data/cinp.csv", index=False)

print("Data export complete!")

## Load LSOA Analysis Data

In [None]:
# SQL for the LSOA analysis extracts. This cell only defines query strings.
# NOTE: it rebinds lac_query, cpp_query and cinp_query, replacing the
# demographic versions defined earlier in the notebook.
#
# All three queries share the same shape:
#   * LatestAssessment CTE: numbers each person's assessment rows by StartDate
#     descending and carries the cb_FactorLookup category; only rn = 1 is
#     joined back. Ties on StartDate are not resolved by the ORDER BY.
#   * LEFT JOIN person for birth_datetime (aliased DateOfBirth).
#   * LEFT JOIN personLSOA for the LSOA column.
# NOTE(review): a person with more than one personLSOA row would fan out into
# several output rows -- confirm that table holds one row per person.

# LAC query (reads the ..._CiC table).
lac_query = """
WITH LatestAssessment AS (
    SELECT
        a.person_id,
        a.StartDate,
        ROW_NUMBER() OVER (PARTITION BY a.person_id ORDER BY a.StartDate DESC) as rn,
        f.category
    FROM
        yhcr-prd-bradfor-bia-core.CB_2649.tbl_bmbc_Childrens_Social_Services_Assessments AS a
    LEFT JOIN
        yhcr-prd-bradfor-bia-core.CB_2649.cb_FactorLookup AS f
        ON a.factorid = f.factorid
)

SELECT
    cic.person_id,
    cic.PCArea_Home AS PCArea,
    cic.StartDate,
    cic.EndDate,
    cic.EthnicOrigin,
    p.birth_datetime AS DateOfBirth,
    lsoa.LSOA,
    la.category as Category
FROM
    yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CiC AS cic
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
    ON cic.person_id = p.person_id
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.personLSOA AS lsoa
    ON cic.person_id = lsoa.person_id
LEFT JOIN
    LatestAssessment la
    ON cic.person_id = la.person_id
    AND la.rn = 1;
"""

# CPP query (reads the ..._CPP table). Category here comes from the latest
# assessment, not from the CPP_Category column used by the demographic query.
# NOTE(review): this selects cpp.PCArea_Home, whereas the demographic CPP query
# earlier in the notebook selects PCArea from the same table -- confirm the
# column exists on ..._CPP; it looks copied from the LAC query.

cpp_query = """
WITH LatestAssessment AS (
    SELECT
        a.person_id,
        a.StartDate,
        ROW_NUMBER() OVER (PARTITION BY a.person_id ORDER BY a.StartDate DESC) as rn,
        f.category
    FROM
        yhcr-prd-bradfor-bia-core.CB_2649.tbl_bmbc_Childrens_Social_Services_Assessments AS a
    LEFT JOIN
        yhcr-prd-bradfor-bia-core.CB_2649.cb_FactorLookup AS f
        ON a.factorid = f.factorid
)

SELECT
    cpp.person_id,
    cpp.PCArea_Home AS PCArea,
    cpp.StartDate,
    cpp.EndDate,
    cpp.EthnicOrigin,
    p.birth_datetime AS DateOfBirth,
    lsoa.LSOA,
    la.category as Category
FROM
    yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CPP AS cpp
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
    ON cpp.person_id = p.person_id
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.personLSOA AS lsoa
    ON cpp.person_id = lsoa.person_id
LEFT JOIN
    LatestAssessment la
    ON cpp.person_id = la.person_id
    AND la.rn = 1;
"""

# CiNP query (reads the ..._CiNP table).
# NOTE(review): this selects cinp.PCArea_Home, whereas the demographic CiNP
# query earlier in the notebook selects PCArea from the same table -- confirm
# the column exists on ..._CiNP.

cinp_query = """
WITH LatestAssessment AS (
    SELECT
        a.person_id,
        a.StartDate,
        ROW_NUMBER() OVER (PARTITION BY a.person_id ORDER BY a.StartDate DESC) as rn,
        f.category
    FROM
        yhcr-prd-bradfor-bia-core.CB_2649.tbl_bmbc_Childrens_Social_Services_Assessments AS a
    LEFT JOIN
        yhcr-prd-bradfor-bia-core.CB_2649.cb_FactorLookup AS f
        ON a.factorid = f.factorid
)

SELECT
    cinp.person_id,
    cinp.PCArea_Home AS PCArea,
    cinp.StartDate,
    cinp.EndDate,
    cinp.EthnicOrigin,
    p.birth_datetime AS DateOfBirth,
    lsoa.LSOA,
    la.category as Category
FROM
    yhcr-prd-bradfor-bia-core.CB_2649.cb_bmbc_ChildrensSocialServices_CiNP AS cinp
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.person AS p
    ON cinp.person_id = p.person_id
LEFT JOIN
    yhcr-prd-bradfor-bia-core.CB_2649.personLSOA AS lsoa
    ON cinp.person_id = lsoa.person_id
LEFT JOIN
    LatestAssessment la
    ON cinp.person_id = la.person_id
    AND la.rn = 1;
"""

## Export LSOA Analysis Data to CSV

In [None]:
# Run the three LSOA queries and write each result to ../data/<name>_lsoa.csv.
# Relies on lac_query, cpp_query and cinp_query as defined by the LSOA query
# cell directly above (not the demographic versions from earlier).

# Create the output folder before querying, so a missing directory cannot make
# to_csv fail after the BigQuery query has already been run.
os.makedirs("../data", exist_ok=True)

print("Querying LAC_LSOA data...")
lac_lsoa = pandas_gbq.read_gbq(lac_query)
print(f"Loaded {len(lac_lsoa)} rows for LAC.")

print("Saving LAC_LSOA data to CSV...")
lac_lsoa.to_csv("../data/lac_lsoa.csv", index=False)

print("Querying CPP_LSOA data...")
cpp_lsoa = pandas_gbq.read_gbq(cpp_query)
print(f"Loaded {len(cpp_lsoa)} rows for CPP.")

print("Saving CPP_LSOA data to CSV...")
cpp_lsoa.to_csv("../data/cpp_lsoa.csv", index=False)

print("Querying CiNP_LSOA data...")
cinp_lsoa = pandas_gbq.read_gbq(cinp_query)
print(f"Loaded {len(cinp_lsoa)} rows for CINP.")

print("Saving CiNP_LSOA data to CSV...")
cinp_lsoa.to_csv("../data/cinp_lsoa.csv", index=False)