# Time Series Analysis

In [1]:
#import bigquery
from google.cloud import bigquery
from google.cloud import bigquery_storage
 
#other needed libraries
import os
import pandas as pd
import numpy as np
import pandas_gbq
import seaborn as sns
import matplotlib.pyplot as plt


import warnings

# Silence FutureWarning messages before any plotting configuration runs.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Single hex colour kept under the module-level name `color`
# (presumably the accent colour for later plots — confirm against the plotting cells).
color = "#702A7D"

# Apply matplotlib's built-in "ggplot" style sheet.
plt.style.use("ggplot")

In [2]:
# Point Google auth at the gcloud application-default credentials file, but only
# when GOOGLE_APPLICATION_CREDENTIALS is not already set: a credentials path
# supplied by the environment is respected instead of being silently overwritten.
# The fallback is the same path the notebook has always used.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/jupyter/.config/gcloud/application_default_credentials.json",
)

# Instantiate the BigQuery client. Called with no arguments, so credentials and
# project are whatever the library infers from the environment.
# NOTE(review): the loading cell below queries through pandas_gbq and never passes
# this client — confirm it is used further down the notebook.
client = bigquery.Client()



In [6]:
# Fully qualified BigQuery dataset (project.dataset) that every table below lives in.
_DATASET = "yhcr-prd-bradfor-bia-core.CB_2649"


def _social_care_query(table, area_column="a.PCArea", extra_columns=()):
    """Build the SELECT used for one children's-social-services table.

    Every extract has the same shape: a fixed set of columns from the service
    table (aliased ``a``) LEFT JOINed to ``person`` (aliased ``p``) on
    ``person_id_hashed`` to pick up ``birth_datetime``. Only the table name,
    the postcode-area expression and any table-specific columns vary.

    Parameters
    ----------
    table : str
        Name of the service table inside ``_DATASET``.
    area_column : str
        SQL expression selected as the postcode-area column.
    extra_columns : iterable of str
        Additional ``a.``-qualified columns, placed after ``EthnicOrigin``.

    Returns
    -------
    str
        The SQL text. Table paths are backtick-quoted because the project id
        contains dashes.
    """
    columns = [
        "a.person_id_hashed",
        "a.YearOfBirth",
        area_column,
        "a.EthnicOrigin",
        *extra_columns,
        "a.StartDate",
        "a.EndDate",
        "p.birth_datetime",
    ]
    select_list = ",\n  ".join(columns)
    return f"""
SELECT
  {select_list}
FROM
  `{_DATASET}.{table}` AS a
LEFT JOIN
  `{_DATASET}.person` AS p
ON
  a.person_id_hashed = p.person_id_hashed
"""


# CPP Data — the only extract that also carries CPP_Category.
cpp_query = _social_care_query(
    "cb_bmbc_ChildrensSocialServices_CPP",
    extra_columns=("a.CPP_Category",),
)

# ------- CiC Data ---------------
# This table exposes the area as PCArea_Home; alias it to PCArea so all three
# result frames share the same column name.
cic_query = _social_care_query(
    "cb_bmbc_ChildrensSocialServices_CiC",
    area_column="a.PCArea_Home AS PCArea",
)

# -------- CiNP Data ----------
cinp_query = _social_care_query("cb_bmbc_ChildrensSocialServices_CiNP")

# Load Datasets

In [7]:
# Run the three extracts in order (CPP, CiC, CiNP) and bind each result to
# its own name.
cpp, cic, cinp = (
    pandas_gbq.read_gbq(sql) for sql in (cpp_query, cic_query, cinp_query)
)

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [8]:
# Summarise the CiC frame: per-column non-null counts and dtypes.
cic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1920 entries, 0 to 1919
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   person_id_hashed  1920 non-null   object             
 1   YearOfBirth       1920 non-null   Int64              
 2   PCArea            1920 non-null   object             
 3   EthnicOrigin      1920 non-null   object             
 4   StartDate         1920 non-null   dbdate             
 5   EndDate           963 non-null    dbdate             
 6   birth_datetime    1914 non-null   datetime64[us, UTC]
dtypes: Int64(1), datetime64[us, UTC](1), dbdate(2), object(3)
memory usage: 107.0+ KB


In [9]:
# Summarise the CPP frame: per-column non-null counts and dtypes.
cpp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4112 entries, 0 to 4111
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   person_id_hashed  4112 non-null   object             
 1   YearOfBirth       4112 non-null   Int64              
 2   PCArea            4112 non-null   object             
 3   EthnicOrigin      4112 non-null   object             
 4   CPP_Category      4112 non-null   object             
 5   StartDate         4112 non-null   dbdate             
 6   EndDate           3216 non-null   dbdate             
 7   birth_datetime    4110 non-null   datetime64[us, UTC]
dtypes: Int64(1), datetime64[us, UTC](1), dbdate(2), object(4)
memory usage: 261.1+ KB


In [10]:
# Display the CPP frame as the cell's output.
# NOTE(review): the saved output holds row-level records (hashed person IDs,
# ethnicity, birth dates) — consider clearing outputs before sharing the notebook.
cpp

Unnamed: 0,person_id_hashed,YearOfBirth,PCArea,EthnicOrigin,CPP_Category,StartDate,EndDate,birth_datetime
0,C9B1DB2054F1D69B674BA78B879110113E7EBB6CF229D4...,2021,,Asian/British Asian - Pakistani,Neglect,2021-01-26,NaT,2021-02-15 00:00:00+00:00
1,CE05511AE9E947ED3248AD446BFCFBBCBB0E07FDC458A9...,2021,,White - British,Emotional Abuse,2021-01-12,NaT,2021-05-15 00:00:00+00:00
2,A53B7CB131455D1FF1868DD79CF73BCD273FC814D512FB...,2021,,White - British,Neglect,2021-05-06,NaT,2021-05-15 00:00:00+00:00
3,A8761FA45B1F1618DDB6D1AC16A4EF230E25BC0BF59273...,2021,,White - British,Emotional Abuse,2020-11-05,NaT,2021-04-15 00:00:00+00:00
4,FC64B02447FD8AA2E9CAC290A5BA7A183F70EBC9225EE3...,2021,,Other Ethnic Group,Emotional Abuse,2020-11-24,NaT,2021-03-15 00:00:00+00:00
...,...,...,...,...,...,...,...,...
4107,F8421E6A3784CAF7D7124908D432B670AFABB21A861889...,2014,OO00,Mixed - Other,Emotional Abuse,2020-01-13,2020-02-24,2014-11-15 00:00:00+00:00
4108,8EB35DC12431478CD6A15C71806766571495FE73C00E93...,2018,OO00,White - Eastern European,Neglect,2020-01-22,2021-02-01,2018-06-15 00:00:00+00:00
4109,F1C9E2AEAF1A72C1EB5B8FB92CAAAAE62D9FAC8CE90ED1...,2019,OO00,White - Eastern European,Neglect,2020-01-22,2021-02-01,2019-11-15 00:00:00+00:00
4110,78620C6F421174EA35621CCD7D4B37113A2FA745546585...,2006,OO00,Black/Black British - African,Physical Abuse,2019-02-19,2019-10-03,2006-02-15 00:00:00+00:00
