# Example Usage

In [1]:
import pathlib
import random

import numpy as np
import pandas as pd

# Local modules
import functions as f

In [2]:
# Folders the notebook saves files to and loads files from. All three are relative paths,
# listed in the same order as the names they are bound to:
#   RAW_FOLDER  <- "./data/raw_data/"
#   DIFF_FOLDER <- "./data/diff/"
#   DOF_FOLDER  <- "./data/CA_DOF/"
RAW_FOLDER, DIFF_FOLDER, DOF_FOLDER = (
    pathlib.Path(location)
    for location in ("./data/raw_data/", "./data/diff/", "./data/CA_DOF/")
)

## Create Tables

In [3]:
# Import the module that allows us to create tables
import generate_tables as gt

### Creating Estimates Tables (generate_tables.EstimatesTables)

In [4]:
# Build the consolidated files for vintage 2020_06 at both the region and the jurisdiction
# level, saving them under RAW_FOLDER.
# The result is assigned to "_" only to suppress the cell's output.
_ = gt.EstimatesTables().consolidate(
    "2020_06",
    geo_list=["region", "jurisdiction"],
    save=True,
    save_folder=RAW_FOLDER,
)

In [5]:
# Build one file per Estimates table (household_income and age_ethnicity) for vintage
# 2020_06 at both the region and the jurisdiction level, saving them under RAW_FOLDER.
# The result is assigned to "_" only to suppress the cell's output.
_ = gt.EstimatesTables().individual(
    "2020_06",
    geo_list=["region", "jurisdiction"],
    est_table_list=["household_income", "age_ethnicity"],
    save=True,
    save_folder=RAW_FOLDER,
)

### Creating DOF Data Files (generate_tables.CA_DOF)

In [6]:
# Build the DOF data file for the years 2010 through 2021 (the range end is exclusive) at
# both the region and the jurisdiction level.
# NOTE: The class documentation lists some manual steps that must be done before running
# this cell.
# No folders are passed, so the default raw_data and save locations are used.
_ = gt.CA_DOF().get_CA_DOF_data(
    years=range(2010, 2022),
    geo_list=["region", "jurisdiction"],
)

### Creating Diff Files (generate_tables.DiffFiles)

In [7]:
# A diff needs two vintages, so first build the consolidated and individual files again
# with the same parameters as before, this time for vintage 2021_01.

# Consolidated files
_ = gt.EstimatesTables().consolidate(
    "2021_01",
    geo_list=["region", "jurisdiction"],
    save=True,
    save_folder=RAW_FOLDER,
)

# Individual files for the household_income and age_ethnicity Estimates tables
_ = gt.EstimatesTables().individual(
    "2021_01",
    geo_list=["region", "jurisdiction"],
    est_table_list=["household_income", "age_ethnicity"],
    save=True,
    save_folder=RAW_FOLDER,
)

In [8]:
# Create the diff files between vintages 2020_06 and 2021_01 for the consolidated,
# household_income and age_ethnicity tables at the region and jurisdiction level.
# Input files are read from RAW_FOLDER and the diff files are saved under DIFF_FOLDER.
_ = gt.DiffFiles().create_diff_tables(
    "2020_06",
    "2021_01",
    geo_list=["region", "jurisdiction"],
    est_table_list=["consolidated", "household_income", "age_ethnicity"],
    raw_data_folder=RAW_FOLDER,
    save_folder=DIFF_FOLDER,
)

## Run Checks

In [9]:
# Import the module that allows us to run checks
import perform_checks as pc

### Run Check 1

In [10]:
# Internal consistency check on the 2020_06 files in RAW_FOLDER, starting from the
# jurisdiction level.
pc.InternalConsistency().check_geography_aggregations(
    RAW_FOLDER,
    "2020_06",
    geo_list=["jurisdiction"],
)

Aggregating jurisdiction level data to region and comparing with region csv file
No errors



In [11]:
# Intentionally introduce an error in one row to show what the error output looks like:
# load the 2020_06 jurisdiction-level consolidated file and zero out "units" in the row
# with index label 0.
df = f.load(RAW_FOLDER, "2020_06", "jurisdiction", "consolidated")
df.at[0, "units"] = 0

try:
    # Overwrite the saved file with the corrupted copy, then run the internal consistency
    # checks against it.
    f.save(df, RAW_FOLDER, "2020_06", "jurisdiction", "consolidated")
    pc.InternalConsistency().check_geography_aggregations(
        RAW_FOLDER, "2020_06", geo_list=["jurisdiction"]
    )
finally:
    # Fix the error by just downloading the file again. This runs in a `finally` block so
    # the corrupted file is replaced even if saving or checking raises; otherwise every
    # later cell (and every later run) would silently read the bad data.
    _ = gt.EstimatesTables().consolidate(
        "2020_06", geo_list=["jurisdiction"], save=True, save_folder=RAW_FOLDER
    )

Aggregating jurisdiction level data to region and comparing with region csv file
      region  yr_id  Under 5  5 to 9  10 to 14  15 to 17  18 and 19  20 to 24  \
0  San Diego   2010   203423  194029    198716    128000      97095    270750   

   25 to 29  30 to 34  35 to 39  40 to 44  45 to 49  50 to 54  55 to 59  \
0    250737    220185    211012    209551    219795    210980    180305   

   60 and 61  62 to 64  65 to 69  70 to 74  75 to 79  80 to 84  85 and Older  \
0      64088     85223    103241     77313     64347     52564         53960   

   Hispanic  Non-Hispanic, White  Non-Hispanic, Black  \
0    991348              1500048               146600   

   Non-Hispanic, American Indian or Alaska Native  Non-Hispanic, Asian  \
0                                           14098               328058   

   Non-Hispanic, Hawaiian or Pacific Islander  Non-Hispanic, Other  \
0                                       13504                 6715   

   Non-Hispanic, Two or More Races  Les

### Run Check 2

In [12]:
# TODO

### Run Check 3

In [13]:
# TODO

### Run Check 4

In [14]:
# TODO

### Run Check 5

In [15]:
# N/A, done in Power BI

### Run Check 6

In [16]:
# TODO

### Run Check 7

In [17]:
# TODO