# Example Usage

In [17]:
import pathlib
import random

import numpy as np
import pandas as pd

# Local modules
import functions as f

In [18]:
# Where files should be saved/loaded
RAW_FOLDER = pathlib.Path("./data/raw_data/")
DIFF_FOLDER = pathlib.Path("./data/diff/")
DOF_FOLDER = pathlib.Path("./data/CA_DOF/")

## Create Tables

In [19]:
# Import the module that allows us to create tables
import generate_tables as gt

### Creating Estimates Tables (generate_tables.EstimatesTables)

In [4]:
# Creating consolidated files using the vintage 2020_06 at the region and jurisdiction level
# "_ =" to supress output
_ = gt.EstimatesTables().consolidate("2020_06", 
        geo_list=["region", "jurisdiction"], 
        save=True,
        save_folder=RAW_FOLDER)

In [5]:
# Creating individual files using the vintage 2020_06 at the region and jurisdiction level for the
# Estimates tables household_income, age_ethnicity, and population
_ = gt.EstimatesTables().individual("2020_06",
        geo_list=["region", "jurisdiction"],
        est_table_list=["household_income", "age_ethnicity", "population"],
        save=True,
        save_folder=RAW_FOLDER)

### Creating DOF Data Files (generate_tables.CA_DOF)

In [6]:
# Creating DOF Data file.
# NOTE: See class documentation for some manual steps you need to do before running this code.
# Using the default raw_data and save locations
_ = gt.CA_DOF().get_CA_DOF_data(
    years=range(2010, 2022),
    geo_list=["region", "jurisdiction"])

### Creating Diff Files (generate_tables.DiffFiles)

In [7]:
# First create consolidated and individual files using the same parameters as before but with 
# a vintage of 2021_01
_ = gt.EstimatesTables().consolidate("2021_01", 
        geo_list=["region", "jurisdiction"], 
        save=True,
        save_folder=RAW_FOLDER)
_ = gt.EstimatesTables().individual("2021_01",
        geo_list=["region", "jurisdiction"],
        est_table_list=["household_income", "age_ethnicity"],
        save=True,
        save_folder=RAW_FOLDER)

In [8]:
# Create diff files 
_ = gt.DiffFiles().create_diff_tables("2020_06", "2021_01", 
        geo_list=["region", "jurisdiction"],
        est_table_list=["consolidated", "household_income", "age_ethnicity"],
        raw_data_folder=RAW_FOLDER,
        save_folder=DIFF_FOLDER)

## Run Checks

In [20]:
# Import the module that allows us to run checks
import perform_checks as pc

### Run Check 1

In [10]:
# Run internal consistency checks
pc.InternalConsistency().check_geography_aggregations(RAW_FOLDER, "2020_06", geo_list=["jurisdiction"])

Aggregating jurisdiction level data to region and comparing with region csv file
No errors



### Run Check 2

In [11]:
# TODO

### Run Check 3

In [12]:
# TODO

### Run Check 4

In [23]:
# Run checks that year over year Estimates values do not change by too much
pc.ThresholdAnalysis().yearly_change(RAW_FOLDER, "2020_06", "region", "population", threshold=5)

TypeError: ThresholdAnalysis.yearly_change() missing 1 required positional argument: 'col'

### Run Check 5

In [14]:
# N/A, done in Power BI

### Run Check 6

In [15]:
# Run checks that Estimates population values are within a certain range +/- 1.5% of CA DOF 
# population values
pc.DOFPopulation().region_DOF_population_comparison(DOF_FOLDER, RAW_FOLDER, "2020_06", threshold=1.5)

Running Check 6: DOF Total Population Comparison
Errors have occured on the following rows:
    Year  Est Total Population  Est Household Population  Est Group Quarters  \
0   2010               3095314                   2993348              101966   
11  2020               3343355                   3230945              112410   

    DOF Total Population  DOF Household Population  DOF Group Quarters  \
0                3095313                   2991515              103798   
11               3298634                   3172741              125893   

    % Total Population  % Household Population  % Group Quarters  
0             0.000032                0.061273          1.764967  
11            1.355743                1.834502         10.709889  



### Run Check 7

In [16]:
# TODO