## Setup

In [1]:
import os
import sys
import logging
import pandas as pd
from google.cloud import bigquery
from hashlib import md5
from typing import List


# **** SETUP ****

# Directory that holds the input data; change to match your filesystem.
DATA_DIR = "../data/"
# Default input file, located inside DATA_DIR.
DEFAULT_RECEIPTS_FILE = os.path.join(DATA_DIR, "tickets.json")

# Project and dataset identifiers; change to match your gcloud project.
PROJECT_NAME = "deb-01-372112"
DATASET_NAME = "tickets"


# **** TABLE SCHEMAS ****

# Per-table metadata keyed by a short table identifier. Each entry carries the
# table name ('table_name') and its ordered list of column definitions
# ('schema') expressed as bigquery.SchemaField objects.
TABLE_METADATA = {
    'airlines': {
        'table_name': 'airlines',
        'schema': [
            # a dataframe index is written only if it is named in the schema
            bigquery.SchemaField('airline_iata', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airline_name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airline_icao', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airline_callsign', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airline_country', 'string', mode='REQUIRED'),
        ],
    },
    'airports': {
        'table_name': 'airports',
        'schema': [
            # a dataframe index is written only if it is named in the schema
            bigquery.SchemaField('airport_iata', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airport_city', 'string', mode='REQUIRED'),
            # the only column in this table that allows NULL values
            bigquery.SchemaField('airport_country', 'string', mode='NULLABLE'),
            bigquery.SchemaField('airport_name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airport_icao', 'string', mode='REQUIRED'),
            bigquery.SchemaField('airport_latitude', 'float', mode='REQUIRED'),
            bigquery.SchemaField('airport_longitude', 'float', mode='REQUIRED'),
            bigquery.SchemaField('airport_altitude', 'int64', mode='REQUIRED'),
            bigquery.SchemaField('airport_tz_timezone', 'string', mode='REQUIRED'),
        ],
    },
    'passengers': {
        'table_name': 'passengers',
        'schema': [
            bigquery.SchemaField('passenger_sk', 'string', mode='REQUIRED'),
            bigquery.SchemaField('first_name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('last_name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('gender', 'string', mode='REQUIRED'),
            # BigQuery has no 'date_time' type; the type name is DATETIME.
            bigquery.SchemaField('birth_date', 'datetime', mode='REQUIRED'),
            bigquery.SchemaField('email', 'string', mode='REQUIRED'),
            bigquery.SchemaField('street', 'string', mode='REQUIRED'),
            bigquery.SchemaField('city', 'string', mode='REQUIRED'),
            bigquery.SchemaField('state', 'string', mode='REQUIRED'),
            bigquery.SchemaField('zip', 'string', mode='REQUIRED'),
            bigquery.SchemaField('start_date', 'datetime', mode='REQUIRED'),
            bigquery.SchemaField('end_date', 'datetime', mode='REQUIRED'),
        ],
    },
}


# **** SETUP LOGGING ****
# Configure the root logger: records are written to stdout and formatted as
# [LEVEL][timestamp][module:line] : message
logging.basicConfig(
    format='[%(levelname)-5s][%(asctime)s][%(module)s:%(lineno)04d] : %(message)s',
    level=logging.INFO,
    stream=sys.stdout,
)
# getLogger() with no name always returns the root logger. Passing the string
# 'root' only does so on Python 3.9+; older versions create a separate logger
# that merely happens to be named "root".
logger: logging.Logger = logging.getLogger()
# Override the INFO level configured above so DEBUG records are emitted too.
logger.setLevel(logging.DEBUG)


# **** BIGQUERY CLIENT ****
# The client is built without arguments, so project and credentials are left
# to the library's defaults.
# NOTE(review): PROJECT_NAME is not passed here - confirm that is intentional.
logger.debug("Creating bigquery client")
client = bigquery.Client()

logger.info("Setup Completed")

[DEBUG][2023-01-08 13:37:22,971][4118340907:0082] : Creating bigquery client
[INFO ][2023-01-08 13:37:22,985][4118340907:0085] : Setup Completed
