In [15]:
import subprocess
import os
from google.cloud import storage
from google.cloud import bigquery
import pandas as pd
import re
import datetime

In [16]:
# CSV manifest that drives this notebook.
file = (
    "/home/jupyter/gitlab/data_engineering_convenience_code"
    "/gbq_migration_scripts/files/delete.csv"
)
# Log file that receives the metadata and deletion messages.
logfile = (
    "/home/jupyter/gitlab/data_engineering_convenience_code"
    "/gbq_migration_scripts/logs/delete.log"
)

In [17]:
# Load the manifest; the processing loop further down reads its
# 'project', 'schema', 'table' and 'action' columns.
df = pd.read_csv(file)
#df

In [18]:
# Module-level Cloud Storage and BigQuery clients, both created for the
# "polsbigquery" project; the helper functions below use them as globals.
storage_client = storage.Client(project="polsbigquery")
bigquery_client = bigquery.Client(project="polsbigquery")

In [19]:
def check_bucket_exists(bucket):
    """
    NAME: check_bucket_exists
    
    DESCRIPTION: Checks if the bucket exists by trying to fetch it with the
        module-level storage client
    
    PARAMETERS:
        bucket(str): name of the bucket
        
    RETURNS
        True if the lookup succeeded, False if the lookup raised any
        ordinary exception (not only "bucket not found")
    """
    
    try:
        storage_client.get_bucket(bucket)
    except Exception:
        # Deliberately broad: any failed lookup is reported as "does not exist".
        # Using Exception rather than a bare except lets KeyboardInterrupt and
        # SystemExit propagate, so the kernel can still be interrupted.
        return False
    return True
    
def capture_log_data(table_name):
    """
    NAME: capture_log_data
    
    DESCRIPTION: Fetches the table through the BigQuery client and renders
        its metadata as a text block for the log file
    
    PARAMETERS:
        table_name(str): name of the table instance
        
    RETURNS
        msg(str): a separator line followed by one "field: value" line for each
            of table_id, path, num_rows, num_bytes, modified, created, schema
    """
    
    # Single metadata lookup; every field in the message is read from it
    table = bigquery_client.get_table(table_name)
    
    # Build the log entry directly from the table attributes
    return f"""
-----------------------------------------------------------------------------------------------------------------------------------------------
table_id: {table.table_id}
path: {table.path}
num_rows: {table.num_rows}
num_bytes: {table.num_bytes}
modified: {table.modified}
created: {table.created}
schema: {table.schema}
"""
    
def delete_table(table_name):
    """
    NAME: delete_table
    
    DESCRIPTION: Deletes the table through the module-level BigQuery client
    
    PARAMETERS:
        table_name(str): name of the table
        
    RETURNS:
        msg(str): message to be sent to logfile with action time
        
    RAISES:
        Whatever the BigQuery client raises when the delete fails; nothing is
        caught here
    """
    
    # The client accepts the table ID string directly, so no separate
    # get_table round-trip is needed just to obtain a reference
    bigquery_client.delete_table(table_name)
    
    # Timestamp is the naive local time taken right after the delete returned
    deleted_at = datetime.datetime.now().isoformat()
    
    return f"""Table Action: Deleted {table_name} at {deleted_at}."""

def write_to_log_file(file, msg):
    """
    NAME: write_to_log_file
    
    DESCRIPTION: Appends a message to the log file, creating the log file's
        parent directory first if it does not exist yet
    
    PARAMETERS:
        file(str): name of the log file
        msg(str): message to be sent to logfile
    """
    
    # A missing log directory must not make the write fail: the caller logs
    # table deletions, which cannot be undone after the fact
    log_dir = os.path.dirname(file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    
    # Append with an explicit encoding so the result does not depend on the
    # platform default (the message may contain non-ASCII text)
    with open(file, 'a', encoding='utf-8') as f:
        f.write(msg)

In [21]:
# Delete every table whose manifest row has action == 'delete'.
# Zipping the columns iterates by position, so the loop does not depend on
# the DataFrame's index labels the way df[col][r] does.
failed_tables = []

for project, dataset, table, action in zip(df['project'], df['schema'], df['table'], df['action']):
    
    # Rows with any other action are left untouched
    if action != 'delete':
        continue
    
    table_name = f"{project}.{dataset}.{table}"
    
    try:
        # Persist the metadata BEFORE the delete: the delete cannot be undone,
        # so the record must already be in the log if anything fails afterwards
        metadata_msg = capture_log_data(table_name)
        write_to_log_file(logfile, metadata_msg)
        
        # Delete table and record the action
        delete_msg = delete_table(table_name)
        write_to_log_file(logfile, delete_msg)
        
    except Exception as exc:
        # One bad row (e.g. a table that is already gone) should not stop the
        # remaining rows; record the failure and carry on
        failed_tables.append(table_name)
        failure_msg = f"\nTable Action: FAILED to delete {table_name} at {datetime.datetime.now().isoformat()}: {exc!r}"
        write_to_log_file(logfile, failure_msg)

# Keep failures loud: the cell still errors, but only after every row was tried
if failed_tables:
    raise RuntimeError(f"Failed to delete {len(failed_tables)} table(s): {failed_tables}")
        