### Unzip all files

In [2]:
import os
import zipfile

# Specify the directory containing the zipped files
zip_directory = '/Users/jack.mccormick/OneDrive - Bonterra/TD39848'

# Specify the directory where you want to extract the files (desktop)
extract_directory = os.path.expanduser('/Users/jack.mccormick/Desktop/unzipped_rob')

# Clear existing files in the data folder
if os.path.exists(extract_directory):
    for filename in os.listdir(extract_directory):
        file_path = os.path.join(extract_directory, filename)
        if os.path.isfile(file_path):
            os.remove(file_path)
else:
    os.makedirs(extract_directory, exist_ok=True)
    
print("Old files deleted.")

# Create the extract directory if it doesn't exist
os.makedirs(extract_directory, exist_ok=True)

# Iterate over the files in the zip directory
for filename in os.listdir(zip_directory):
    if filename.endswith(".zip") and "FirstActions" in filename:
        zip_path = os.path.join(zip_directory, filename)
        
        # Extract the contents of the zip file
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_directory)

print("Files extracted successfully!")

Old files deleted.
Files extracted successfully!


### Push unzipped files to stage

In [3]:
from snowflake.snowpark import Session
import configparser
import os

config = configparser.ConfigParser()
config_path = os.path.join("..", "credentials.ini")
config.read(config_path)

# Create a Snowflake session
session = Session.builder.configs({
    'account':config["ea_snowflake"]["ACCOUNT"],
    'user':config["ea_snowflake"]["USER"],
    'password':config["ea_snowflake"]["PASSWORD"],
    'role':config["ea_snowflake"]["ROLE"],
    'warehouse':config["ea_snowflake"]["WAREHOUSE"],
    'database':'DATA_SCIENCE',
    'schema':'RW_EA_VALUETHEMES'
}).create()

local_folder_path = '/Users/jack.mccormick/Desktop/unzipped_rob'

# # Upload the zip files to the stage
# session.file.put(local_folder_path + '/*.txt', f'@ea_value_theme_stage/first_actions', auto_compress=False, overwrite=True)

In [6]:
import os

# Specify the path to the local folder
folder_path = '/Users/jack.mccormick/Desktop/unzipped_rob'

# Get a list of all files in the folder
file_list = os.listdir(folder_path)

### Move files from stage to respective tables

In [4]:
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType

event_attendance_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("EVENTID", StringType()),
    StructField("REQUESTEDEVENTID", StringType()),
    StructField("EVENTSIGNUPDATE", StringType())
    ])

event_signup_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("EVENTID", StringType()),
    StructField("REQUESTEDEVENTID", StringType()),
    StructField("EVENTSIGNUPDATECREATED", StringType())
    ])

online_forms_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("FORMSUBMITDATE", StringType()),
    StructField("COUNTFORMSUBMITS", IntegerType()),
    StructField("COUNTREFERREDFORMSUBMITS", IntegerType())
    ])

pledge_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("PLEDGEDATE", StringType()),
    StructField("COUNTPLEDGES", IntegerType()),
    StructField("COUNTATTRIBUTEDPLEDGES", IntegerType())
    ])

contributions_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("CONTACTCONTRIBUTIONSID", StringType()),
    StructField("DATERECEIVED", StringType()),
    StructField("CONTRIBUTIONATTRIBUTIONS", StringType())
    ])

contributions_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("CONTACTCONTRIBUTIONSID", StringType()),
    StructField("DATERECEIVED", StringType()),
    StructField("CONTRIBUTIONATTRIBUTIONS", StringType())
    ])

first_action_schema = StructType([
    StructField("DATABASENAME", StringType()),
    StructField("STATEID", StringType()),
    StructField("COMMITTEEID", IntegerType()),
    StructField("VANID", IntegerType()), 
    StructField("DATECREATED", StringType()), 
    StructField("DATESUPPRESSED", StringType()), 
    StructField("DATEACQUIRED", StringType()),
    StructField("FIRSTCONTRIBUTION", StringType()), 
    StructField("FIRSTEVENTSIGNUP", StringType()), 
    StructField("FIRSTEVENTATTENDANCE", StringType()), 
    StructField("FIRSTONLINEFORM", StringType()),
    StructField("FIRSTPLEDGE", StringType())
    ])

In [10]:
def copy_table_function (file_name):
    if "EventSignupsByDay" in file_name:
        staging_file = session.read.options({'field_delimiter':'\t','skip_header':1}).schema(schema=event_signup_schema).csv(f'@ea_value_theme_stage/{file_name}')
        staging_file.copy_into_table('EVENT_SIGN_UPS_BY_DAY')
    elif "EventAttendanceByDay" in file_name:
        staging_file = session.read.options({'field_delimiter':'\t','skip_header':1}).schema(schema=event_attendance_schema).csv(f'@ea_value_theme_stage/{file_name}')
        staging_file.copy_into_table('EVENT_ATTENDANCE_BY_DAY')
    elif "OnlineForms" in file_name:
        staging_file = session.read.options({'field_delimiter':'\t','skip_header':1}).schema(schema=online_forms_schema).csv(f'@ea_value_theme_stage/{file_name}')
        staging_file.copy_into_table('ONLINE_FORMS')
    elif "Pledges" in file_name:
        staging_file = session.read.options({'field_delimiter':'\t','skip_header':1}).schema(schema=pledge_schema).csv(f'@ea_value_theme_stage/{file_name}')
        staging_file.copy_into_table('PLEDGES')
    elif "ContribsByDay" in file_name:
        staging_file = session.read.options({'field_delimiter':'\t','skip_header':1}).schema(schema=contributions_schema).csv(f'@ea_value_theme_stage/{file_name}')
        staging_file.copy_into_table('CONTRIBUTIONS')
    elif "FirstActions" in file_name:
            staging_file = session.read.options({'field_delimiter':'\t','skip_header':1}).schema(schema=first_action_schema).csv(f'@ea_value_theme_stage/first_actions/{file_name}')
            staging_file.copy_into_table('FIRST_ACTIONS')

In [12]:
for f in file_list:
    print(f)
    copy_table_function(f)

TD39848_ValueThemeMetrics_FirstActions_20240411_18.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_19.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_8.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_9.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_22.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_23.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_21.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_20.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_2.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_11.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_10.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_3.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_1.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_12.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_13.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_4.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_17.txt
TD39848_ValueThemeMetrics_FirstActions_20240411_16.txt
TD39848_ValueThe