In [4]:
from snowflake.snowpark import Session
from dotenv import load_dotenv
import os
import pandas as pd
import xgboost as xgb
import zipfile

# Pull settings from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)

# Credentials come from the environment so they never live in the notebook.
# Fail fast, naming every variable that is unset or empty, instead of handing
# None values to the session builder and failing later with a less direct error.
_required_env_vars = ("SNOWFLAKE_ACCOUNT", "SNOWFLAKE_USER", "SNOWFLAKE_PASSWORD")
_missing_env_vars = [name for name in _required_env_vars if not os.environ.get(name)]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Set them in the environment or in a .env file."
    )

# Connection settings for the Snowpark session. Role, database, warehouse and
# default schema are fixed for this project; only the credentials vary per user.
connection_params = {
    "account": os.environ["SNOWFLAKE_ACCOUNT"],
    "user": os.environ["SNOWFLAKE_USER"],
    "password": os.environ["SNOWFLAKE_PASSWORD"],
    "role": "SYSADMIN",
    "database": "MARCH_MADNESS",
    "warehouse": "MARCH_MADNESS_WH",
    "schema": "COMMON",
}

# Session used by the cells below to write tables to Snowflake.
session = Session.builder.configs(connection_params).create()

In [5]:
# Load every CSV in the competition archive into its own table in the RAW schema.
# Each table is named after its file: directory prefix and the trailing ".csv"
# are dropped and the remainder is upper-cased. Existing tables are overwritten,
# so the cell can be re-run safely.
CSV_SUFFIX = ".csv"

with zipfile.ZipFile("../data/march-machine-learning-mania-2023.zip") as zf:
    # infolist() is the documented accessor for the archive's members.
    for member in zf.infolist():
        # Anything that is not a CSV is ignored.
        if not member.filename.endswith(CSV_SUFFIX):
            continue

        # The files are decoded as ISO-8859-1 rather than pandas' default UTF-8.
        with zf.open(member) as csv_file:
            df = pd.read_csv(csv_file, encoding="iso-8859-1")

        # Strip only the trailing extension (already verified by endswith above),
        # so a ".csv" appearing elsewhere in the file name is left intact.
        base_name = member.filename.split("/")[-1][: -len(CSV_SUFFIX)]
        table_name = f"RAW.{base_name.upper()}"

        # Upper-case the column names before writing.
        # NOTE(review): presumably so the columns can be referenced unquoted in
        # Snowflake SQL — confirm.
        df.columns = [col.upper() for col in df.columns]

        # mode="overwrite" replaces the table if it already exists.
        session.create_dataframe(df).write.save_as_table(
            table_name=table_name, mode="overwrite"
        )