In [None]:
-- Create (or replace) a storage integration for Google Cloud Storage.
-- Only locations under gcs://sf-example are allowed through this integration.
CREATE OR REPLACE STORAGE INTEGRATION my_gcs_integration
TYPE = EXTERNAL_STAGE
STORAGE_PROVIDER = 'GCS'
STORAGE_ALLOWED_LOCATIONS = ('gcs://sf-example')
ENABLED = TRUE;


In [None]:
-- Create (or replace) an external stage whose URL is the JSON file in the
-- sf-example bucket. Access goes through my_gcs_integration, and the stage's
-- file format is JSON.
CREATE OR REPLACE STAGE my_external_stage
URL = 'gcs://sf-example/ElectricVehiclePopulationData.json'
STORAGE_INTEGRATION = my_gcs_integration
FILE_FORMAT = (TYPE = 'JSON');


In [None]:
-- Landing table: a single VARIANT column that receives the loaded JSON.
-- CREATE OR REPLACE drops any existing table of the same name first.
CREATE OR REPLACE TABLE SF_JSON_TABLE (
    json_data VARIANT
);

-- Load from the external stage into the landing table, reading the staged
-- data as JSON.
COPY INTO SF_JSON_TABLE
FROM @my_external_stage
FILE_FORMAT = (TYPE = 'JSON');


In [None]:
-- Return the loaded JSON column.
-- NOTE(review): presumably this cell's result is what the Python cell below
-- reads as `cell4` -- confirm the cell name matches.
select json_data from SF_JSON_TABLE

In [None]:
import pandas as pd
import json

# Convert the result referenced as `cell4` into a pandas DataFrame.
# NOTE(review): `cell4` is not defined in this cell; presumably it is the
# result of the SQL select cell above -- confirm.
df = cell4.to_pandas()

# Take the JSON_DATA value from the row labelled 0; json.loads below expects
# it to be a JSON-encoded string.
json_string = df.loc[0,'JSON_DATA']

# Decode the JSON text into Python objects.
data = json.loads(json_string)

# Show the first entry under the 'data' key as a quick sanity check.
data['data'][0]


In [None]:
# this function will clean up the column names,
# removing any problematic characters
import re

def rename_column(column_name: str) -> str:
    """
    Renames a column name to be Snowflake-friendly.

    Snowflake column names:
    - Must start with a letter or underscore
    - Can contain letters, digits, or underscores
    - Cannot contain spaces or special characters (except for underscores)

    The name is lower-cased, spaces become underscores, and every character
    other than ASCII letters, digits and underscores is dropped. If what
    remains is empty or begins with a digit, it is prefixed with two
    underscores.

    Args:
        column_name: The raw column label.

    Returns:
        The sanitized column name. A label with no usable characters
        (for example "" or "!!!") yields "__" rather than raising IndexError.
    """
    # Replace spaces with underscores
    column_name = column_name.replace(" ", "_").lower()

    # Remove special characters (except underscores)
    column_name = re.sub(r'[^a-zA-Z0-9_]', '', column_name)

    # Ensure the column name starts with a letter or underscore.
    # The emptiness check comes first so indexing [0] can never fail when
    # sanitizing stripped every character.
    if not column_name or column_name[0].isdigit():
        column_name = f'__{column_name}'

    return column_name

def extract_columns(raw_data: dict) -> list:
    """Build (column_name, column_type) pairs from the dataset's metadata.

    Reads the column descriptors found at raw_data['meta']['view']['columns'].
    Each descriptor's 'name' is passed through rename_column. The type tag is
    'NUMBER' when the descriptor's 'renderTypeName' equals 'number', and
    'TEXT' otherwise (including when that key is absent).

    Args:
        raw_data: Parsed JSON document containing the metadata described above.

    Returns:
        A list of (renamed_column, col_type) tuples, in descriptor order.
    """
    def _type_tag(descriptor: dict) -> str:
        # Anything that is not explicitly rendered as a number is text.
        if descriptor.get('renderTypeName', '') == 'number':
            return 'NUMBER'
        return 'TEXT'

    return [
        (rename_column(descriptor['name']), _type_tag(descriptor))
        for descriptor in raw_data['meta']['view']['columns']
    ]
    

In [None]:
# Derive (name, type) pairs for every column described in the parsed metadata.
cols = extract_columns(data)

# Display the pairs for inspection.
cols

In [None]:
# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
session = get_active_session()


# Create the dataset directly!
# Only the cleaned column names are used as the schema here; the NUMBER/TEXT
# tags stored alongside them in `cols` are ignored in this cell.
schema = [c[0] for c in cols]

# NOTE: this rebinds `df`, which an earlier cell used for the pandas frame.
df = session.createDataFrame(data=data['data'],schema=schema )

# Print a preview of the resulting DataFrame.
df.show()


In [None]:
from snowflake.snowpark.functions import col
from snowflake.snowpark.types import StringType, StructType, StructField, IntegerType

def create_electric_car_objects(data, cols, session):
    """Create a Snowpark DataFrame from `data` with an explicit typed schema.

    Args:
        data: Row data handed unchanged to session.createDataFrame.
        cols: Iterable of (column_name, column_type) pairs. A column_type of
            'NUMBER' maps to IntegerType; anything else maps to StringType.
        session: Object providing createDataFrame (the Snowpark session).

    Returns:
        The DataFrame produced by session.createDataFrame.
    """
    # One nullable field per column, typed from its tag.
    fields = []
    for name, type_tag in cols:
        if type_tag == 'NUMBER':
            field_type = IntegerType()
        else:
            field_type = StringType()
        fields.append(StructField(name, field_type, True))

    # Build the DataFrame from the rows using the typed schema.
    return session.createDataFrame(data, schema=StructType(fields))

In [None]:
# Build a second DataFrame from the same rows, this time passing the
# (name, type) pairs so the schema is typed, then print a preview.
df2 = create_electric_car_objects(data['data'], cols, session)
df2.show()

In [None]:
# Easily load this dataframe to a new table!
# The table name is 'sf_ev_population_stg' and the write mode is 'overwrite'.
# NOTE(review): this writes `df` (built from column names only), not the
# typed `df2` created above -- confirm that is intended.

df.write.save_as_table('sf_ev_population_stg', mode='overwrite')