## Snowflake Data Load Notebook
This notebook creates the required Snowflake objects, stages a CSV from GitHub, and loads it into a table – all using **Snowpark for Python**.
**Prerequisites**
1. The `snowflake-snowpark-python` and `requests` packages are installed (see the next code cell).
2. Environment variables with your connection info are set in the kernel/session:
   * `SNOWFLAKE_ACCOUNT` –  Go to Account Details in snowflake . It should  look like bewlo 
   * `SNOWFLAKE_USER` 
   * `SNOWFLAKE_PASSWORD`

[connections.my_example_connection]
- account = "XXXX-XXXXX"
- user = "HUSEYN"
- role = "ACCOUNTADMIN"


In [38]:
import pathlib
import os, pathlib, requests
from snowflake.snowpark import Session


In [39]:

print("Working dir:", os.getcwd())
print("Files here:", os.listdir())


Working dir: c:\Users\ping\Documents\Bootcamps\Data-Analytics-Engineer-Bootcamp\dataflow\notebooks
Files here: ['netflix_titles.csv', 'snowpark_bootstrap.ipynb']


In [40]:

from dotenv import load_dotenv
load_dotenv()

connection_parameters = {
    "account":   os.environ["SNOWFLAKE_ACCOUNT"],
    "user":      os.environ["SNOWFLAKE_USER"],
    "password":  os.environ["SNOWFLAKE_PASSWORD"],
    "role":      "ACCOUNTADMIN",  
    "warehouse": "COMPUTE_WH",        
}

session = Session.builder.configs(connection_parameters).create()
session.sql("SELECT CURRENT_VERSION() AS VERSION").show()

-------------
|"VERSION"  |
-------------
|9.12.1     |
-------------



In [41]:

session.sql("USE ROLE ACCOUNTADMIN").collect()


session.sql("DROP ROLE IF EXISTS DBT_ROLE").collect()
session.sql("CREATE ROLE IF NOT EXISTS DBT_ROLE").collect()

session.sql("GRANT ROLE SYSADMIN TO ROLE DBT_ROLE").collect()

session.sql("GRANT ROLE DBT_ROLE TO USER DBT_USER").collect()

session.sql("ALTER USER DBT_USER SET DEFAULT_ROLE = DBT_ROLE").collect()



[Row(status='Statement executed successfully.')]

In [42]:

def run_many(sql: str):
    for stmt in [s.strip() for s in sql.split(";") if s.strip()]:
        session.sql(stmt).collect()

In [43]:

run_many('''
CREATE OR REPLACE WAREHOUSE DBT_WH WAREHOUSE_SIZE = "XSMALL";

CREATE DATABASE IF NOT EXISTS DBT_DB;
CREATE SCHEMA   IF NOT EXISTS DBT_DB.DBT_SCHEMA;
         


CREATE DATABASE IF NOT EXISTS DBT_TARGET_DB;
CREATE SCHEMA   IF NOT EXISTS DBT_TARGET_DB.DBT_TARHET_SCHEMA;


         


GRANT USAGE          ON WAREHOUSE DBT_WH            TO ROLE DBT_ROLE;
         
GRANT ALL PRIVILEGES ON DATABASE  DBT_DB            TO ROLE DBT_ROLE;
GRANT ALL PRIVILEGES ON DATABASE  DBT_TARGET_DB            TO ROLE DBT_ROLE;
         


GRANT ALL PRIVILEGES ON SCHEMA    DBT_DB.DBT_SCHEMA TO ROLE DBT_ROLE;
GRANT ALL PRIVILEGES ON SCHEMA    DBT_TARGET_DB.DBT_TARHET_SCHEMA TO ROLE DBT_ROLE;

CREATE USER IF NOT EXISTS DBT_USER
  PASSWORD            = 'StrongPassword12345' 
  DEFAULT_ROLE        = DBT_ROLE
  DEFAULT_WAREHOUSE   = DBT_WH
  MUST_CHANGE_PASSWORD = FALSE;
    
         
GRANT ROLE DBT_ROLE TO USER DBT_USER;

''')
print("Bootstrap complete.")

Bootstrap complete.


In [44]:

run_many("CREATE OR REPLACE STAGE DBT_DB.DBT_SCHEMA.NETFLIX_RAW_STAGE;")

In [45]:

csv_url   = "https://raw.githubusercontent.com/HuseynA28/DataFlow-Snowflake-Airflow-dbt-Docker-CICD-/refs/heads/main/data/netflix_titles.csv"
local_csv = pathlib.Path("netflix_titles.csv")
local_csv.write_bytes(requests.get(csv_url, timeout=30).content)


3399671

In [46]:

session.file.put(
    str(local_csv),                               
    "@DBT_DB.DBT_SCHEMA.NETFLIX_RAW_STAGE",       
    overwrite=True,
)

print("File uploaded to stage.")

File uploaded to stage.


In [47]:
session.sql(''' USE ROLE DBT_ROLE ''')

<snowflake.snowpark.dataframe.DataFrame at 0x1f5d5038b10>

In [48]:
session.sql(
  '''CREATE OR REPLACE FILE FORMAT DBT_DB.DBT_SCHEMA.CSV_NETFLIX_RAW
  TYPE                       = 'CSV'
  COMPRESSION                = 'AUTO'               
  FIELD_DELIMITER            = ','                  
  FIELD_OPTIONALLY_ENCLOSED_BY = '"'              
  PARSE_HEADER = TRUE; ''').collect()


[Row(status='File format CSV_NETFLIX_RAW successfully created.')]

In [49]:
session.sql('''USE database  DBT_DB ''').collect()
session.sql('''USE schema  DBT_SCHEMA ''').collect()


[Row(status='Statement executed successfully.')]

In [50]:
session.sql(''' 
       
CREATE OR REPLACE TABLE  NETFLIX_DATA
  USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@DBT_DB.DBT_SCHEMA.NETFLIX_RAW_STAGE',
          FILE_FORMAT=>'CSV_NETFLIX_RAW'
        )
      ))''').collect()




[Row(status='Table NETFLIX_DATA successfully created.')]

In [51]:
session.sql(''' 


CREATE OR REPLACE FILE FORMAT DBT_DB.DBT_SCHEMA.csv_ff
  TYPE  = CSV
  FIELD_DELIMITER = ','
  FIELD_OPTIONALLY_ENCLOSED_BY = '"'
  SKIP_HEADER = 1; 


   ''' ).collect()

[Row(status='File format CSV_FF successfully created.')]

In [52]:
session.sql(
    ''' 
  COPY INTO NETFLIX_DATA FROM @DBT_DB.DBT_SCHEMA.NETFLIX_RAW_STAGE
  FILE_FORMAT = (
    FORMAT_NAME= 'csv_ff'
  )
            
  ''').collect()

[Row(file='netflix_raw_stage/netflix_titles.csv.gz', status='LOADED', rows_parsed=8807, rows_loaded=8807, error_limit=1, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None)]