In [1]:
import pandas as pd
import json
import snowflake.snowpark.functions as F
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
from snowflake.snowpark import Session

In [2]:
#with open(r'C:\Users\habdullayev\Documents\GitHub\SnowflakeML\Snowflake_ML_Intro\creds.json') as f:
#    connection_parameters = json.load(f)
#session = Session.builder.configs(connection_parameters).create()
#print(f"Current Database and schema: {session.get_fully_qualified_current_schema()}")
#print(f"Current Warehouse: {session.get_current_warehouse()}")

Current Database and schema: "SNOWPARK"."TITANIC"
Current Warehouse: "COMPUTE_WH"


In [3]:
session = Session.builder.configs(SnowflakeLoginOptions()).getOrCreate()

SnowflakeLoginOptions() is in private preview since 0.2.0. Do not use it in production. 


In [4]:
session.sql("CREATE DATABASE IF NOT EXISTS MLOPS;").collect()
session.sql("CREATE SCHEMA IF NOT EXISTS MLOPS.advertising;").collect()

[Row(status='Schema ADVERTISING successfully created.')]

In [5]:
session.sql("CREATE STAGE if not exists ML_DATA").collect()

[Row(status='Stage area ML_DATA successfully created.')]

In [19]:
ad = pd.read_csv(
    'https://raw.githubusercontent.com/HuseynA28/Snowflake-MLOPS/main/dataset/advertising.csv'
)
ad.columns = [c.upper() for c in ad.columns]
ad.to_csv("advertising.csv", index=False)

In [20]:
session.file.put("advertising.csv", "@ml_data", overwrite=True)

[PutResult(source='advertising.csv', target='advertising.csv.gz', source_size=4224, target_size=1792, source_compression='NONE', target_compression='GZIP', status='UPLOADED', message='')]

Use the PUT command to upload a local CSV file to a Snowflake stage.

In [21]:
# Create a Snowpark DataFrame that is configured to load data from the CSV file
avertising_df = (
    session.read.option("infer_schema", True)
    .option("PARSE_HEADER", True)
    .csv("@ml_data/advertising.csv")
)
avertising_df.show()

-------------------------------------------
|"TV"   |"RADIO"  |"NEWSPAPER"  |"SALES"  |
-------------------------------------------
|230.1  |37.8     |69.2         |22.1     |
|44.5   |39.3     |45.1         |10.4     |
|17.2   |45.9     |69.3         |12.0     |
|151.5  |41.3     |58.5         |16.5     |
|180.8  |10.8     |58.4         |17.9     |
|8.7    |48.9     |75.0         |7.2      |
|57.5   |32.8     |23.5         |11.8     |
|120.2  |19.6     |11.6         |13.2     |
|8.6    |2.1      |1.0          |4.8      |
|199.8  |2.6      |21.2         |15.6     |
-------------------------------------------



In [17]:
def fix_values(column):
    return F.upper(F.regexp_replace(F.col(column), "[^a-zA-Z0-9]+", "_"))


for col in ["TV", "RADIO", "NEWSPAPER", "SALES"]:
    avertising_df = avertising_df.with_column(col, fix_values(col))

avertising_df.show()

-------------------------------------------
|"TV"   |"RADIO"  |"NEWSPAPER"  |"SALES"  |
-------------------------------------------
|230_1  |37_8     |69_2         |22_1     |
|44_5   |39_3     |45_1         |10_4     |
|17_2   |45_9     |69_3         |12_0     |
|151_5  |41_3     |58_5         |16_5     |
|180_8  |10_8     |58_4         |17_9     |
|8_7    |48_9     |75_0         |7_2      |
|57_5   |32_8     |23_5         |11_8     |
|120_2  |19_6     |11_6         |13_2     |
|8_6    |2_1      |1_0          |4_8      |
|199_8  |2_6      |21_2         |15_6     |
-------------------------------------------



In [18]:
avertising_df.write.mode("overwrite").save_as_table("avertising")