In [None]:
import snowflake.snowpark.functions as F
from snowflake.snowpark.session import Session
from snowflake.snowpark.types import StructType, StructField, FloatType
from snowflake.snowpark import Session
import os
import json

In [None]:
# Snowflake connection settings are read from environment variables so that no
# credentials are stored in the notebook itself.
connection_parameters = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "schema": os.getenv("SNOWFLAKE_SCHEMA"),
    "database": os.getenv("SNOWFLAKE_DATABASE"),
    "role": os.getenv("SNOWFLAKE_ROLE"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
}

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, which would otherwise only surface later as an opaque connection
# error. Only the settings needed to authenticate are treated as mandatory;
# schema/database/role/warehouse are passed through unchanged.
missing_credentials = [
    f"SNOWFLAKE_{key.upper()}"
    for key in ("account", "user", "password")
    if not connection_parameters[key]
]
if missing_credentials:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_credentials)
    )

# Open the Snowpark session used by every following cell.
session = Session.builder.configs(connection_parameters).create()

In [None]:
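# Alternative (disabled): load the connection parameters from a local JSON
# credentials file instead of environment variables. To use it, uncomment the
# lines below and point the path at your own creds.json.
#with open(r'C:\Users\habdullayev\Documents\GitHub\SnowflakeML\Snowflake_ML_Intro\creds.json') as f:
    #connection_parameters = json.load(f)

#session = Session.builder.configs(connection_parameters).create()
#print(f"Current Database and schema: {session.get_fully_qualified_current_schema()}")
#print(f"Current Warehouse: {session.get_current_warehouse()}")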

In [None]:
# Bind the ADVERTISING table (resolved through `session`) to `ad_df`, the
# DataFrame that every later cell transforms.
ad_df = session.table("ADVERTISING")

In [None]:
# Preview the raw table before any cleaning is applied.
ad_df.show()

In [None]:
def convert_to_float(column):
    """Build a column expression that parses `column` as a float.

    Every underscore in the named column's value is replaced with a dot
    before the result is cast to ``FloatType``.

    Args:
        column: Name of the column to convert.

    Returns:
        The Snowpark column expression performing the replace-and-cast.
    """
    source_col = F.col(column)
    with_decimal_point = F.regexp_replace(source_col, '_', '.')
    return with_decimal_point.cast(FloatType())

In [None]:
# Replace each of the four numeric columns, in turn, with its float-converted
# version (same column name, so the original text values are overwritten).
for numeric_col in ("TV", "RADIO", "NEWSPAPER", "SALES"):
    ad_df = ad_df.with_column(numeric_col, convert_to_float(numeric_col))

In [None]:
# Count the NULLs in each numeric column: is_null() is cast to an integer and
# summed, giving one "<COLUMN>_missing" value per column.
null_count_exprs = [
    F.sum(F.col(col_name).is_null().cast("int")).alias(f"{col_name}_missing")
    for col_name in ("TV", "RADIO", "NEWSPAPER", "SALES")
]
missing_values = ad_df.select(*null_count_exprs).collect()

In [None]:
# Show the per-column NULL counts held in `missing_values`.
print("Missing Values:", missing_values)

In [None]:
# Compute the median of each numeric column in one aggregate query; each
# result is aliased "<COLUMN>_MEDIAN" so it can be looked up by name later.
median_exprs = [
    F.median(col_name).alias(f"{col_name}_MEDIAN")
    for col_name in ("TV", "RADIO", "NEWSPAPER", "SALES")
]
medians = ad_df.agg(*median_exprs).collect()

In [None]:
# Only the first row of the collected result is read; pull each median out of
# it by its alias.
median_row = medians[0]
tv_median, radio_median, newspaper_median, sales_median = (
    median_row[alias]
    for alias in ("TV_MEDIAN", "RADIO_MEDIAN", "NEWSPAPER_MEDIAN", "SALES_MEDIAN")
)


In [None]:
# Impute missing values: each column's NULLs are filled with that column's
# median.
fill_values = dict(
    zip(
        ("TV", "RADIO", "NEWSPAPER", "SALES"),
        (tv_median, radio_median, newspaper_median, sales_median),
    )
)
ad_df = ad_df.na.fill(fill_values)

In [None]:
# Preview the data after the float conversion and median fill.
ad_df.show()

In [38]:
# Persist the cleaned DataFrame.
# NOTE(review): this overwrites "ADVERTISING", the same table the notebook
# reads its input from, so the original (unconverted) data is replaced.
# Confirm that is intended, or write to a separate table name instead.
ad_df.write.mode("overwrite").save_as_table("ADVERTISING")