## Snowflake connection credentials 

In [0]:
import os

# Connection options passed to the Snowflake writes later in this notebook via
# `.options(**sfOptions)`. Account, user and password are read from environment
# variables, so a missing variable raises KeyError when this cell runs.
# NOTE(review): "sfURL" is populated from SNOWFLAKE_ACCOUNT — confirm that the
# variable holds the value the connector expects for sfURL.
sfOptions = {
    "sfURL": os.environ["SNOWFLAKE_ACCOUNT"],
    "sfUser": os.environ["SNOWFLAKE_USER"],
    "sfPassword": os.environ["SNOWFLAKE_PASSWORD"],
    # Fixed target location for every Snowflake write in this notebook.
    "sfDatabase": "CRYPTO_ANALYSIS_DB",
    "sfSchema": "SILVER",
    "sfWarehouse": "COMPUTE_WH",
}

## Intermediate transformation of data

In [0]:
from pyspark.sql.functions import col, to_timestamp, when, to_date, upper, round

# Bronze -> silver projection for price data: normalise the symbol to upper
# case, round the numeric measures, and cast the time columns to timestamps.
# `round` here is pyspark.sql.functions.round (imported above), not Python's
# built-in round.
df_price_bronze = spark.table("crypto_sentiment_bronze_price")

df_price_silver = df_price_bronze.select(
    upper(col("symbol")).alias("symbol"),
    round(col("price"), 4).alias("price"),
    col("cmc_rank"),
    # Explicit alias, like every other transformed column in this select, so
    # the output name does not depend on Spark's implicit naming of cast
    # expressions; later cells reference this column as "volume_24h".
    col("volume_24h").cast("long").alias("volume_24h"),
    round(col("percent_change_1h"), 2).alias("percent_change_1h"),
    round(col("percent_change_24h"), 2).alias("percent_change_24h"),
    round(col("percent_change_7d"), 2).alias("percent_change_7d"),
    # The source column `last_updated` is exposed as `market_timestamp`.
    col("last_updated").cast("timestamp").alias("market_timestamp"),
    col("ingested_at").cast("timestamp").alias("ingested_at"),
)

In [0]:
# Quality filter plus two derived columns, written as one chained expression.
# Only rows with a strictly positive price AND a strictly positive 24h volume
# are kept; rows where either value is NULL do not satisfy the comparison and
# are dropped as well.
df_price_silver = (
    df_price_silver
    .filter(col("price") > 0)
    .filter(col("volume_24h") > 0)
    # NOTE(review): to_date on a timestamp presumably follows the Spark session
    # time zone — confirm this matches the intended market calendar day.
    .withColumn("market_date", to_date(col("market_timestamp")))
    # Rank tiers: rank <= 10 -> "Blue Chip", 10 < rank <= 50 -> "Mid Cap",
    # everything else (including a NULL cmc_rank) -> "Small Cap".
    .withColumn(
        "risk_category",
        when(col("cmc_rank") <= 10, "Blue Chip")
        .when((col("cmc_rank") > 10) & (col("cmc_rank") <= 50), "Mid Cap")
        .otherwise("Small Cap"),
    )
)

df_price_silver.display()

symbol,price,cmc_rank,volume_24h,percent_change_1h,percent_change_24h,percent_change_7d,market_timestamp,ingested_at,market_date,risk_category
ONDO,0.2618,51,72483716,0.47,2.51,-6.67,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Small Cap
XRP,1.4598,4,3228554522,-0.17,3.9,-9.55,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Blue Chip
CMC20,145.8715,8834,5510111,0.01,3.32,-9.24,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Small Cap
DOGE,0.0979,9,1135918997,-0.35,1.82,-6.63,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Blue Chip
USDT,0.9993,3,94096940583,-0.01,0.04,0.03,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Blue Chip
AVAX,9.2433,24,304212062,-0.15,1.3,-7.32,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Mid Cap
LINK,8.9258,16,720055017,-0.72,1.7,-7.94,2026-02-08T14:31:00Z,2026-02-08T14:31:43Z,2026-02-08,Mid Cap
USDC,1.0,6,11095538351,0.0,0.03,0.02,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Blue Chip
SOL,88.2124,7,3802293917,-0.37,2.23,-14.61,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Blue Chip
ENS,6.0235,128,26155480,-0.42,0.06,-12.11,2026-02-08T14:30:00Z,2026-02-08T14:31:43Z,2026-02-08,Small Cap


In [0]:

# Bronze -> silver projection for news data.
df_news_bronze = spark.table("crypto_sentiment_bronze_news")

# Output columns, in order: upper-cased symbol and name, the description
# exposed as `news_headline`, the prediction label unchanged, and the
# publication time cast to a timestamp.
_news_silver_columns = [
    upper(col("symbol")).alias("symbol"),
    upper(col("name")).alias("name"),
    col("description").alias("news_headline"),
    col("prediction"),
    col("published_at").cast("timestamp").alias("news_timestamp"),
]

df_news_silver = df_news_bronze.select(*_news_silver_columns)


In [0]:

# Add the calendar date of each news item and a numeric sentiment score.
df_news_silver = (
    df_news_silver
    .withColumn("news_date", to_date(col("news_timestamp")))
    # Score mapping: "Positive" -> 1, "Negative" -> -1, anything else -> 0.
    # The comparison is an exact, case-sensitive string match, so a label such
    # as "Neutral", an unexpected spelling, or a NULL prediction all score 0.
    .withColumn(
        "sentiment_score",
        when(col("prediction") == "Positive", 1)
        .when(col("prediction") == "Negative", -1)
        .otherwise(0),
    )
)

display(df_news_silver)

symbol,name,news_headline,prediction,news_timestamp,news_date,sentiment_score
BTC,BITCOIN,"Bitcoin (BTC) price is hovering near $68,890, but the Sharpe Ratio shows intact caution, indicating rising risk relative to returns, slipping into a historical bear-market zone.",Negative,2026-02-07T14:18:40Z,2026-02-07,-1
SOL,SOLANA,"Solana (SOL) staged a sharp intraday recovery, posting a 12% daily gain despite lingering market uncertainty, though long-term holder buying momentum is slowing.",Positive,2026-02-07T14:06:53Z,2026-02-07,1
ETH,ETHEREUM,"Ethereum (ETH) had one of its sharpest historic declines over the past 10 days, shedding 40% of its value and briefly sliding below $2,000.",Negative,2026-02-07T14:22:21Z,2026-02-07,-1
BTC,BITCOIN,"Bitcoin (BTC) has fallen below its 365-day MA, with weak spot and institutional demand and tightened liquidity, signaling a bear market.",Negative,2026-02-07T14:03:39Z,2026-02-07,-1
BTC,BITCOIN,"Discussion on the end of the monetary system and collapsing fiat currencies, implying a positive outlook for Bitcoin as an alternative.",Positive,2026-02-07T14:01:22Z,2026-02-07,1
BTC,BITCOIN,"Debate ensues about whether the crypto market has crashed or is merely experiencing a dip, with Bitcoin's behavior surprising analysts.",Neutral,2026-02-07T14:10:40Z,2026-02-07,0
BTC,BITCOIN,Scott Melker discussed Bitcoin's resilience through past crashes but also highlighted potential systemic threats to its stability.,Neutral,2026-02-07T14:15:43Z,2026-02-07,0
SHIB,SHIBA INU,The Shiba Inu team issued a crucial wallet security notice to the SHIB community due to a new emerging threat.,Negative,2026-02-07T14:05:00Z,2026-02-07,-1
ETH,ETHEREUM,"Vitalik Buterin calls for L2 shift as Ethereum L1 scales, indicating continued development and scaling efforts.",Positive,2026-02-07T14:02:59Z,2026-02-07,1
BTC,BITCOIN,Macro investor Jordi Visser discusses why Bitcoin is selling off and capital shifting towards scarce assets.,Negative,2026-02-07T14:01:13Z,2026-02-07,-1


## Loading the data into the Snowflake Silver layer

In [0]:
from pyspark.sql.functions import col, current_timestamp

# Shape the silver news frame for Snowflake: each silver column is renamed to
# its upper-case target name, and a LOAD_DTM column is appended from
# current_timestamp().
_news_snowflake_renames = [
    ("symbol", "SYMBOL"),
    ("name", "COIN_NAME"),
    ("news_headline", "NEWS_HEADLINE"),
    ("prediction", "PREDICTION"),
    ("news_timestamp", "NEWS_TIMESTAMP"),
    ("news_date", "NEWS_DATE"),
    ("sentiment_score", "SENTIMENT_SCORE"),
]

df_news_snowflake = df_news_silver.select(
    *[col(source).alias(target) for source, target in _news_snowflake_renames],
    current_timestamp().alias("LOAD_DTM"),
)


In [0]:
# Append the news rows to the CRYPTO_NEWS table, using the connection settings
# in sfOptions.
# NOTE(review): the mode is "append" and the frame is built from the whole
# bronze news table, so re-running this cell over the same bronze rows appends
# them again — confirm that is intended.
(
    df_news_snowflake.write
    .format("snowflake")
    .options(**sfOptions)
    .option("dbtable", "CRYPTO_NEWS")
    .mode("append")
    .save()
)



In [0]:
from pyspark.sql.functions import col, current_timestamp

# Shape the silver price frame for Snowflake: each silver column is renamed to
# its upper-case target name (cmc_rank becomes COIN_RANK), and a LOAD_DTM
# column is appended from current_timestamp().
_price_snowflake_renames = [
    ("symbol", "SYMBOL"),
    ("price", "PRICE"),
    ("cmc_rank", "COIN_RANK"),
    ("volume_24h", "VOLUME_24H"),
    ("percent_change_1h", "PERCENT_CHANGE_1H"),
    ("percent_change_24h", "PERCENT_CHANGE_24H"),
    ("percent_change_7d", "PERCENT_CHANGE_7D"),
    ("market_timestamp", "MARKET_TIMESTAMP"),
    ("ingested_at", "INGESTED_AT"),
    ("market_date", "MARKET_DATE"),
    ("risk_category", "RISK_CATEGORY"),
]

df_price_snowflake = df_price_silver.select(
    *[col(source).alias(target) for source, target in _price_snowflake_renames],
    current_timestamp().alias("LOAD_DTM"),
)


In [0]:
# Append the price rows to the CRYPTO_PRICE table, using the connection
# settings in sfOptions.
# NOTE(review): the mode is "append" and the frame is built from the whole
# bronze price table, so re-running this cell over the same bronze rows appends
# them again — confirm that is intended.
(
    df_price_snowflake.write
    .format("snowflake")
    .options(**sfOptions)
    .option("dbtable", "CRYPTO_PRICE")
    .mode("append")
    .save()
)

## Loading the data into Delta tables in the Silver layer

In [0]:
# Persist the silver price frame as the Delta table
# crypto_sentiment_silver_price. The write uses mode "overwrite" with
# overwriteSchema enabled, so both the table contents and its schema are
# replaced by this frame.
(
    df_price_silver.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("crypto_sentiment_silver_price")
)


In [0]:
# Persist the silver news frame as the Delta table
# crypto_sentiment_silver_news. The write uses mode "overwrite" with
# overwriteSchema enabled, so both the table contents and its schema are
# replaced by this frame.
(
    df_news_silver.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("crypto_sentiment_silver_news")
)
