#  <span style="font-family: Latin Modern Roman; font-size: 35px; font-weight: bold;"> TradeData Project: Visualize Crypto Data</span>

---

## <span style="font-family: Latin Modern Roman; font-size: 25px;"> Sprint 4 (User Story 5) </span>

In [2]:
from pyspark import SparkConf
from pyspark.sql import SparkSession

In [3]:
# Resource and parallelism settings for the session, applied in the order
# listed. Values are handed to SparkConf as-is (ints and strings alike).
spark_settings = [
    ("spark.executor.cores", 5),
    ("spark.sql.shuffle.partitions", 200),
    ("spark.default.parallelism", 200),
    ("spark.executor.memory", "7g"),
    ("spark.dynamicAllocation.maxExecutors", 20),
]

# The master is set first, then every key/value pair above.
conf = SparkConf().setMaster("yarn").setAll(spark_settings)

# getOrCreate() returns an existing session when one is already active,
# otherwise it starts a new one with this configuration and app name.
session_builder = SparkSession.builder.config(conf=conf).appName("CryptoVisualization")
spark = session_builder.getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [9]:
# Import necessary libraries
import pandas as pd
import plotly.graph_objects as go

# Trading pairs to chart; one interactive figure is produced per symbol.
cryptos = [
    "BTCUSDT", "ETHUSDT", "XRPUSDT", "SOLUSDT",
    "DOGEUSDT", "ADAUSDT", "SHIBUSDT", "DOTUSDT",
    "AAVEUSDT", "XLMUSDT"
]

# Year to visualize (selects the `year=<value>` directory of each layer)
visualization_year = 2024

# Loop-invariant settings, defined once instead of being rebuilt or repeated
# inside every iteration.
base_path = "/datos/gittba/gittba04"
# NOTE(review): "volume" is loaded but never plotted below — confirm whether
# it is still needed before dropping it from the selection.
silver_columns = ["date", "open", "high", "low", "close", "volume"]
gold_columns = ["date", "SMA200", "EMA50"]
numeric_columns = ["open", "high", "low", "close", "SMA200", "EMA50"]

# Iterate over each cryptocurrency
for symbol in cryptos:
    # Load Silver Layer (price columns)
    silver_path = f"{base_path}/{symbol}_Silver/year={visualization_year}"
    df_silver = spark.read.parquet(silver_path).select(*silver_columns)

    # Load Gold Layer (indicator columns)
    gold_path = f"{base_path}/{symbol}_Gold/year={visualization_year}"
    df_gold = spark.read.parquet(gold_path).select(*gold_columns)

    # Merge both dataframes on 'date'; the inner join keeps only dates that
    # are present in both layers.
    df_merged = df_silver.join(df_gold, "date", "inner")

    # Sort in Spark, then collect to the driver as a pandas DataFrame
    df_pd = df_merged.orderBy("date").toPandas()

    # Nothing to draw when the join produced no rows: say so explicitly
    # instead of rendering a blank chart, and move on to the next symbol.
    if df_pd.empty:
        print(f"No data for {symbol} in {visualization_year}; skipping chart.")
        continue

    df_pd["date"] = pd.to_datetime(df_pd["date"])
    df_pd.set_index("date", inplace=True)

    # Ensure all plotted values are numeric; anything that cannot be parsed
    # becomes NaN (errors="coerce") rather than raising.
    df_pd[numeric_columns] = df_pd[numeric_columns].apply(
        pd.to_numeric, errors="coerce"
    )

    # Create figure
    fig = go.Figure()

    # Add Candlestick trace
    fig.add_trace(go.Candlestick(
        x=df_pd.index, open=df_pd["open"], high=df_pd["high"],
        low=df_pd["low"], close=df_pd["close"],
        name="Candlesticks"))

    # Add SMA200 trace
    fig.add_trace(go.Scatter(
        x=df_pd.index, y=df_pd["SMA200"], mode="lines",
        name="SMA 200", line=dict(color="blue", width=1.5)))

    # Add EMA50 trace
    fig.add_trace(go.Scatter(
        x=df_pd.index, y=df_pd["EMA50"], mode="lines",
        name="EMA 50", line=dict(color="orange", width=1.5)))

    # Layout adjustments: the range slider is hidden and hover is set to
    # the "x unified" mode.
    fig.update_layout(
        title=f"{symbol} Price & Indicators - {visualization_year}",
        xaxis_title="Date", yaxis_title="Price (USD)",
        xaxis_rangeslider_visible=False,
        hovermode="x unified"
    )

    # Show interactive plot
    fig.show()


                                                                                