In [0]:
# Read every column of the stock price table into a Spark DataFrame.
sql = "select * from hive_metastore.default.us_stock_prices"
raw_df = spark.sql(sqlQuery=sql)

In [0]:
# Render the raw frame with the notebook's rich table viewer.
display(raw_df)

In [0]:
# Show the column names of the raw frame (cell output).
raw_df.columns

In [0]:
from pyspark.sql import functions as F

# Distinct dates and distinct tickers observed anywhere in the raw data.
all_dates = raw_df.select("as_of_date").distinct()
all_tickers = raw_df.select("ticker").distinct()

# Full (date x ticker) grid: one row per possible combination.
all_combinations = all_dates.crossJoin(all_tickers)

# Left-join the observed rows onto the grid so combinations with no
# observation are kept, then replace nulls in the listed columns with 0.
# Note: the fill applies to the whole joined frame, so nulls that were
# already present in raw_df for these columns are zeroed as well.
fill_cols = ["open", "high", "low", "close", "volume"]
result = (
    all_combinations
    .join(raw_df, on=["ticker", "as_of_date"], how="left")
    .fillna(0, subset=fill_cols)
)

In [0]:
import matplotlib.pyplot as plt
import pandas as pd

# Collect the raw prices to the driver as a pandas frame, sorted by date so
# each ticker's rows are already in chronological order when plotted.
pdf = raw_df.orderBy("as_of_date").toPandas()

# Ticker symbols in order of first appearance in the sorted frame.
tickers = pdf["ticker"].unique()

# One shared axes, one close-price line per ticker.
fig, ax = plt.subplots(figsize=(14, 7))
for symbol in tickers:
    symbol_rows = pdf.loc[pdf["ticker"] == symbol]
    ax.plot(symbol_rows["as_of_date"], symbol_rows["close"], label=symbol)

ax.set(xlabel="Date", ylabel="Close Price", title="Close Price by Ticker")
ax.legend()
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()

In [0]:
# Publish the gap-filled frame under the "filled" view name. The original
# registered raw_df here, so the view exposed the unfilled data and the
# `result` frame built earlier was never used.
# Global temp views are resolved through the `global_temp` database, e.g.
# `select * from global_temp.filled_raw_stock_price`.
result.createOrReplaceGlobalTempView("filled_raw_stock_price")