In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException
from pyspark.sql.functions import make_date, col, year
from pyspark.sql.functions import max

# Source CSV holding the raw price history (replace <BUCKET_NAME> before running).
csv_path = "gs://<BUCKET_NAME>/public-data/finance/gc=f_price.csv"
# Warehouse location for the Iceberg data.
# NOTE(review): not referenced by any cell visible here; confirm it is still needed.
iceberg_path = 'gs://<BUCKET_NAME>/warehousing/finance/gc=f_price'

In [None]:
# Read the raw CSV: the first row provides the column names and Spark is asked
# to infer each column's type from the data.
# NOTE(review): `spark` is not created in the visible cells; presumably the
# notebook runtime injects the session. Confirm before running elsewhere.
df = (
    spark.read
    .options(header="true", inferSchema="true")
    .csv(csv_path)
)
# Row count of the loaded frame (this triggers a full read).
df.count()

In [None]:
# Trim the frame to the columns used downstream and derive a `year` column
# from the date in a single chain, so `df_optimized` is bound only once.
df_optimized = (
    df
    .drop('High', 'Low', 'Open', 'Volume')
    .withColumn('year', year(col('date')))
)
df_optimized.show()

In [None]:
# Store the trimmed history as an Iceberg table partitioned by `year`,
# replacing whatever the table held before.
(
    df_optimized.write
    .format('iceberg')
    .partitionBy('year')
    .mode('overwrite')
    .saveAsTable('iceberg_catalog.finance.gold_price')
)

In [None]:
# Collect the Spark frame onto the driver as a pandas DataFrame; Prophet works
# on pandas input, so the whole history has to fit in driver memory.
df_pandas = df_optimized.toPandas()
# `count` is a method: without the call parentheses the cell only displayed the
# bound-method repr. Calling it shows the non-null count of every column, which
# doubles as a quick check for missing values before fitting the model.
df_pandas.count()

In [None]:
# !pip install prophet
import pandas as pd
from prophet import Prophet

In [None]:
# Width of the uncertainty interval Prophet reports around the forecast.
confidence_interval = 0.9
model = Prophet(interval_width=confidence_interval)

# Rename the date and target columns to the `ds` / `y` names used for fitting.
prophet_columns = {'Date': 'ds', 'Close': 'y'}
series = df_pandas.rename(columns=prophet_columns)

model.fit(series)

In [None]:
# Number of periods to extend past the end of the fitted history.
forecast_period = 365
# Frame of dates covering the history plus `forecast_period` additional periods.
future = model.make_future_dataframe(forecast_period)
# Predictions (with uncertainty bounds) for every date in `future`.
forecast = model.predict(df=future)

In [None]:
import matplotlib

# Plot the forecast, then label the figure's current axes once instead of
# looking them up again for every setter.
fig1 = model.plot(forecast)
forecast_ax = fig1.gca()
forecast_ax.set_title("Gold Spot Price Forecast", size=16)
forecast_ax.set_xlabel("Date")
forecast_ax.set_ylabel("Price USD/Ounce")


In [None]:
# Only forecast rows strictly after this date are kept.
# NOTE(review): presumably the last date present in the historical CSV; confirm,
# and consider deriving it from the data instead of hard-coding it.
forecast_cutoff = '2025-07-17'

# Build the future-only frame in one chain. `.loc`, `.rename` and `.assign`
# each return a new frame, so no column is ever assigned on a slice of
# `forecast` (the original chained indexing followed by a column assignment
# is the pattern that raises pandas' SettingWithCopyWarning).
forecast1 = (
    forecast.loc[forecast['ds'] > forecast_cutoff, ['ds', 'yhat']]
    # Match the column names of the historical frame so the two can be unioned by name.
    .rename(columns={'ds': 'Date', 'yhat': 'Close'})
    # Reduce the timestamps to plain dates.
    .assign(Date=lambda frame: frame['Date'].dt.date)
)

# Move the forecast back into Spark and add the same `year` column the
# historical frame carries.
df_forecast = spark.createDataFrame(forecast1)
df_forecast = df_forecast.withColumn('year', year(col('date')))
df_forecast.show()

In [None]:
# Stack the historical rows and the forecast rows, matching columns by name
# rather than by position.
combined_df = df_optimized.unionByName(other=df_forecast)
# Show the last 20 rows of the combined frame.
combined_df.tail(20)

In [None]:
# `combined_df` already contains every historical row of `df_optimized`, and
# those rows were written to this same table by the earlier overwrite cell.
# Appending the combined frame would therefore store the whole history twice
# (and once more on every re-run of this cell). Replace the table contents
# with the combined frame instead, which also makes the cell idempotent.
(
    combined_df.write
    .format('iceberg')
    .mode('overwrite')
    .partitionBy('year')
    .saveAsTable('iceberg_catalog.finance.gold_price')
)