# Ingest tickers price file

In [0]:
%run "../utils/mount_configuration"

In [0]:
%run "../utils/incremental_load"

In [0]:
from pyspark.sql.types import StructType, StructField, LongType, StringType, ArrayType, BooleanType, DoubleType

# Explicit schema for the raw tickers-price JSON documents, so Spark does not
# have to infer it. Every field is nullable.
#
# Each document carries response-level metadata plus a `results` array with
# one struct per price bar. Of the bar fields, this notebook later projects
# c/h/l/o/t as close/highest/lowest/open price and timestamp; n, v and vw are
# declared here but not used downstream in this notebook.
tickers_price_schema = StructType([
    StructField("adjusted", BooleanType(), nullable=True),
    StructField("count", LongType(), nullable=True),
    StructField("queryCount", LongType(), nullable=True),
    StructField("request_id", StringType(), nullable=True),
    StructField(
        "results",
        ArrayType(
            StructType([
                StructField(bar_field, bar_type, nullable=True)
                for bar_field, bar_type in (
                    ("c", DoubleType()),
                    ("h", DoubleType()),
                    ("l", DoubleType()),
                    ("n", LongType()),
                    ("o", DoubleType()),
                    ("t", LongType()),
                    ("v", DoubleType()),
                    ("vw", DoubleType()),
                )
            ]),
            containsNull=True,
        ),
        nullable=True,
    ),
    StructField("resultsCount", LongType(), nullable=True),
    StructField("status", StringType(), nullable=True),
    StructField("ticker", StringType(), nullable=True),
])

In [0]:
# Read every entry under the raw tickers_price folder as JSON, applying the
# explicit schema declared above instead of relying on schema inference.
# `raw_folder_path` is not defined in this notebook -- presumably it comes from
# the %run of ../utils/mount_configuration; verify there.
tickers_price_df = (
    spark.read
    .json(f"{raw_folder_path}/tickers_price/*", schema=tickers_price_schema)
)

# display(tickers_price_df)

In [0]:
from pyspark.sql.functions import col, explode, from_unixtime

# Flatten the raw documents into one row per (ticker, price bar):
#   1. explode the `results` array so each bar becomes its own row;
#   2. project the bar fields under descriptive column names;
#   3. derive a calendar `date` from the millisecond epoch field `t`
#      (divided by 1000 to get seconds); the raw timestamp itself is not kept.
# Output columns, in order: ticker, close_price, highest_price, lowest_price,
# open_price, date.
# NOTE(review): from_unixtime renders the instant in the Spark session time
# zone, so the derived date depends on that setting -- confirm it is the
# intended one for this data set.
tickers_price_final_df = (
    tickers_price_df
    .select("ticker", explode("results").alias("result"))
    .select(
        "ticker",
        col("result.c").alias("close_price"),
        col("result.h").alias("highest_price"),
        col("result.l").alias("lowest_price"),
        col("result.o").alias("open_price"),
        from_unixtime(col("result.t") / 1000).cast("date").alias("date"),
    )
)

# display(tickers_price_final_df)

In [0]:
# Match predicate handed to incrementalLoadDelta in the next cell: a source
# row matches a target row when both `ticker` and `date` are equal.
# The `target` / `source` aliases are presumably the ones that helper assigns
# to the existing table and the incoming DataFrame -- verify in
# ../utils/incremental_load.
# The literal (including its embedded newline and indentation) is kept exactly
# as it was.
mergeCondition = (
    "target.ticker = source.ticker AND \n"
    "                    target.date = source.date"
)


In [0]:
# Hand the flattened prices to the shared incremental-load helper, targeting
# engineering_processed.tickers_price with `ticker` as the partition field and
# the ticker+date predicate as the merge condition.
# `incrementalLoadDelta` and `processed_folder_path` are not defined in this
# notebook -- presumably they come from the %run'd utility notebooks
# (../utils/incremental_load and ../utils/mount_configuration); verify there.
incrementalLoadDelta(
    input_df=tickers_price_final_df,
    databaseName="engineering_processed",
    tableName="tickers_price",
    folderPath=processed_folder_path,
    partitionField="ticker",
    mergeCondition=mergeCondition,
)

In [0]:
%sql
-- Preview all rows and columns of the engineering_processed.tickers_price table.
SELECT * FROM engineering_processed.tickers_price