# Ingest tickers details file

In [0]:
%run "../utils/mount_configuration"

In [0]:
%run "../utils/incremental_load"

In [0]:
from pyspark.sql.types import StructType, StructField, LongType, StringType, BooleanType, DoubleType

# Explicit schema for the ticker-details JSON documents, so the reader does not
# have to infer types. Each document has a top-level request_id and status plus
# a nested "results" struct, which itself nests "address" and "branding".
# Every field is declared nullable.
tickers_details_schema = (
    StructType()
    .add("request_id", StringType(), True)
    .add(
        "results",
        StructType()
        .add("active", BooleanType(), True)
        .add(
            "address",
            StructType()
            .add("address1", StringType(), True)
            .add("city", StringType(), True)
            .add("postal_code", StringType(), True)
            .add("state", StringType(), True),
            True,
        )
        .add(
            "branding",
            StructType()
            .add("icon_url", StringType(), True)
            .add("logo_url", StringType(), True),
            True,
        )
        .add("cik", StringType(), True)
        .add("composite_figi", StringType(), True)
        .add("currency_name", StringType(), True)
        .add("description", StringType(), True)
        .add("homepage_url", StringType(), True)
        .add("list_date", StringType(), True)
        .add("locale", StringType(), True)
        .add("market", StringType(), True)
        .add("market_cap", DoubleType(), True)
        .add("name", StringType(), True)
        .add("phone_number", StringType(), True)
        .add("primary_exchange", StringType(), True)
        .add("round_lot", LongType(), True)
        .add("share_class_figi", StringType(), True)
        .add("share_class_shares_outstanding", LongType(), True)
        .add("sic_code", StringType(), True)
        .add("sic_description", StringType(), True)
        .add("ticker", StringType(), True)
        .add("ticker_root", StringType(), True)
        .add("total_employees", LongType(), True)
        .add("type", StringType(), True)
        .add("weighted_shares_outstanding", LongType(), True),
        True,
    )
    .add("status", StringType(), True)
)

In [0]:
from pyspark.sql.functions import input_file_name, regexp_extract

# Read every file under the raw tickers_detail folder as JSON using the explicit
# schema, and record which file each row came from. The greedy ".*/" in the
# pattern consumes the path up to the last "/", so "filename" holds only the
# final path component.
tickers_details_df = (
    spark.read
    .schema(tickers_details_schema)
    .json(f"{raw_folder_path}/tickers_detail/*")
    .withColumn(
        "filename",
        regexp_extract(input_file_name(), ".*/(.*)", 1),
    )
)

# display(tickers_details_df)

In [0]:
from pyspark.sql.functions import split, col, to_date

# Flatten the nested "results" payload into the columns kept for the processed
# table and attach the date parsed from the source file name.
tickers_details_final_df = (
    tickers_details_df
    # The date is the third "_"-separated token of the file name, cut at the
    # first "." (assumes names shaped like <a>_<b>_<date>.<ext> -- TODO confirm
    # against the files in the raw folder).
    .withColumn("date", to_date(split(split(col("filename"), "_")[2], "\\.")[0]))
    .select(
        "results.ticker",
        "results.name",
        "results.market_cap",
        "results.type",
        "results.primary_exchange",
        "results.weighted_shares_outstanding",
        "results.total_employees",
        "date",
    )
    # Drop rows missing either merge key. Every schema field is nullable, so a
    # response without "results" (or a file name that does not parse to a date)
    # produces NULL keys. The merge condition used for this load compares
    # ticker and date with "=", which is never true for NULL, so such rows
    # could never match an existing target row (presumably they would be
    # re-inserted on every run -- verify against incrementalLoadDelta).
    .filter(col("ticker").isNotNull() & col("date").isNotNull())
)

# display(tickers_details_final_df)

In [0]:
# Predicate used to pair incoming (source) rows with existing (target) rows:
# two rows correspond when both their ticker and their date are equal.
mergeCondition = (
    "target.ticker = source.ticker AND \n"
    "                    target.date = source.date"
)

In [0]:
# Hand the flattened frame to the shared incremental-load helper, targeting
# engineering_processed.tickers_details under the processed folder, with
# "ticker" as the partition field and the ticker/date merge condition.
# NOTE(review): incrementalLoadDelta is not defined in this notebook --
# presumably it comes from the "../utils/incremental_load" %run; confirm there
# how partitionField and mergeCondition are applied.
incrementalLoadDelta(
    input_df=tickers_details_final_df,
    databaseName="engineering_processed",
    tableName="tickers_details",
    folderPath=processed_folder_path,
    partitionField="ticker",
    mergeCondition=mergeCondition,
)

In [0]:
%sql
-- Inspect the contents of the processed tickers_details table.
SELECT * FROM engineering_processed.tickers_details;