## Reading 1st party Data
For the purpose of this demo, we created a synthetic portfolio that we know matches a subset of the records we have from FactSet and the information we get from Bloomberg. This resulted in a portfolio of 31 US-based equities across multiple sectors. No effort was made to optimize this portfolio for market performance.

In [0]:
# Namespace: tables and functions below are addressed as <catalog>.<database>.<name>
lakehouse_catalog = 'financial_services'
lakehouse_database = 'investment_analytics'

# Input table read in this notebook
bloomberg_entity_table = 'bloomberg_entity'

# Objects created by this notebook: the portfolio table and the SQL function exposing it
lakehouse_table = 'equity_portfolio'
lakehouse_function = 'get_portfolio_data'

In [0]:
import pandas as pd
# Parse the synthetic portfolio file once with pandas and convert it straight
# into a Spark DataFrame (the intermediate pandas frame is not needed afterwards).
portfolio = spark.createDataFrame(pd.read_json('portfolio.json'))

In [0]:
# Load the Bloomberg entity table; it is joined to the portfolio further down
# to bring in sector / industry and company description.
bloomberg_entity_fqn = f'{lakehouse_catalog}.{lakehouse_database}.{bloomberg_entity_table}'
bloomberg_entity = spark.table(bloomberg_entity_fqn)
display(bloomberg_entity)

In [0]:
from pyspark.sql import functions as F

@F.udf('string')
def get_ticker(identifier):
  """Build a '<SYMBOL>-US' ticker from a portfolio identifier.

  The symbol is whatever precedes the first space of `identifier`
  (presumably a Bloomberg-style identifier such as 'AAPL US Equity' -- TODO confirm).
  A null identifier yields null instead of raising inside the UDF, so a single
  missing value no longer fails the whole Spark job; such rows simply do not
  match in a downstream inner join.
  """
  if identifier is None:
    return None
  # US equity only in that example portfolio
  return '{}-US'.format(identifier.split(' ')[0])

# (source column, output column) pairs for the final projection.
# NOTE(review): the upper-case columns presumably come from the Bloomberg entity
# table and number_of_shares from the portfolio file -- confirm against the schemas.
output_columns = [
  ('TICKER', 'ticker'),
  ('LONG_COMP_NAME', 'company_name'),
  ('INDUSTRY_GROUP', 'industry'),
  ('INDUSTRY_SUBGROUP', 'sector'),
  ('CNTRY_OF_DOMICILE', 'country_of_domicile'),
  ('CIE_DES', 'company_description'),
  ('number_of_shares', 'number_of_shares'),
]

# Derive the join key from the portfolio's ticker column
portfolio_with_key = portfolio.withColumn('TICKER', get_ticker('ticker'))

# Keep only positions found in the Bloomberg entity table, dropping exact duplicate rows
matched_positions = portfolio_with_key.join(bloomberg_entity, on='TICKER', how='inner').distinct()

# Project and rename to the schema stored in the lakehouse
portfolio_df = matched_positions.select(
  *[F.col(source).alias(target) for source, target in output_columns]
)

In [0]:
# Preview the enriched portfolio (one row per position joined with Bloomberg entity data)
display(portfolio_df)

In [0]:
# Persist the enriched portfolio, replacing any previous content of the table
portfolio_table_fqn = f'{lakehouse_catalog}.{lakehouse_database}.{lakehouse_table}'
portfolio_df.write.mode('overwrite').saveAsTable(portfolio_table_fqn)

In [0]:
# Register a SQL function that returns the saved portfolio as a list of JSON
# records (one per position), so it can be called without knowing the table name.
# Only columns that exist in the saved portfolio table are selected: that table is
# written with ticker, company_name, industry, sector, country_of_domicile,
# company_description and number_of_shares -- it has no `identifier` column, so
# referencing one here would make the function body unresolvable.
_ = sql(f'''CREATE OR REPLACE FUNCTION {lakehouse_catalog}.{lakehouse_database}.{lakehouse_function}()
RETURNS STRING
LANGUAGE SQL
COMMENT 'Return US equity portfolio with ticker information, company name, sectors and active positions'
RETURN SELECT 
COLLECT_LIST(TO_JSON(STRUCT(ticker, company_name, industry, sector, number_of_shares)))
FROM {lakehouse_catalog}.{lakehouse_database}.{lakehouse_table}
GROUP BY ALL''')