In [None]:
# Notebook-wide query parameters. The query cells interpolate these two
# values straight into their SQL text, so set them before running any query.
#   dt      - the day to query, written in the same 'YYYY-MM-DD' form as the example
#   product - the ticker symbol to look up
# The values below are only an example; replace them with your own.
dt, product = '2023-05-25', 'AAPL'


In [None]:
# Import the MayStreet Data Python library - this is already provided for you inside Workbench.
import maystreet_data
# Import the well-known pandas library (https://pandas.pydata.org)
import pandas as pd

# Pull a small sample (at most 10 rows) of news items for the chosen day `dt`.
# The SQL text is built first so it can be inspected on its own before it is run.
news_sample_sql = f"""
    SELECT
        dt, guid, timestamps[1], data.headline, data.body, data.subjects
    FROM 
        "prod_lake.p_mst_data_lake".mt_news
    WHERE 
        dt='{dt}'
    LIMIT 10
"""

# Submit the query to the data lake; the result is an iterator that yields
# one row at a time.
records_iter = maystreet_data.query(maystreet_data.DataSource.DATA_LAKE, news_sample_sql)

# Drain the iterator into a pandas DataFrame so that every row is visible at once.
data = pd.DataFrame(records_iter)

# Leaving the frame as the cell's final expression makes Jupyter render it.
data


In [None]:
# Import the MayStreet Data Python library - this is already provided for you inside Workbench.
import maystreet_data
# Import the well-known pandas library (https://pandas.pydata.org)
import pandas as pd

# Look up the rows of the mt_pid_to_ticker mapping table that belong to the
# chosen ticker (`product`) on the chosen day (`dt`); at most 10 rows are returned.
# The SQL text is built first so it can be inspected on its own before it is run.
ticker_mapping_sql = f"""
    SELECT
        *
    FROM 
        "prod_lake.p_mst_data_lake".mt_pid_to_ticker
    WHERE 
        dt='{dt}' AND ticker='{product}'
    LIMIT 10
"""

# Submit the query to the data lake; the result is an iterator that yields
# one row at a time.
records_iter = maystreet_data.query(maystreet_data.DataSource.DATA_LAKE, ticker_mapping_sql)

# Drain the iterator into a pandas DataFrame so that every row is visible at once.
data = pd.DataFrame(records_iter)

# Leaving the frame as the cell's final expression makes Jupyter render it.
data

In [None]:
# Import the MayStreet Data Python library - this is already provided for you inside Workbench.
import maystreet_data
# Import the well known Python Pandas library (https://pandas.pydata.org)
import pandas as pd

# Given a ticker (`product`, ex: 'COIN'), find news items on day `dt` whose
# subjects reference that ticker, and return the body text of up to 5 of them.
#
# How the query works, one CTE at a time:
#   cross_join_subjects       - one row per (guid, subject) for the day's news,
#                               produced by unnesting data.subjects
#   pids_from_news            - subjects starting with 'P:'; the prefix is
#                               stripped and the remainder is exposed as `pid`
#   rics_from_news            - subjects starting with 'R:'; the prefix is
#                               stripped and the remainder is exposed as `ric`
#   mapping_join              - ticker / ric / permid (as text) triples, built by
#                               joining mt_pid_to_ticker and mt_pid_to_ric on permid
#   relation_of_pids_and_rics - the triples for the requested ticker only
#   ric_guids_we_want /
#   pid_guids_we_want         - guids of news items matching by ric / by pid
#   guids_we_want             - union of both guid sets, capped at 5
#
# Two details matter for correctness and cost:
#   * The prefix tests are '^P:' and '^R:'. The earlier '^P:*' / '^R:*' made the
#     colon optional (":*" means "zero or more colons"), so any subject that
#     merely started with the letter P or R was let through.
#   * The final join back to mt_news is filtered on dt as well. Every guid in
#     guids_we_want was taken from news rows of that day, so the filter keeps
#     the whole query limited by date (presumably dt is the partition column -
#     TODO confirm), instead of joining against every day in the table.
#
# NOTE(review): mt_news is referenced without a schema here and the mapping
# tables use p_mst_data_lake, whereas the earlier cells use
# "prod_lake.p_mst_data_lake" - confirm that all of these resolve to the
# intended tables in the session this runs in.
related_news_sql = f"""
with cross_join_subjects as (
    SELECT guid, 
    subject
    FROM mt_news
    CROSS JOIN UNNEST(data.subjects) AS t(subject)
    WHERE dt = '{dt}'
),
pids_from_news as (
    SELECT guid, 
    regexp_replace(subject, '(^P:)(.*)', '$2') AS pid,
    subject
    FROM cross_join_subjects
    WHERE regexp_like(subject, '^P:')
),
rics_from_news as (
    SELECT guid, 
    regexp_replace(subject, '(^R:)(.*)', '$2') AS ric, 
    subject
    FROM cross_join_subjects
    WHERE regexp_like(subject, '^R:')
),
mapping_join as (
    SELECT DISTINCT ticker, ric, CAST(mt_pid_to_ticker.permid AS VARCHAR) AS string_pid
    FROM p_mst_data_lake.mt_pid_to_ticker 
    JOIN p_mst_data_lake.mt_pid_to_ric 
    ON p_mst_data_lake.mt_pid_to_ticker.permid = p_mst_data_lake.mt_pid_to_ric.permid 
    WHERE p_mst_data_lake.mt_pid_to_ticker.dt='{dt}' AND p_mst_data_lake.mt_pid_to_ric.dt = '{dt}'
),
relation_of_pids_and_rics as (
    SELECT DISTINCT ric, string_pid AS pid, ticker
    FROM mapping_join
    WHERE ticker = '{product}'
),
ric_guids_we_want as (
    SELECT DISTINCT guid FROM relation_of_pids_and_rics 
    JOIN rics_from_news ON rics_from_news.ric = relation_of_pids_and_rics.ric
),
pid_guids_we_want as (
    SELECT DISTINCT guid FROM relation_of_pids_and_rics 
    JOIN pids_from_news ON pids_from_news.pid = relation_of_pids_and_rics.pid
),
guids_we_want as (
    SELECT * FROM pid_guids_we_want UNION SELECT * FROM ric_guids_we_want LIMIT 5
)
SELECT data.body
FROM guids_we_want
JOIN mt_news ON guids_we_want.guid = mt_news.guid
WHERE mt_news.dt = '{dt}'
"""

# Submit the query to the data lake; the result is an iterator that yields
# one row at a time.
records_iter = maystreet_data.query(maystreet_data.DataSource.DATA_LAKE, related_news_sql)

# Drain the iterator into a pandas DataFrame so that every row is visible at once.
data = pd.DataFrame(records_iter)

# Leaving the frame as the cell's final expression makes Jupyter render it.
data

In [None]:
# Display whatever is currently bound to `data`; when the notebook is run top
# to bottom this is the DataFrame built by the preceding query cell.
data
