In [1]:
import os
import pickle

from article_search import ArticleEmbeddingSearch
from llm_helper import LLMHelper
from prompts import ClientLandingPagePrompts, AdGenerationPrompts
from scrape_urls import ScrapeURLs

from dotenv import load_dotenv
# Load environment variables from the project env file before any helper object is created.
# The `True` echoed beneath this cell is the return value of this call.
# NOTE(review): presumably this supplies the API credentials the LLM helper needs — confirm.
load_dotenv("env/alphix_test.env")

  from .autonotebook import tqdm as notebook_tqdm


True

### Scrape the data

To determine which articles are most relevant to each client page's investment view or market commentary, we first need to fetch the article text from the URL links.

In [2]:
# Build the scraper from the workbook of client URLs and news-article examples.
scrape_urls = ScrapeURLs(
    xlsx_sheet="ML Engineer - URL and news articles examples by client.xlsx",
)

  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str
  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str
  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str


First, scrape the client landing-page URLs:

In [3]:
# Scrape the client landing pages. The result is indexed by landing-page URL in the
# summarisation cell and passed on as `landing_page_txt`, i.e. it is used as a
# URL -> page-text lookup.
client_urls = scrape_urls.webscrape_client_urls()

Scrape each client's "About us" page for more context on what the client offers and what it specialises in:

In [6]:
# "About us" pages, one per client (PIMCO, T. Rowe Price, State Street).
# NOTE(review): the PIMCO URL carries tracking query parameters and is reused verbatim
# as a lookup key in the summarisation cell, so it must be changed in both places or not at all.
about_us_urls = [
    "https://www.pimco.com/gb/en/about-us?_gl=1*bnzbrf*_up*MQ..&gclid=CjwKCAjw4efDBhATEiwAaDBpbtqfnbU7L0ORmwK1I7PEE3s__gGy5L_7FIkFoQmeRBLgjK5WcffEjBoCaFYQAvD_BwE&gclsrc=aw.ds&gbraid=0AAAAADFc_uWUeOgQKLXA_8uIAkrPHF9q-",
    "https://www.troweprice.com/financial-intermediary/uk/en/about.html",
    "https://www.statestreet.com/us/en/about/our-story",
]

# The result is later indexed by these same URL strings.
client_about_us = scrape_urls.webscrape_client_about_us_urls(about_us_urls)

Scrape the URLs for the different sheets:

In [None]:
# Scrape the article URLs one sheet at a time, printing each sheet name as a progress marker.
# The `await` sits directly at cell level, so this cell relies on the notebook kernel
# accepting top-level await; the sheets are processed sequentially, not concurrently.
# NOTE(review): the awaited result is not captured here — presumably the scraper stores or
# persists the documents itself; confirm against ScrapeURLs.
for sheet_name in scrape_urls.sheet_names:
    print(sheet_name)
    await scrape_urls.webscrape_relevant_docs(sheet_name=sheet_name)

In [7]:
# Load the previously scraped articles from disk rather than re-running the scrape.
# The loaded object is indexed by sheet name ('T Rowe Price', 'PIMCO', 'State Street')
# in the summarisation cell that follows.
# NOTE(review): pickle.load can execute arbitrary code — only load a file you produced yourself.
# NOTE(review): the visible cells never write 'test_docs.pickle'; document how it is produced
# so the notebook can be reproduced from a fresh kernel.
with open('test_docs.pickle', 'rb') as handle:
    sheet_docs = pickle.load(handle)

Summarise the articles in each of the sheets:

In [None]:
llm_helper = LLMHelper()

# Run the summariser over each client's sheet of scraped documents. The names on the
# left line up, in order, with the sheet keys on the right; calls happen in that order.
t_rowe_df, pimco_df, ss_df = (
    llm_helper.summarise_df(sheet_docs[sheet_key])
    for sheet_key in ('T Rowe Price', 'PIMCO', 'State Street')
)

Summarise the client landing pages:

In [12]:
# Summarise each client's landing page, using its "About us" page as extra context.
# All three calls use the same keyword form (landing_page_txt=..., about_us_txt=...) so
# the clients are handled identically and the two texts cannot be swapped by accident.
# The summaries are read further down through the keys 'core_investment_philosophy',
# 'key_value_proposition' and 'key_themes_messaging'.
pimco_summary = llm_helper.summarise_txt(
    ClientLandingPagePrompts.sys_prompt,
    ClientLandingPagePrompts.summarise_client_message(
        landing_page_txt=client_urls['https://www.pimco.com/us/en/insights/fed-policymakers-split-decision'],
        # This key must match the PIMCO entry of `about_us_urls` character for character.
        about_us_txt=client_about_us['https://www.pimco.com/gb/en/about-us?_gl=1*bnzbrf*_up*MQ..&gclid=CjwKCAjw4efDBhATEiwAaDBpbtqfnbU7L0ORmwK1I7PEE3s__gGy5L_7FIkFoQmeRBLgjK5WcffEjBoCaFYQAvD_BwE&gclsrc=aw.ds&gbraid=0AAAAADFc_uWUeOgQKLXA_8uIAkrPHF9q-'],
    ),
)

ss_summary = llm_helper.summarise_txt(
    ClientLandingPagePrompts.sys_prompt,
    ClientLandingPagePrompts.summarise_client_message(
        landing_page_txt=client_urls['https://www.ssga.com/uk/en_gb/institutional/capabilities/esg'],
        about_us_txt=client_about_us['https://www.statestreet.com/us/en/about/our-story'],
    ),
)

t_rowe_summary = llm_helper.summarise_txt(
    ClientLandingPagePrompts.sys_prompt,
    ClientLandingPagePrompts.summarise_client_message(
        landing_page_txt=client_urls['https://www.troweprice.com/financial-intermediary/uk/en/lp/global-market-outlook.html'],
        about_us_txt=client_about_us['https://www.troweprice.com/financial-intermediary/uk/en/about.html'],
    ),
)

Convert the article summaries into embeddings and store them. We can then embed each client's summarised "investment philosophy", "key value proposition" and "key themes messaging" to find the most relevant articles:

In [14]:
# Build one embedding search per client. Each entry of the 'summary' column is indexed
# by 'article_summary' to pull out the text handed to ArticleEmbeddingSearch.
pimco_embedding_search = ArticleEmbeddingSearch(
    [entry['article_summary'] for entry in pimco_df['summary']]
)
t_rowe_embedding_search = ArticleEmbeddingSearch(
    [entry['article_summary'] for entry in t_rowe_df['summary']]
)
ss_embedding_search = ArticleEmbeddingSearch(
    [entry['article_summary'] for entry in ss_df['summary']]
)

Get the 5 most relevant articles for each client:

In [15]:
def _build_search_string(summary):
    """Join a client summary into one query string for the embedding search.

    Args:
        summary: Mapping holding strings under 'core_investment_philosophy' and
            'key_value_proposition', and an iterable of strings under
            'key_themes_messaging'.

    Returns:
        The philosophy, the value proposition and the comma-separated themes,
        joined by single spaces.

    Raises:
        KeyError: If one of the three keys is missing from ``summary``.
    """
    return " ".join([
        summary['core_investment_philosophy'],
        summary['key_value_proposition'],
        ", ".join(summary['key_themes_messaging']),
    ])


# For each client, query that client's own article index with the string built from
# its landing-page summary and keep the five best matches.
pimco_search_string = _build_search_string(pimco_summary)
relevant_pimco_articles = pimco_embedding_search.search(pimco_search_string, top_k=5)

ss_search_string = _build_search_string(ss_summary)
relevant_ss_articles = ss_embedding_search.search(ss_search_string, top_k=5)

t_rowe_search_string = _build_search_string(t_rowe_summary)
relevant_t_rowe_articles = t_rowe_embedding_search.search(t_rowe_search_string, top_k=5)

Generate ad copy for the different ad types:

In [17]:
# Generate ad copy per client from the client name, its landing-page summary and its
# most relevant articles. The results are read below through the 'ad_creatives' key.
pimco_ad_copy = llm_helper.summarise_txt(
    AdGenerationPrompts.sys_prompt,
    AdGenerationPrompts.generate_ad_copy("PIMCO", pimco_summary, relevant_pimco_articles),
)
ss_ad_copy = llm_helper.summarise_txt(
    AdGenerationPrompts.sys_prompt,
    AdGenerationPrompts.generate_ad_copy("State Street", ss_summary, relevant_ss_articles),
)
t_rowe_ad_copy = llm_helper.summarise_txt(
    AdGenerationPrompts.sys_prompt,
    AdGenerationPrompts.generate_ad_copy("T Rowe Price", t_rowe_summary, relevant_t_rowe_articles),
)

We can now generate some basic images for the ads:

In [18]:
# Generate one image per T. Rowe Price ad creative and print what the helper returns;
# the saved output shows a tuple of (image URL, ad dict).
# NOTE(review): the printed URLs are signed links with embedded access signatures —
# consider clearing this output before sharing the notebook.
for ad in t_rowe_ad_copy['ad_creatives']:
    print(llm_helper.generate_ad_image(ad=ad))

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-SZpeRrgudWA5D7G4IIaLVrJu.png?st=2025-07-23T12%3A07%3A44Z&se=2025-07-23T14%3A07%3A44Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T12%3A34%3A51Z&ske=2025-07-24T12%3A34%3A51Z&sks=b&skv=2024-08-04&sig=c8X5vwijDHTFjGGHIGTiSOoCQvwGSs%2BN7rdrGEDfsFc%3D', {'format': 'LinkedIn Single Image Ad', 'headline': 'Build Resilient Portfolios with Strategic Diversification', 'main_copy': 'Navigate global uncertainties through T. Rowe Price’s expert-led, multi-asset strategies. Leverage active allocation to capture emerging market opportunities while managing risk across value, equities, and inflation-protected assets.', 'call_to_action': 'Learn More', 'imagery_suggestion': 'A sophisticated global financial dashboard overlaying a world map highlighting diversified asset classes

In [19]:
# Generate one image per State Street ad creative and print what the helper returns;
# the saved output shows a tuple of (image URL, ad dict).
# NOTE(review): the printed URLs are signed links with embedded access signatures —
# consider clearing this output before sharing the notebook.
for ad in ss_ad_copy['ad_creatives']:
    print(llm_helper.generate_ad_image(ad=ad))

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-nV2rFwTF7HsLaZGCNmOBjUNg.png?st=2025-07-23T12%3A10%3A26Z&se=2025-07-23T14%3A10%3A26Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T07%3A05%3A03Z&ske=2025-07-24T07%3A05%3A03Z&sks=b&skv=2024-08-04&sig=8%2BQgodSOG23K/AOOBZPOyyrtuj90h1qUxP4Wu9qVxfc%3D', {'format': 'LinkedIn Single Image Ad', 'headline': 'Advance Climate-Aligned Investing with State Street', 'main_copy': 'Harness data-driven, tailored sustainable solutions to power portfolios that meet emerging climate frameworks and long-term decarbonization goals.', 'call_to_action': 'Learn More', 'imagery_suggestion': 'A sophisticated digital dashboard visual displaying climate impact metrics and portfolio analytics, overlaid on a globe with green and blue hues, symbolizing global sustainability insights and forw

In [None]:
# Generate one image per PIMCO ad creative and print what the helper returns;
# the saved output shows a tuple of (image URL, ad dict).
# NOTE(review): the printed URLs are signed links with embedded access signatures —
# consider clearing this output before sharing the notebook.
for ad in pimco_ad_copy['ad_creatives']:
    print(llm_helper.generate_ad_image(ad=ad))

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-pK00revsbdSCtDvGCCnskALy.png?st=2025-07-23T12%3A12%3A48Z&se=2025-07-23T14%3A12%3A48Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T12%3A31%3A59Z&ske=2025-07-24T12%3A31%3A59Z&sks=b&skv=2024-08-04&sig=jkdCu2VAZnsjmE2x/ulYik0GYZP5%2BdkHl1aXeX46BNc%3D', {'format': 'LinkedIn Single Image Ad', 'headline': 'Navigate Economic Uncertainty with Active Fixed Income', 'main_copy': 'Amid rising mortgage rates and Fed policy uncertainty, PIMCO delivers resilient bond strategies designed for disciplined income generation and active risk management.', 'call_to_action': 'Learn More', 'imagery_suggestion': 'Professional, clean visual showing a confident institutional investor reviewing diversified fixed income portfolios on digital charts, with faint overlays of fluctuating inte