In [1]:
import pickle
from datetime import datetime, timedelta
from pprint import pprint

from dotenv import load_dotenv

from article_search import ArticleEmbeddingSearch
from llm_helper import LLMHelper
from prompts import ClientLandingPagePrompts, AdGenerationPrompts
from scrape_urls import ScrapeURLs

# Load settings from the project's env file into the process environment. The
# call's return value is what this cell displays (True in the saved output).
load_dotenv("env/alphix_test.env")

  from .autonotebook import tqdm as notebook_tqdm


True

### Scrape the data

To find out which articles are most relevant to each client page's investment view or market commentary, we first need to retrieve the article text from the URLs.

In [2]:
# Path of the Excel workbook handed to the scraper.
CLIENT_WORKBOOK = "ML Engineer - URL and news articles examples by client.xlsx"
scrape_urls = ScrapeURLs(xlsx_sheet=CLIENT_WORKBOOK)

  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str
  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str
  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str


First scrape the client URLs

In [3]:
# Scrape the client landing pages; later cells index the result by landing-page URL.
client_urls = scrape_urls.webscrape_client_urls()

Get the "About us" URLs for more context on what each client offers and what they specialise in:

In [4]:
# One "About us" page per client.
# NOTE(review): the PIMCO link carries ad-tracking query parameters. It is kept
# verbatim because the scraped text is later looked up by this exact string.
pimco_about_url = "https://www.pimco.com/gb/en/about-us?_gl=1*bnzbrf*_up*MQ..&gclid=CjwKCAjw4efDBhATEiwAaDBpbtqfnbU7L0ORmwK1I7PEE3s__gGy5L_7FIkFoQmeRBLgjK5WcffEjBoCaFYQAvD_BwE&gclsrc=aw.ds&gbraid=0AAAAADFc_uWUeOgQKLXA_8uIAkrPHF9q-"
t_rowe_about_url = "https://www.troweprice.com/financial-intermediary/uk/en/about.html"
state_street_about_url = "https://www.statestreet.com/us/en/about/our-story"

about_us_urls = [pimco_about_url, t_rowe_about_url, state_street_about_url]

# Scrape the pages; the result is indexed by URL in the summarisation cell.
client_about_us = scrape_urls.webscrape_client_about_us_urls(about_us_urls)

Scrape the URLs for the different sheets:

In [None]:
# Scrape the article URLs of every sheet, one sheet at a time.
# Top-level `await` relies on IPython's autoawait, so this cell runs in a
# notebook kernel but not as a plain Python script.
for sheet_name in scrape_urls.sheet_names:
    print(sheet_name)  # progress marker: which sheet is being scraped
    # NOTE(review): the awaited result is discarded here; presumably the scraper
    # keeps or persists the documents itself -- confirm in scrape_urls.
    await scrape_urls.webscrape_relevant_docs(sheet_name=sheet_name)

In [5]:
# Reload the articles scraped in an earlier run instead of scraping again.
# Later cells index the loaded object by sheet name.
# NOTE(security): pickle.load can execute arbitrary code -- only load a file
# that you produced yourself.
with open('test_docs.pickle', 'rb') as pickle_file:
    sheet_docs = pickle.load(pickle_file)

### Summarise

Summarise the articles in each of the sheets:

In [6]:
# Single helper instance shared by every LLM call in the rest of the notebook.
llm_helper = LLMHelper()
summarise_sheet = llm_helper.summarise_df

# One summarisation pass per client sheet. Each result is assigned separately so
# that a failure on a later sheet does not discard the earlier results.
t_rowe_df = summarise_sheet(sheet_docs["T Rowe Price"])
pimco_df = summarise_sheet(sheet_docs["PIMCO"])
ss_df = summarise_sheet(sheet_docs["State Street"])

Summarise the client urls:

In [7]:
def _summarise_client(landing_url, about_us_url):
    """Summarise one client from its scraped landing page and "About us" page.

    Both texts are always passed by keyword so the three clients are handled
    identically.

    Args:
        landing_url: Key into ``client_urls`` for the landing-page text.
        about_us_url: Key into ``client_about_us`` for the "About us" text.

    Returns:
        Whatever ``llm_helper.summarise_txt`` returns for the client prompt.
        Later cells read the keys 'core_investment_philosophy',
        'key_value_proposition' and 'key_themes_messaging' from it.

    Raises:
        KeyError: If a URL is missing from the scraped results (assuming both
            are plain dicts -- confirm in scrape_urls).
    """
    user_message = ClientLandingPagePrompts.summarise_client_message(
        landing_page_txt=client_urls[landing_url],
        about_us_txt=client_about_us[about_us_url],
    )
    return llm_helper.summarise_txt(ClientLandingPagePrompts.sys_prompt, user_message)


pimco_summary = _summarise_client(
    landing_url='https://www.pimco.com/us/en/insights/fed-policymakers-split-decision',
    # Must match the scraped "About us" URL character for character, tracking
    # parameters included, because it is used as a lookup key.
    about_us_url='https://www.pimco.com/gb/en/about-us?_gl=1*bnzbrf*_up*MQ..&gclid=CjwKCAjw4efDBhATEiwAaDBpbtqfnbU7L0ORmwK1I7PEE3s__gGy5L_7FIkFoQmeRBLgjK5WcffEjBoCaFYQAvD_BwE&gclsrc=aw.ds&gbraid=0AAAAADFc_uWUeOgQKLXA_8uIAkrPHF9q-',
)

ss_summary = _summarise_client(
    landing_url='https://www.ssga.com/uk/en_gb/institutional/capabilities/esg',
    about_us_url='https://www.statestreet.com/us/en/about/our-story',
)

t_rowe_summary = _summarise_client(
    landing_url='https://www.troweprice.com/financial-intermediary/uk/en/lp/global-market-outlook.html',
    about_us_url='https://www.troweprice.com/financial-intermediary/uk/en/about.html',
)

### Store embedding for search:

Convert the article summaries into embeddings and store them. We can now embed the summarised "investment philosophy", "key value proposition" and "key themes messaging" to find the most relevant articles:

In [8]:
# Build one search object per client from that client's summarised articles.
# NOTE(review): presumably the constructor computes and stores the embeddings --
# confirm in article_search.
pimco_embedding_search = ArticleEmbeddingSearch(pimco_df)
t_rowe_embedding_search = ArticleEmbeddingSearch(t_rowe_df)
ss_embedding_search = ArticleEmbeddingSearch(ss_df)

Get the 5 most relevant articles:

In [9]:
# Number of results requested from each search.
top_k = 5
# Cut-off passed to the search as `from_date`: 30 days before this cell runs, so
# results depend on the run date. Presumably a lower bound on article date --
# confirm in article_search.
from_date = datetime.now() - timedelta(days=30)


def _build_search_string(client_summary):
    """Join a client summary's key fields into a single query string.

    Args:
        client_summary: Mapping with string values under
            'core_investment_philosophy' and 'key_value_proposition', and an
            iterable of strings under 'key_themes_messaging'.

    Returns:
        The philosophy, the value proposition and the comma-separated themes,
        separated by single spaces.

    Raises:
        KeyError: If one of the three keys is missing.
        TypeError: If a value is not a string (or the themes are not strings).
    """
    themes = ", ".join(client_summary['key_themes_messaging'])
    return " ".join((
        client_summary['core_investment_philosophy'],
        client_summary['key_value_proposition'],
        themes,
    ))


pimco_search_string = _build_search_string(pimco_summary)
relevant_pimco_articles = pimco_embedding_search.search(pimco_search_string, top_k=top_k, from_date=from_date)

ss_search_string = _build_search_string(ss_summary)
relevant_ss_articles = ss_embedding_search.search(ss_search_string, top_k=top_k, from_date=from_date)

t_rowe_search_string = _build_search_string(t_rowe_summary)
relevant_t_rowe_articles = t_rowe_embedding_search.search(t_rowe_search_string, top_k=top_k, from_date=from_date)

### Generate Ad + Ad Imagery

Generate ad copy for the different ad types:

In [14]:
# Every client uses the same system prompt; only the user prompt differs.
ad_sys_prompt = AdGenerationPrompts.sys_prompt

# Build each client's prompt and send it straight away, in the order
# PIMCO, State Street, T. Rowe Price.
pimco_ad_prompt = AdGenerationPrompts.generate_ad_copy(
    "PIMCO", pimco_summary, relevant_pimco_articles
)
pimco_ad_copy = llm_helper.summarise_txt(ad_sys_prompt, pimco_ad_prompt)

ss_ad_prompt = AdGenerationPrompts.generate_ad_copy(
    "State Street", ss_summary, relevant_ss_articles
)
ss_ad_copy = llm_helper.summarise_txt(ad_sys_prompt, ss_ad_prompt)

t_rowe_ad_prompt = AdGenerationPrompts.generate_ad_copy(
    "T Rowe Price", t_rowe_summary, relevant_t_rowe_articles
)
t_rowe_ad_copy = llm_helper.summarise_txt(ad_sys_prompt, t_rowe_ad_prompt)

We can now generate some basic images for the ads:

In [15]:
# Generate an image for every T. Rowe Price ad creative and pretty-print the
# result. `pprint` comes from the notebook's import cell.
# NOTE(security): the saved output shows signed image URLs that embed org and
# user identifiers -- clear the cell output before sharing the notebook.
for ad in t_rowe_ad_copy['ad_creatives']:
    image_result = llm_helper.generate_ad_image(ad=ad)
    pprint(image_result)

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-4kcns9TfdKLHWp7wx2CPIpKS.png?st=2025-07-23T14%3A19%3A38Z&se=2025-07-23T16%3A19%3A38Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T12%3A09%3A20Z&ske=2025-07-24T12%3A09%3A20Z&sks=b&skv=2024-08-04&sig=zSfHH3vMlNoEHGm39av%2BavwK6jLn4cyV2NJyWCrgeeI%3D',
 {'call_to_action': 'Learn More',
  'format': 'LinkedIn Single Image Ad',
  'headline': 'Navigate Uncertainty with Active Value Investing',
  'imagery_suggestion': 'A professional middle-aged investment manager '
                        'reviewing detailed multi-screen data charts inside a '
                        'modern office, natural sunlight filtering through '
                        'large windows, focused expression reflecting '
                        'thoughtful analysis amid complex market conditions.',


In [16]:
# Generate an image for every State Street ad creative and pretty-print the result.
for ad in ss_ad_copy['ad_creatives']:
    image_result = llm_helper.generate_ad_image(ad=ad)
    pprint(image_result)

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-ZIcQKecJ5xOPYwqtgfmK1FIb.png?st=2025-07-23T14%3A21%3A04Z&se=2025-07-23T16%3A21%3A04Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T10%3A06%3A23Z&ske=2025-07-24T10%3A06%3A23Z&sks=b&skv=2024-08-04&sig=Qcxd6KbcxpIr/48p3NmHDivqhxL4ryfH6lOSKNRZqNQ%3D',
 {'call_to_action': 'Learn More',
  'format': 'LinkedIn Single Image Ad',
  'headline': 'Align Portfolios with Science-Based Climate Solutions',
  'imagery_suggestion': 'A professional institutional investor reviewing '
                        'detailed sustainability data and climate metrics on a '
                        'large digital dashboard in a modern office, with '
                        'graphs and charts showing emission reduction and '
                        'climate risk indicators.',
  'linkage_justifi

In [17]:
# Generate an image for every PIMCO ad creative and pretty-print the result.
for ad in pimco_ad_copy['ad_creatives']:
    image_result = llm_helper.generate_ad_image(ad=ad)
    pprint(image_result)

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-91mksWN6R6BXKBWN3YBbF7Gr.png?st=2025-07-23T14%3A22%3A40Z&se=2025-07-23T16%3A22%3A40Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T02%3A27%3A50Z&ske=2025-07-24T02%3A27%3A50Z&sks=b&skv=2024-08-04&sig=Cgcw7pRstNneYMrkEOBFUOCBo%2BuwCW%2B3tdsMvF5HaK4%3D',
 {'call_to_action': 'Learn More',
  'format': 'LinkedIn Single Image Ad',
  'headline': 'Navigating Fed Uncertainty with Active Fixed Income',
  'imagery_suggestion': 'A senior institutional investor in a modern office, '
                        'attentively reviewing detailed market charts and '
                        'economic reports on dual computer screens, with a '
                        'muted city skyline visible through large windows, '
                        'conveying cautious analysis in a real-worl