In [1]:
import pickle
from datetime import datetime, timedelta
from pprint import pprint

from dotenv import load_dotenv

from article_search import ArticleEmbeddingSearch
from llm_helper import LLMHelper
from prompts import ClientLandingPagePrompts, AdGenerationPrompts
from scrape_urls import ScrapeURLs

load_dotenv("env/alphix_test.env")

  from .autonotebook import tqdm as notebook_tqdm


True

### Scrape the data

To work out which articles are most relevant to each client page's investment view or market commentary, we first need to fetch the article text from the URLs.

In [2]:
# Excel workbook handed to the scraper; judging by its name it holds the client
# URLs and example news articles, organised per client.
CLIENT_WORKBOOK = "ML Engineer - URL and news articles examples by client.xlsx"
scrape_urls = ScrapeURLs(xlsx_sheet=CLIENT_WORKBOOK)

  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str
  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str
  client_landing_urls.append(df.iloc[0][0][5:].strip()) # Append client landing url str


First, scrape the client landing-page URLs:

In [3]:
# Scrape each client's landing page. The result is indexed by landing-page URL
# further down, when the client summaries are built.
# NOTE(review): presumably a dict of URL -> scraped page text; confirm in ScrapeURLs.
client_urls = scrape_urls.webscrape_client_urls()

Get the "About Us" URLs for more context on what each client offers and what they specialise in:

In [4]:
# "About Us" pages, one per client. The exact strings matter: the same URLs are
# used later as lookup keys into `client_about_us`, so they must not be normalised
# (the PIMCO link keeps its tracking query string for that reason).
PIMCO_ABOUT_US_URL = "https://www.pimco.com/gb/en/about-us?_gl=1*bnzbrf*_up*MQ..&gclid=CjwKCAjw4efDBhATEiwAaDBpbtqfnbU7L0ORmwK1I7PEE3s__gGy5L_7FIkFoQmeRBLgjK5WcffEjBoCaFYQAvD_BwE&gclsrc=aw.ds&gbraid=0AAAAADFc_uWUeOgQKLXA_8uIAkrPHF9q-"
T_ROWE_ABOUT_US_URL = "https://www.troweprice.com/financial-intermediary/uk/en/about.html"
STATE_STREET_ABOUT_US_URL = "https://www.statestreet.com/us/en/about/our-story"

about_us_urls = [
    PIMCO_ABOUT_US_URL,
    T_ROWE_ABOUT_US_URL,
    STATE_STREET_ABOUT_US_URL,
]

# Scrape the pages; the result is indexed by these same URLs further down.
client_about_us = scrape_urls.webscrape_client_about_us_urls(about_us_urls)

Scrape the URLs for the different sheets:

In [None]:
# Scrape the articles for every sheet the scraper knows about, one sheet at a time.
# Top-level `await` works here because IPython/Jupyter runs cells with autoawait;
# the same code would be a syntax error in a plain Python script.
# NOTE(review): the awaited result is not assigned to anything — presumably ScrapeURLs
# keeps or persists the scraped documents itself; confirm in scrape_urls.py.
for sheet_name in scrape_urls.sheet_names:
    print(sheet_name)  # progress marker: shows which sheet is being scraped
    await scrape_urls.webscrape_relevant_docs(sheet_name=sheet_name)

In [5]:
# Load the articles that were scraped and saved on an earlier run, instead of
# scraping them again.
# NOTE(review): pickle.load can execute arbitrary code while deserialising — only
# load pickle files that this project produced itself.
SCRAPED_DOCS_PICKLE = 'test_docs.pickle'
with open(SCRAPED_DOCS_PICKLE, mode='rb') as pickle_file:
    sheet_docs = pickle.load(pickle_file)

### Summarise

Summarise the articles in each of the sheets:

In [6]:
llm_helper = LLMHelper()

# Summarise the scraped articles sheet by sheet. The tuple order fixes the order
# in which the summarisation calls are made (T Rowe Price, PIMCO, State Street).
article_summaries = {}
for client_sheet in ('T Rowe Price', 'PIMCO', 'State Street'):
    article_summaries[client_sheet] = llm_helper.summarise_df(sheet_docs[client_sheet])

# Expose the per-client results under the names the rest of the notebook uses.
t_rowe_df = article_summaries['T Rowe Price']
pimco_df = article_summaries['PIMCO']
ss_df = article_summaries['State Street']

Summarise the client urls:

In [7]:
def _summarise_client(landing_url, about_us_url):
    """Summarise one client from its scraped landing page and About Us page.

    Both texts are passed to the prompt builder by keyword so the three clients
    are handled identically.

    Args:
        landing_url: Key into ``client_urls`` for the client's landing page.
        about_us_url: Key into ``client_about_us`` for the client's About Us page.

    Returns:
        Whatever ``llm_helper.summarise_txt`` returns for the client prompt. Later
        cells index it with 'core_investment_philosophy', 'key_value_proposition'
        and 'key_themes_messaging'.

    Raises:
        KeyError: If either URL is missing from the scraped results (assuming both
            lookups are plain dicts — confirm in ScrapeURLs).
    """
    return llm_helper.summarise_txt(
        ClientLandingPagePrompts.sys_prompt,
        ClientLandingPagePrompts.summarise_client_message(
            landing_page_txt=client_urls[landing_url],
            about_us_txt=client_about_us[about_us_url],
        ),
    )


# The About Us keys must match the scraped URLs character for character, including
# the tracking query string on the PIMCO link.
pimco_summary = _summarise_client(
    landing_url='https://www.pimco.com/us/en/insights/fed-policymakers-split-decision',
    about_us_url='https://www.pimco.com/gb/en/about-us?_gl=1*bnzbrf*_up*MQ..&gclid=CjwKCAjw4efDBhATEiwAaDBpbtqfnbU7L0ORmwK1I7PEE3s__gGy5L_7FIkFoQmeRBLgjK5WcffEjBoCaFYQAvD_BwE&gclsrc=aw.ds&gbraid=0AAAAADFc_uWUeOgQKLXA_8uIAkrPHF9q-',
)

ss_summary = _summarise_client(
    landing_url='https://www.ssga.com/uk/en_gb/institutional/capabilities/esg',
    about_us_url='https://www.statestreet.com/us/en/about/our-story',
)

t_rowe_summary = _summarise_client(
    landing_url='https://www.troweprice.com/financial-intermediary/uk/en/lp/global-market-outlook.html',
    about_us_url='https://www.troweprice.com/financial-intermediary/uk/en/about.html',
)

### Store embedding for search:

Convert the article summaries into embeddings and store them. We can then embed each client's summarised "investment philosophy", "key value proposition" and "key themes messaging" to find the most relevant articles:

In [8]:
# One search object per client, each built from that client's article summaries.
# Constructed in the order PIMCO, T Rowe Price, State Street.
pimco_embedding_search, t_rowe_embedding_search, ss_embedding_search = [
    ArticleEmbeddingSearch(summary_df)
    for summary_df in (pimco_df, t_rowe_df, ss_df)
]

Get the 5 most relevant articles for each client:

In [9]:
# Number of articles to retrieve per client.
top_k = 5
# Only search articles from the last 30 days, measured from when this cell runs.
# NOTE(review): the articles come from a pickle saved earlier, so re-running this
# much later may leave few or no articles inside the window — confirm how
# ArticleEmbeddingSearch.search applies `from_date`.
from_date = datetime.now() - timedelta(days=30)


def _build_search_string(client_summary):
    """Build the embedding-search query text from a client summary.

    Args:
        client_summary: Mapping with string values under
            'core_investment_philosophy' and 'key_value_proposition', and an
            iterable of strings under 'key_themes_messaging'.

    Returns:
        The philosophy and value proposition separated by single spaces,
        followed by the themes joined with ", ".

    Raises:
        KeyError: If one of the three keys is missing.
        TypeError: If a field is not a string (or the themes are not strings).
    """
    return " ".join([
        client_summary['core_investment_philosophy'],
        client_summary['key_value_proposition'],
        ", ".join(client_summary['key_themes_messaging']),
    ])


pimco_search_string = _build_search_string(pimco_summary)
relevant_pimco_articles = pimco_embedding_search.search(pimco_search_string, top_k=top_k, from_date=from_date)

ss_search_string = _build_search_string(ss_summary)
relevant_ss_articles = ss_embedding_search.search(ss_search_string, top_k=top_k, from_date=from_date)

t_rowe_search_string = _build_search_string(t_rowe_summary)
relevant_t_rowe_articles = t_rowe_embedding_search.search(t_rowe_search_string, top_k=top_k, from_date=from_date)

### Generate Ad + Ad Imagery

Generate ad copy for the different ad types:

In [10]:
def _generate_ad_copy(client_name, client_summary, relevant_articles):
    """Ask the LLM for ad copy for one client.

    Builds the user prompt from the client's name, its summary and the articles
    retrieved for it, then sends it together with the ad-generation system prompt.
    Returns whatever ``llm_helper.summarise_txt`` returns; later cells read its
    'ad_creatives' entry.
    """
    return llm_helper.summarise_txt(
        AdGenerationPrompts.sys_prompt,
        AdGenerationPrompts.generate_ad_copy(client_name, client_summary, relevant_articles),
    )


pimco_ad_copy = _generate_ad_copy("PIMCO", pimco_summary, relevant_pimco_articles)
ss_ad_copy = _generate_ad_copy("State Street", ss_summary, relevant_ss_articles)
t_rowe_ad_copy = _generate_ad_copy("T Rowe Price", t_rowe_summary, relevant_t_rowe_articles)

We can now generate some basic images for the ads:

In [11]:
# Generate an image for each T Rowe Price ad creative and pretty-print the result.
# The recorded output shows each result as an (image URL, ad dict) pair.
# `pprint` comes from the notebook's top import cell rather than being imported here.
for ad in t_rowe_ad_copy['ad_creatives']:
    pprint(llm_helper.generate_ad_image(ad=ad))

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-iuSKe6y2cL9Kd1Akh9ZL8yfk.png?st=2025-07-23T15%3A06%3A42Z&se=2025-07-23T17%3A06%3A42Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T10%3A16%3A19Z&ske=2025-07-24T10%3A16%3A19Z&sks=b&skv=2024-08-04&sig=OkAe5QM9L6Dw2hQvO/gX6EL9Au8TgMCBDora5Uer0p8%3D',
 {'call_to_action': 'Learn More',
  'format': 'LinkedIn Single Image Ad',
  'headline': 'Navigating Resilient Value in Changing Markets',
  'imagery_suggestion': 'A vast landscape at dawn with a powerful tide pushing '
                        'large, resilient stepping stones arranged in a path '
                        'toward a rising sun, symbolizing steady navigation '
                        'through turbulent waters; subtle shadows highlight '
                        'the stones’ solidity amidst gently crashing 

In [12]:
# Generate an image for each State Street ad creative and pretty-print the result.
for creative in ss_ad_copy['ad_creatives']:
    image_result = llm_helper.generate_ad_image(ad=creative)
    pprint(image_result)

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-lsiw7zNWAh4kRBQiojQPzy9o.png?st=2025-07-23T15%3A08%3A08Z&se=2025-07-23T17%3A08%3A08Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T06%3A25%3A11Z&ske=2025-07-24T06%3A25%3A11Z&sks=b&skv=2024-08-04&sig=fovzF1bwFpUs7dRyFyF8MgQ8Jf5tOkuzhY40u099%2Bb4%3D',
 {'call_to_action': 'Learn More',
  'format': 'LinkedIn Single Image Ad',
  'headline': 'Accelerate Climate Solutions Investing Now',
  'imagery_suggestion': 'A surreal landscape where a large translucent tree '
                        'made of digital data streams grows from an industrial '
                        'cityscape, blending nature and technology, '
                        'illuminated by soft sunrise light symbolizing hope '
                        'and sustainable growth.',
  'linkage_justification': 'T

In [13]:
# Generate an image for each PIMCO ad creative and pretty-print the result.
for creative in pimco_ad_copy['ad_creatives']:
    image_result = llm_helper.generate_ad_image(ad=creative)
    pprint(image_result)

('https://oaidalleapiprodscus.blob.core.windows.net/private/org-NsFKv748OTOT3yTJGe1Ygqdp/user-Imbxj70XMCLsnJPeOiCRMRio/img-vY9Jo4wloW1JT3zDrFGthOES.png?st=2025-07-23T15%3A09%3A46Z&se=2025-07-23T17%3A09%3A46Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-07-23T12%3A22%3A48Z&ske=2025-07-24T12%3A22%3A48Z&sks=b&skv=2024-08-04&sig=Ifztsw2N4X/wPxNkPGsnZF5qNdjWLRFOHcvRRQIn/Kg%3D',
 {'call_to_action': 'Learn More',
  'format': 'LinkedIn Single Image Ad',
  'headline': 'Navigate Rate Uncertainty with Active Fixed Income',
  'imagery_suggestion': 'A towering lighthouse standing firm amid turbulent, '
                        'dark ocean waves under an overcast sky, with beams of '
                        'warm light cutting through fog and rain, symbolizing '
                        'guidance and stability amidst economic volatility.',
  'linkage_justification': 'The ad aligns PIMCO’s expertise in 