### Introduction

In this project, the goal is to collect all AIM tickers from the London Stock Exchange site. These tickers will be stored in a list and subsequently saved as a CSV file for use in our AIM projects.


In [6]:
# Initially, BeautifulSoup was not a viable option as the webpage retrieves data via an API call.
# Therefore, the API endpoint was identified, and a cURL converter was used to obtain the necessary headers and URL.
# The URLs were then manipulated to ensure they could be utilized within the function and loop.
# This approach allowed the code to retrieve data efficiently by making HTTP GET requests to the constructed URLs.


import requests
import requests

# Browser-like request headers obtained from the cURL converter; they are
# reused for every request made to the API in this notebook.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8',
    'origin': 'https://www.londonstockexchange.com',
    'priority': 'u=1, i',
    'referer': 'https://www.londonstockexchange.com/',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Microsoft Edge";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0',
}

# API endpoint behind the AIM price-explorer page (no page parameter).
AIM_PAGE_URL = 'https://api.londonstockexchange.com/api/v1/pages?path=live-markets/market-data-dashboard/price-explorer&parameters=markets%253DAIM'

# Without a timeout, requests waits forever if the server never answers,
# which would leave this cell hanging.
response = requests.get(AIM_PAGE_URL, headers=headers, timeout=30)

# Keep the URL exactly as it was requested; later cells append the page
# parameter to it to build the URLs used by fetch_page_data.
requested_url = response.url
print(requested_url)


# Output the status code of the response
response.status_code

https://api.londonstockexchange.com/api/v1/pages?path=live-markets/market-data-dashboard/price-explorer&parameters=markets%253DAIM


200

In [2]:
# Page 0 is different from all other pages, so it will be treated separately.
# https://api.londonstockexchange.com/api/v1/pages?path=live-markets/market-data-dashboard/price-explorer&parameters=markets%253DAIM
# https://api.londonstockexchange.com/api/v1/pages?path=live-markets/market-data-dashboard/price-explorer&parameters=markets%253DAIM%2526page%253D1

In [7]:
# Page 0 is fetched separately from the numbered pages, so keep its URL
# (the request URL without any page parameter) under its own name.
requested_page_0 = requested_url
print(requested_page_0)

https://api.londonstockexchange.com/api/v1/pages?path=live-markets/market-data-dashboard/price-explorer&parameters=markets%253DAIM


In [8]:
# Now let's create a base_url (without a page number) to be used in the loop,
# where the page number is appended on each iteration.

# Page parameter to append: the double URL-encoded form of '&page='
page_parameter = '%2526page%253D'

# Base URL = requested URL followed by the (still number-less) page parameter
base_url = f'{requested_url}{page_parameter}'

# Print the modified URL to verify
print(base_url)

https://api.londonstockexchange.com/api/v1/pages?path=live-markets/market-data-dashboard/price-explorer&parameters=markets%253DAIM%2526page%253D


In [9]:
# The code retrieves data from a series of URLs using the requests.get method.
# For each URL, it extracts all instances of the key-value pair 'tidm' from the JSON response.
# This extraction process is performed in a loop, iterating through multiple pages to ensure all 'tidms' are collected.
# The 'tidms' gathered from each page are stored in a list.
# Once all 'tidms' are gathered, the list is converted into a DataFrame and saved as a CSV file for further use.

# TIDM stands for Tradable Instrument Display Mnemonic. It is a unique code used to 
# identify securities listed on the London Stock Exchange (LSE).


import requests
import pandas as pd
from tqdm import tqdm  # Import tqdm for the progress bar

# Function to fetch data from a specific page
def fetch_page_data(page_number, timeout=30):
    """Fetch one page from the API and return its decoded JSON body.

    Args:
        page_number: Page number appended to ``base_url``. A complete
            ``http://`` / ``https://`` URL is also accepted and requested
            as-is, because the first page is fetched by its full URL and
            appending that URL to ``base_url`` would produce a malformed
            address.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON when the response status is 200; ``None`` when the
        request fails, the status is not 200, or the body is not valid JSON.
    """
    is_full_url = isinstance(page_number, str) and page_number.startswith(('http://', 'https://'))
    url = page_number if is_full_url else f'{base_url}{page_number}'

    try:
        # The timeout keeps a stalled connection from blocking the whole loop.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to fetch data for page {page_number}: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for page {page_number}")
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other failure.
        print(f"Failed to decode JSON for page {page_number}")
        return None

# Function to check if a TIDM is valid
def is_valid_tidms(item):
    """Return False for dictionaries that carry unwanted metadata keys.

    Anything that is not a dictionary is accepted. A dictionary is rejected
    when it has a 'visibility', 'label' or 'errorTe' key.
    """
    if not isinstance(item, dict):
        return True

    # NOTE(review): 'errorTe' looks like a truncated key name - confirm against the API response.
    metadata_keys = ('visibility', 'label', 'errorTe')
    return not any(key in item for key in metadata_keys)

# Recursive function to find all TIDMs in the data
def find_tidms(data):
    """Walk nested dictionaries/lists and return every 'tidm' value found.

    In a dictionary, the value stored under the key "tidm" is collected when
    is_valid_tidms() accepts it; any other dictionary or list value is
    searched recursively. In a list, only the items accepted by
    is_valid_tidms() are searched. Any other kind of input yields an empty list.
    """
    found = []

    if isinstance(data, dict):
        for name, child in data.items():
            if name == "tidm" and is_valid_tidms(child):
                found.append(child)
                continue
            # Reached for every other key, and for a rejected "tidm" value
            if isinstance(child, (dict, list)):
                found += find_tidms(child)
    elif isinstance(data, list):
        for element in data:
            # Rejected list items are skipped entirely, not searched
            if is_valid_tidms(element):
                found += find_tidms(element)

    return found

# Last results page to request; pages 1..LAST_PAGE are fetched in the loop below.
LAST_PAGE = 39

# Initialize an empty list to collect all TIDMs
all_tidms = []

# Fetch the first page separately. It is requested by its full URL here
# because fetch_page_data() appends its argument to base_url; handing it the
# whole URL would glue that URL onto the page parameter instead of
# requesting page 0.
first_page_response = requests.get(requested_page_0, headers=headers, timeout=30)
if first_page_response.status_code == 200:
    all_tidms.extend(find_tidms(first_page_response.json()))
else:
    print("Failed to fetch data for page 0")

# Use tqdm to display a progress bar while looping through subsequent pages
for page_number in tqdm(range(1, LAST_PAGE + 1)):
    page_data = fetch_page_data(page_number)
    if page_data:
        all_tidms.extend(find_tidms(page_data))

# find_tidms() can return non-string values (for example None, or a nested
# dictionary stored under 'tidm'); those have no .endswith() and would crash
# the suffix step, so only strings are kept.
# NOTE(review): the same TIDM may be collected more than once if it appears in
# several places in the responses - consider de-duplicating; confirm first.
ticker_names = [tidm for tidm in all_tidms if isinstance(tidm, str)]

# Append the '.L' suffix; a TIDM that already ends in '.' only gets 'L' so
# the dot is not doubled.
suffixed_tickers = [
    f'{tidm}L' if tidm.endswith('.') else f'{tidm}.L'
    for tidm in ticker_names
]

# Convert the list of TIDMs to a DataFrame
df = pd.DataFrame(suffixed_tickers, columns=['TIDM'])

# Save the DataFrame to a CSV file
df.to_csv('aim_ticker_list.csv', index=False)

# Show the first rows of the DataFrame to verify
df.head()


100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [01:15<00:00,  1.93s/it]


Unnamed: 0,TIDM
0,SPA.L
1,450.L
2,4BB.L
3,4GBL.L
4,88E.L
