In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import csv
import re
import os
import time

# Load existing data to skip already-scraped entries.
# `existing_data` holds one (issuer, year) key per data row already present in
# issuer_data.csv; the scraping loop uses it to avoid re-requesting that pair.
existing_data = set()
if os.path.exists("issuer_data.csv"):
    # newline="" lets the csv module do its own line-ending handling.
    with open("issuer_data.csv", "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        # Skip header; the default keeps a zero-byte file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # Blank or truncated lines carry no (issuer, year) key.
            if len(row) < 2:
                continue
            try:
                # Create a unique key for each row (issuer + year)
                existing_data.add((row[0], int(row[1])))
            except ValueError:
                # Non-numeric year (e.g. a stray repeated header line):
                # ignore the row instead of aborting the whole run.
                continue

# URL for the target page
base_url = "https://www.mse.mk/en/stats/symbolhistory/ALKB"
session = requests.Session()

# Get initial page. requests applies no timeout by default, so set one to
# keep a stalled connection from hanging the script, and fail early on an
# HTTP error instead of parsing an error page.
response = session.get(base_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Extract issuer codes from the dropdown menu
dropdown = soup.find(id="Code")
if dropdown is None:
    # Without this guard the failure surfaces as an AttributeError on None.
    raise RuntimeError(
        f"Issuer dropdown (id='Code') not found on {base_url}; "
        "the page layout may have changed."
    )

# Keep only non-empty option values that contain no digit.
# NOTE(review): presumably the digit filter excludes non-share instruments
# such as bonds - confirm against the site.
issuers = [
    code
    for code in (option.get("value") for option in dropdown.find_all("option"))
    if code and not re.search(r"\d", code)
]
print("Issuers found:", issuers)

# Prepare CSV: append one batch of rows per (issuer, year) request.
with open("issuer_data.csv", "a", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    # Write the header only when the file itself is empty. Testing
    # `existing_data` instead would add a second header to a file that holds
    # a header but no data rows yet.
    if os.fstat(csvfile.fileno()).st_size == 0:
        writer.writerow(["Issuer", "Year", "Date", "Last trade price", "Max", "Min", "Avg.Price", "%chg.", "Volume", "Turnover in BEST in denars", "Total turnover in denars"])

    # Start timer for entire script (perf_counter is monotonic, so the
    # elapsed figures are unaffected by system clock adjustments).
    script_start_time = time.perf_counter()

    # Read the clock once so every iteration works from the same base year.
    current_year = datetime.now().year

    # Loop through the last 10 years
    for i in range(10):
        year = current_year - i
        from_date = f"01/01/{year}"
        to_date = f"12/31/{year}"

        # Loop through each issuer
        for issuer in issuers:
            # NOTE(review): a pair counts as done as soon as one row for it is
            # in the file, so the current year is not topped up on later runs.
            if (issuer, year) in existing_data:
                print(f"Skipping {issuer} for {year} as it already exists in the file.")
                continue  # Skip if data for this issuer and year already exists

            # Start timer for each issuer-year data retrieval
            retrieval_start_time = time.perf_counter()

            # Build payload with issuer and date range to fetch data
            payload = {
                "Code": issuer,
                "FromDate": from_date,
                "ToDate": to_date
            }

            try:
                # Submit request with date range and issuer. The timeout keeps
                # one stalled request from blocking the run; raise_for_status
                # reports an HTTP error as an error rather than as "No data".
                data_response = session.post(base_url, data=payload, timeout=30)
                data_response.raise_for_status()
                data_soup = BeautifulSoup(data_response.text, "html.parser")

                # Find the results table
                results_table = data_soup.find(id="resultsTable")
                if not results_table:
                    print(f"No data found for {issuer} in {year}")
                    continue

                # Extract rows of data from the table; fall back to the table
                # element itself when the markup has no <tbody>.
                table_body = results_table.find("tbody") or results_table
                parsed_rows = []
                for row in table_body.find_all("tr"):
                    columns = [cell.get_text(strip=True) for cell in row.find_all("td")]
                    if columns and any(columns):  # Skip empty rows
                        parsed_rows.append([issuer, year] + columns)

                # Write the whole table only after it parsed completely, so a
                # failure part-way through cannot leave a partial year that a
                # later run would skip as finished.
                writer.writerows(parsed_rows)
                csvfile.flush()  # keep finished pairs on disk if the run is interrupted

                # Calculate time taken for this retrieval
                retrieval_end_time = time.perf_counter()
                print(f"Data for {issuer} in {year} saved in {retrieval_end_time - retrieval_start_time:.2f} seconds.")

            except Exception as e:
                # Deliberate best-effort boundary: report the failure and carry
                # on with the next issuer.
                print(f"Error fetching data for {issuer} in {year}: {e}")

    # End timer for entire script
    script_end_time = time.perf_counter()
    print(f"Script completed in {script_end_time - script_start_time:.2f} seconds.")


Issuers found: ['ADIN', 'ALK', 'ALKB', 'AMBR', 'AMEH', 'APTK', 'ATPP', 'AUMK', 'BANA', 'BGOR', 'BIKF', 'BIM', 'BLTU', 'CBNG', 'CDHV', 'CEVI', 'CKB', 'CKBKO', 'DEBA', 'DIMI', 'EDST', 'ELMA', 'ELNC', 'ENER', 'ENSA', 'EUHA', 'EUMK', 'EVRO', 'FAKM', 'FERS', 'FKTL', 'FROT', 'FUBT', 'GALE', 'GDKM', 'GECK', 'GECT', 'GIMS', 'GRDN', 'GRNT', 'GRSN', 'GRZD', 'GTC', 'GTRG', 'IJUG', 'INB', 'INHO', 'INOV', 'INPR', 'INTP', 'JAKO', 'JUSK', 'KARO', 'KDFO', 'KJUBI', 'KKST', 'KLST', 'KMB', 'KMPR', 'KOMU', 'KONF', 'KONZ', 'KORZ', 'KPSS', 'KULT', 'KVAS', 'LAJO', 'LHND', 'LOTO', 'LOZP', 'MAGP', 'MAKP', 'MAKS', 'MB', 'MERM', 'MKSD', 'MLKR', 'MODA', 'MPOL', 'MPT', 'MPTE', 'MTUR', 'MZHE', 'MZPU', 'NEME', 'NOSK', 'OBPP', 'OILK', 'OKTA', 'OMOS', 'OPFO', 'OPTK', 'ORAN', 'OSPO', 'OTEK', 'PELK', 'PGGV', 'PKB', 'POPK', 'PPIV', 'PROD', 'PROT', 'PTRS', 'RADE', 'REPL', 'RIMI', 'RINS', 'RZEK', 'RZIT', 'RZIZ', 'RZLE', 'RZLV', 'RZTK', 'RZUG', 'RZUS', 'SBT', 'SDOM', 'SIL', 'SKON', 'SKP', 'SLAV', 'SNBT', 'SNBTO', 'SOLN', 'S