In [23]:
import pandas as pd
from pathlib import Path

# Input locations; the relative paths are resolved against the kernel's
# current working directory.
repo = Path('..').resolve()
omxspi_path = '../omxspi_members.csv'
master_path = '../instrument_master.csv'
coverage_path = '../coverage_table_updated.csv'

# The OMXSPI member file has no header row, so column names are supplied
# explicitly when it is read.
omxspi = pd.read_csv(
    omxspi_path,
    sep='\t',
    header=None,
    names=[
        'Fullname', 'CCY', 'Last', 'Change', 'ChangePct',
        'Bid', 'Ask', 'Volume', 'Turnover', 'Updated',
    ],
)

# The two working tables are tab-separated and read with pandas' default
# header handling.
master = pd.read_csv(master_path, sep='\t')
coverage = pd.read_csv(coverage_path, sep='\t')

# Quick size summary of everything that was loaded.
print(f"OMXSPI companies: {len(omxspi)}")
print(f"Current instrument_master: {len(master)} companies")
print(f"Current coverage table: {len(coverage)} rows")

# Last expression of the cell: preview of the member list.
omxspi.head(10)

OMXSPI companies: 394
Current instrument_master: 30 companies
Current coverage table: 180 rows


Unnamed: 0,Fullname,CCY,Last,Change,ChangePct,Bid,Ask,Volume,Turnover,Updated
0,AAK,SEK,258.4,1.2,+0.46%,258.8,259.2,376353,97145911,18:00:00
1,ABB Ltd,SEK,709.8,1.6,+0.22%,710.2,710.4,485967,344276679,18:00:00
2,AcadeMedia,SEK,94.5,-1.2,-1.25%,94.2,94.4,174418,16511663,18:00:00
3,Acast,SEK,32.55,0.4,+1.24%,32.4,32.9,249704,8092563,18:00:00
4,Acrinova A,SEK,10.25,0.05,+0.49%,,,25100,257288,18:00:00
5,Acrinova B,SEK,10.0,-0.1,-0.99%,10.0,10.15,16989,170774,18:00:00
6,Actic Group,SEK,29.8,-1.9,-5.99%,29.8,30.1,75655,2293495,18:00:00
7,Active Biotech,SEK,0.0435,-0.0005,-1.13%,0.0433,0.0435,7056103,310328,18:00:00
8,AddLife B,SEK,152.1,-1.1,-0.71%,152.0,152.5,114921,17499860,18:00:00
9,Addnode Group B,SEK,92.2,-1.3,-1.39%,91.7,92.5,106430,9740235,18:00:00


In [24]:
# Compare company names between the OMXSPI member list and instrument_master
# to work out which members still have to be added to the master table.
existing_companies = {name for name in master['CompanyName']}
omxspi_companies = {name for name in omxspi['Fullname']}

# Names present in OMXSPI but absent from the master table.
new_companies = omxspi_companies.difference(existing_companies)

print(f"New companies to add: {len(new_companies)}")
print(f"Already in master: {len(omxspi_companies.intersection(existing_companies))}")

# Preview the first 20 names alphabetically; an empty result falls through
# to the explanatory message instead.
sorted(new_companies)[:20] or "All companies already in master"

New companies to add: 364
Already in master: 30


['AAK',
 'AFRY',
 'AQ Group',
 'AcadeMedia',
 'Acast',
 'Acrinova A',
 'Acrinova B',
 'Actic Group',
 'Active Biotech',
 'AddLife B',
 'Addnode Group B',
 'Alimak Group',
 'Alleima',
 'Alligator Bioscience',
 'Alligo B',
 'Alvotech SDB',
 'Ambea',
 'Annehem Fastigheter B',
 'Anoto Group',
 'Apotea']

In [25]:
# Add new companies to instrument_master with new CIDs.
#
# `new_companies` was computed in an earlier cell, while `master` is extended
# (and written to disk) here. Filtering against the *current* `master` keeps
# this cell idempotent: running it a second time on its own cannot append the
# same companies again under fresh CIDs.
known_names = set(master['CompanyName'])
pending_companies = sorted(name for name in new_companies if name not in known_names)

if pending_companies:
    # CIDs have the form 'S<number>'. Missing or non-matching CIDs are
    # skipped so that a single bad value cannot break the integer conversion.
    cid_numbers = (
        master['CID']
        .dropna()
        .astype(str)
        .str.extract(r'S(\d+)', expand=False)
        .dropna()
        .astype(int)
    )
    max_cid = cid_numbers.max() if not cid_numbers.empty else 0

    # Number the new companies consecutively after the highest existing CID,
    # in alphabetical order of company name.
    new_rows = [
        {
            'CID': f'S{i}',
            'CompanyName': company,
            'investor_relations_url': ''  # To be filled in step 2
        }
        for i, company in enumerate(pending_companies, start=max_cid + 1)
    ]

    # Columns not supplied for the new rows are left empty by the concat.
    master = pd.concat([master, pd.DataFrame(new_rows)], ignore_index=True)
    master.to_csv(master_path, sep='\t', index=False)
    print(f"Added {len(new_rows)} new companies to instrument_master.csv")
    print(f"Total companies now: {len(master)}")
else:
    print("No new companies to add")

master.head(20)

Added 364 new companies to instrument_master.csv
Total companies now: 394


Unnamed: 0,CompanyName,ISIN,CID,date refreshed,Nasdaq_url,Active_coverage?,investor_relations_url,CCY,MostRecentStockPrice,TradedStockVolume,StockTurnover,validated
0,ABB Ltd,,S1,,https://www.nasdaq.com/european-market-activit...,True,https://global.abb/group/en/investors/annual-r...,SEK,683.0,453736.0,308982600.0,True
1,Addtech B,,S2,,,True,https://www.addtech.com/investors-and-media,SEK,325.6,350644.0,114134400.0,True
2,Alfa Laval,,S3,,,True,https://www.alfalaval.com/investors/,SEK,460.6,834570.0,384199300.0,True
3,ASSA ABLOY B,,S4,,,True,https://www.assaabloy.com/group/en/investors,SEK,355.8,1924559.0,685033000.0,True
4,AstraZeneca,,S5,,,True,https://www.astrazeneca.com/investor-relations...,SEK,1701.0,283942.0,480673400.0,True
5,Atlas Copco A,,S6,,,True,https://www.atlascopcogroup.com/en/investors,SEK,164.05,5327339.0,873528200.0,True
6,Boliden,,S7,,,True,https://investors.boliden.com/,SEK,498.2,1621368.0,806020700.0,True
7,Epiroc A,,S8,,,True,https://www.epirocgroup.com/en/investors/inves...,SEK,206.7,2926875.0,605520700.0,True
8,EQT,,S9,,,True,https://eqtgroup.com/shareholders/reports-and-...,SEK,351.8,6554439.0,2300568000.0,True
9,Ericsson B,,S10,,,True,https://www.ericsson.com/en/investors,SEK,90.42,8433509.0,762114500.0,True


In [26]:
# Ensure the coverage table has one row per (company CID, fiscal year) for
# every company in `master` and every year in `years`.
years = list(range(2019, 2025))
required_rows = [(cid, y) for cid in master['CID'] for y in years]
# (CID, year) pairs that already exist in the coverage table.
have_rows = set(zip(coverage['Company_Identifier'], coverage['FiscalYear'])) if not coverage.empty else set()
missing_rows = [r for r in required_rows if r not in have_rows]

# The year count is derived from `years` so the message stays correct when
# the range above is changed.
print(f"Expected rows: {len(required_rows)} ({len(master)} companies × {len(years)} years)")
print(f"Current rows: {len(have_rows)}")
print(f"Missing rows: {len(missing_rows)}")

# Seed missing rows
if missing_rows:
    # Build the CID -> name / IR-URL lookups once instead of re-scanning
    # `master` with a boolean mask for every missing row. Keeping the first
    # occurrence of each CID mirrors taking the first matching row.
    master_by_cid = master.drop_duplicates(subset='CID', keep='first').set_index('CID')
    company_by_cid = master_by_cid['CompanyName'].to_dict()
    ir_url_by_cid = master_by_cid['investor_relations_url'].fillna('').to_dict()

    new_rows = [
        {
            'CompanyName': company_by_cid[cid],
            'Company_Identifier': cid,
            'What_to_capture': 'Annual report',
            'FiscalYear': year,
            'IR_URL': ir_url_by_cid[cid],
            'Priority': 'Not Downloaded'
        }
        for cid, year in missing_rows
    ]
    # Coverage columns not set above are left empty for the seeded rows.
    coverage = pd.concat([coverage, pd.DataFrame(new_rows)], ignore_index=True)
    coverage.to_csv(coverage_path, sep='\t', index=False)
    print(f"Added {len(new_rows)} new rows to coverage table")

print(f"Final coverage table shape: {coverage.shape}")
coverage.tail(10)

Expected rows: 2364 (394 companies × 6 years)
Current rows: 180
Missing rows: 2184
Added 2184 new rows to coverage table
Final coverage table shape: (2364, 27)


Unnamed: 0,CompanyName,Company_Identifier,What_to_capture,FiscalYear,IR_URL,Capture_attempt_date,AgentId,CaptureStatus,CaptureStatusDetails,milestone1_Establish_Connection,...,Probability_Annual_Report,Document_Type,Classification_Reasoning,User_Agent_Used,Validation_Status,Validation_Confidence,Validation_Issues,Validation_Date,Priority,Failure_Reason
2354,mySafety Group B,S393,Annual report,2021,,,,,,,...,,,,,,,,,Not Downloaded,
2355,mySafety Group B,S393,Annual report,2022,,,,,,,...,,,,,,,,,Not Downloaded,
2356,mySafety Group B,S393,Annual report,2023,,,,,,,...,,,,,,,,,Not Downloaded,
2357,mySafety Group B,S393,Annual report,2024,,,,,,,...,,,,,,,,,Not Downloaded,
2358,Öresund,S394,Annual report,2019,,,,,,,...,,,,,,,,,Not Downloaded,
2359,Öresund,S394,Annual report,2020,,,,,,,...,,,,,,,,,Not Downloaded,
2360,Öresund,S394,Annual report,2021,,,,,,,...,,,,,,,,,Not Downloaded,
2361,Öresund,S394,Annual report,2022,,,,,,,...,,,,,,,,,Not Downloaded,
2362,Öresund,S394,Annual report,2023,,,,,,,...,,,,,,,,,Not Downloaded,
2363,Öresund,S394,Annual report,2024,,,,,,,...,,,,,,,,,Not Downloaded,
