In [1]:
import requests
import json
import os
from dotenv import load_dotenv
import pandas as pd


In [2]:
load_dotenv()

url = "https://financialmodelingprep.com/stable/search-name"
params = {
    "apikey": os.getenv("FMP_KEY"),
    "exchange": "NASDAQ"
}

In [22]:
df = pd.read_csv("./data/FMP ticker mapping - not found v1.csv", header=None)
df.columns = ["ticker", "name"]
print(df.head())

       ticker                         name
0   600761_CN         Anhui Heli Co., Ltd.
1        AMPS              Altus Power Inc
2         DOC  Healthpeak Properties, Inc.
3  ESSITYB_SE                       Essity
4     HEIG_DE        Heidelberg Materials 


In [23]:
def clean(s):
    def fix_mojibake_and_convert_to_ascii(s):
        try:
            # Step 1: Re-encode as bytes using the encoding that misinterpreted UTF-8
            bytes_data = s.encode('latin1')
            # Step 2: Decode properly using UTF-8
            fixed = bytes_data.decode('utf-8')
        except UnicodeEncodeError:
            # If encoding fails, fallback to original
            fixed = s
        except UnicodeDecodeError:
            # If decoding fails, fallback to original
            fixed = s

        # Step 3: Convert to ASCII (remove accents etc.)
        import unicodedata
        ascii_str = unicodedata.normalize('NFKD', fixed).encode('ascii', 'ignore').decode('ascii')
        return ascii_str
    s = fix_mojibake_and_convert_to_ascii(s).lower().replace('.','').replace(',','').replace("'",'')
    s = s.replace(' ltd','').replace(' inc','').replace('nv','').replace(' co', '').replace('-',' ').replace(' sa', '')
    return s.replace(' group', '').replace(' corporation', '').replace(' corp', '')
    


df['clean name'] = df['name'].apply(clean)
print(df.head())

       ticker                         name             clean name
0   600761_CN         Anhui Heli Co., Ltd.             anhui heli
1        AMPS              Altus Power Inc            altus power
2         DOC  Healthpeak Properties, Inc.  healthpeak properties
3  ESSITYB_SE                       Essity                 essity
4     HEIG_DE        Heidelberg Materials   heidelberg materials 


In [25]:
symbols, names, tickers, fmp_names, exchanges = [], [], [], [], []

counter = 0

for company, name, ticker in zip(df['clean name'], df['name'], df['ticker']):
    print(company)
    
    params['query'], params['exchange'] = company, "NASDAQ"
    r = requests.get(url, params=params)
    try:
        if len(r.json()):
            for res in r.json():
                names.append(name)
                tickers.append(ticker)
                symbols.append(res['symbol'])
                fmp_names.append(res['name'])
                exchanges.append(res['exchange'])
        else:
            params['exchange'] = "NYSE"
            r = requests.get(url, params=params)
            if len(r.json()):
                for res in r.json():
                    names.append(name)
                    tickers.append(ticker)
                    symbols.append(res['symbol'])
                    fmp_names.append(res['name'])
                    exchanges.append(res['exchange'])
            else:
                params['exchange'] = ""
                r = requests.get(url, params=params)
                if len(r.json()) == 0:
                    raise Exception("No data")
                for res in r.json():
                    names.append(name)
                    tickers.append(ticker)
                    symbols.append(res['symbol'])
                    fmp_names.append(res['name'])
                    exchanges.append(res['exchange'])
                
                
    except Exception as e:
        counter += 1
        print(e, counter)
        names.append(company)
        tickers.append(ticker)
        symbols.append('')
        fmp_names.append('')
        exchanges.append('')
#         
    
print(symbols)
        

anhui heli
altus power
healthpeak properties
essity
heidelberg materials 
No data 1
transcontinental
bioxcel therapeutics
power grid of india
No data 2
itc
360 digitech
No data 3
upl
inspired entertainment
truecar
emeren
whitehorse finance
divis laboratories
No data 4
alpineome property trust
No data 5
cipla
cg power & industrial solutions
No data 6
telecom plus
grupo aeroportuario del pacifico
No data 7
aedifica 
oric pharmaceuticals
spruce biosciences
chow tai fook jewellery
viracta therapeutics
nokiarporation
No data 8
anglogold ashanti
nicolet bankshares
wag
univestrporation of pennsylvania
No data 9
sumitomo mitsui trust
toho
toppan holdings
No data 10
minebea mitsumi
cyberagent
japan post
colgate palmolive 
No data 11
unibail rodamco westfield
No data 12
nippon express holdings
orthofix international 
No data 13
upm kymmene
No data 14
marks & spencer
No data 15
gladstone landrp
No data 16
immunic therapeutics
No data 17
liberty formula one
No data 18
cto realty growth
the origina

In [26]:
df = pd.DataFrame({
    'Ticker' : tickers,
    'Name' : names,
    'FMP name': fmp_names,
    'FMP symbol' : symbols,
    'Exchange' : exchanges
})
df.to_csv(r'data/FMP extracted4.csv', index=False)

In [27]:
print(counter)

396
