In [1]:
import requests
import pandas as pd
from tqdm import tqdm
import os
import time
from requests.utils import quote

## Step 1: Data Collection

### Step 1A: Get the list of US equities held by the Norwegian Wealth Fund from 2013 to 2023


A link to the API can be found here: [Norges Bank Investment Management API](https://www.nbim.no/en/responsible-investment/voting/our-voting-records/api-access-to-our-voting/) 


I used this code to get all companies listed in their database: 

In [None]:
# Read the key from the NBIM_API_KEY environment variable so the real secret never
# has to be saved inside the notebook; the original placeholder stays as the fallback.
api_key = os.environ.get("NBIM_API_KEY", " Insert Your API Key Here")

def fetch_company_details(api_key, company_name, timeout=30, rate_limit_sleep=900):
    """Fetch the NBIM voting-API record for a single company name.

    Args:
        api_key: Value sent in the ``x-api-key`` request header.
        company_name: Company name; it is URL-quoted and appended to the query path.
        timeout: Seconds to wait on the server before ``requests`` raises, so a
            stalled connection cannot hang the notebook indefinitely.
        rate_limit_sleep: Seconds to pause after an HTTP 429 before retrying.

    Returns:
        The decoded JSON body on HTTP 200, or an empty dict for any other
        non-429 status (the failure is printed).

    Raises:
        requests.exceptions.RequestException: Network errors and timeouts are
            not swallowed; they propagate to the caller.
    """
    # NOTE(review): quote() leaves '/' unescaped by default, so a name containing
    # a slash adds a path segment -- confirm how the API expects such names.
    detail_url = f"https://vd.a.nbim.no/v1/query/company/{requests.utils.quote(company_name)}"
    headers = {"x-api-key": api_key}

    # Retry with a loop instead of recursion so a long run of 429 responses
    # cannot grow the call stack.
    while True:
        response = requests.get(detail_url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        if response.status_code != 429:
            print(f"Failed to fetch details for company {company_name}: {response.status_code}")
            return {}
        print("Rate limit exceeded. Sleeping...")
        time.sleep(rate_limit_sleep)


def build_companies_dataframe(api_key, companies_list, save_path):
    """Fetch details for each company and checkpoint the rows to a CSV file.

    Processing resumes from the index reported by ``get_last_processed_index``.
    Rows are appended to ``save_path`` after every fifth company and after the
    final one, so an interrupted run loses at most one small batch.

    Args:
        api_key: Key forwarded to ``fetch_company_details``.
        companies_list: Sequence of dicts; each company's name is read from key ``'n'``.
        save_path: CSV path used both as the checkpoint and as the final result.

    Returns:
        The contents of ``save_path`` as a DataFrame, or an empty DataFrame when
        nothing has been saved.
    """
    columns = ['Company Name', 'Ticker', 'Country']
    batch_size = 5
    pending_rows = []
    start_from_index = get_last_processed_index(companies_list, save_path)
    last_index = len(companies_list) - 1

    for i, company in enumerate(tqdm(companies_list[start_from_index:], desc="Fetching company details"), start=start_from_index):
        company_name = company['n']
        company_details = fetch_company_details(api_key, company_name)
        if 'companies' in company_details:
            for detail in company_details['companies']:
                # NOTE(review): 'Ticker' is capitalised but 'country' is not --
                # confirm both key spellings against the API response.
                pending_rows.append({
                    'Company Name': company_name,
                    'Ticker': detail.get('Ticker', 'N/A'),
                    'Country': detail.get('country', 'N/A')
                })

        if (i + 1 - start_from_index) % batch_size == 0 or i == last_index:
            # Skip empty batches: writing an empty frame would create the file
            # without a header row, and every later append would then omit it too.
            if pending_rows:
                needs_header = not os.path.exists(save_path) or os.path.getsize(save_path) == 0
                pd.DataFrame(pending_rows, columns=columns).to_csv(
                    save_path, mode='a', header=needs_header, index=False
                )
            pending_rows = []  # Reset to avoid re-saving data

        time.sleep(1)  # Fixed pause between requests to stay under the API rate limit

    try:
        return pd.read_csv(save_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing was ever saved (or the file is zero-length).
        return pd.DataFrame()

#### If the API times out, resume from the last saved company:

In [2]:
def get_last_processed_index(companies_list, save_path):
    """Return the index in ``companies_list`` at which fetching should resume.

    The last row of the CSV at ``save_path`` names the most recently saved
    company; the result is the position just after the first entry of
    ``companies_list`` whose ``'n'`` value equals that name.

    Args:
        companies_list: Sequence of dicts carrying the company name under ``'n'``.
        save_path: Path of the checkpoint CSV.

    Returns:
        Index of the next company to process, or 0 when the file is missing,
        empty, has no data rows, or its last company is not in ``companies_list``.
    """
    try:
        df = pd.read_csv(save_path)
    except (pd.errors.EmptyDataError, FileNotFoundError):
        print("CSV file is empty or does not exist.")
        return 0

    if df.empty:
        # Header only, no saved rows yet.
        return 0

    # Look the name up under the real 'Company Name' header. Fall back to the
    # first column for files that were saved without a header row, where
    # pandas uses the first data row as the column names.
    name_column = 'Company Name' if 'Company Name' in df.columns else df.columns[0]
    last_processed_company = df.iloc[-1][name_column]

    for index, company in enumerate(companies_list):
        if company['n'] == last_processed_company:
            return index + 1  # Resume from the next company
    return 0

### Step 1B: Cross-reference data with yfinance

The data being evaluated are SEC filings of 1260 equities held by the Norwegian Wealth Fund, downloaded from the SEC's Electronic Data Gathering, Analysis and Retrieval (EDGAR) website.

Sector data was cross-referenced using [yfinance](https://pypi.org/project/yfinance/), because the Norwegian Wealth Fund database did not have the correct industry tags:


In [None]:

```
import pandas as pd
import yfinance as yf
from tqdm import tqdm

# Assumes `df` (the dataframe with a 'Ticker' column used below) already exists.
# Work on the full dataframe or an explicit copy to avoid SettingWithCopyWarning;
# to try the lookup on a small sample first:
# df = df.head().copy()

# Enable tqdm's progress_apply for pandas objects
tqdm.pandas()

def fetch_sector(ticker):
    """Return the yfinance 'sector' entry for one ticker symbol.

    Gives "N/A" when the ticker's info mapping has no 'sector' key, and
    "Error" when the lookup raises any exception.
    """
    try:
        info = yf.Ticker(ticker).info
        sector = info.get('sector', "N/A")
    except Exception:
        # Best-effort lookup: one bad ticker must not abort the whole apply.
        return "Error"
    return sector

# Call fetch_sector on every value of df['Ticker'] and store the results in a
# new 'Sector' column. progress_apply is used instead of apply so a progress
# bar is shown while the lookups run.
df['Sector'] = df['Ticker'].progress_apply(fetch_sector)

# Display the first rows of the updated dataframe
df.head()
```
