In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import json

In [5]:
# Load the list of companies to process and extract their ticker symbols.
# The CSV is expected to sit next to the notebook and to have a "Symbol" column.
companies_df = pd.read_csv("constituents.csv")
symbol_column = companies_df["Symbol"]
company_tickers = symbol_column.tolist()

In [3]:
# Create the two empty tables that the retrieval loop fills in:
#   company_df            - one row of descriptive data per company
#   finance_situation_df  - one row of financial indicators per company,
#                           keyed by 'company_id'
company_columns = ['id', 'name', 'fullTimeEmployees', 'Industry', 'Country']
finance_columns = [
    'company_id',
    'AuditRisk',
    'Dividend rate',
    'Dividend Yield',
    'Payout rate',
    'Beta',
    'Market Cap',
    'profit margins',
    'short ratio',
    'quick ratio',
    'current ratio',
    'debtToEquity',
]

company_df = pd.DataFrame(columns=company_columns)
finance_situation_df = pd.DataFrame(columns=finance_columns)


In [6]:
# Retrieve company and financial information for every ticker.
#
# Rows are collected in plain lists and each DataFrame is built once after the
# loop. Calling pd.concat() per ticker copies the whole frame on every
# iteration (quadratic cost) and, because it appends to whatever the frames
# already hold, duplicates every row when this cell is re-run.
#
# Output column name -> key in the dict returned by yfinance's Ticker.info.
company_fields = {
    'name': 'shortName',
    'fullTimeEmployees': 'fullTimeEmployees',
    'Industry': 'sector',
    'Country': 'country',
}
finance_fields = {
    'AuditRisk': 'auditRisk',
    'Dividend rate': 'dividendRate',
    'Dividend Yield': 'dividendYield',
    'Payout rate': 'payoutRatio',
    'Beta': 'beta',
    'Market Cap': 'marketCap',
    'profit margins': 'profitMargins',
    'short ratio': 'shortRatio',
    'quick ratio': 'quickRatio',
    'current ratio': 'currentRatio',
    'debtToEquity': 'debtToEquity',
}

company_records = []
finance_records = []
for ticker in company_tickers:
    try:
        information = yf.Ticker(ticker).info
        # Keys missing from the response become None (NaN once in the frame).
        company_dict = {"id": ticker}
        company_dict.update({column: information.get(key, None)
                             for column, key in company_fields.items()})
        finance_dict = {"company_id": ticker}
        finance_dict.update({column: information.get(key, None)
                             for column, key in finance_fields.items()})
    except Exception as e:
        # Best-effort download: report the failing ticker and keep going.
        print(f"Error retrieving data for {ticker}: {e}")
        continue
    # Append both rows together so the two tables always cover the same tickers.
    company_records.append(company_dict)
    finance_records.append(finance_dict)

# Reuse the column layout of the (empty) frames created earlier so the column
# order is unchanged and the frames keep their columns even if every ticker failed.
company_df = pd.DataFrame(company_records, columns=company_df.columns)
finance_situation_df = pd.DataFrame(finance_records, columns=finance_situation_df.columns)

In [132]:
# Cast the numeric columns to float so that missing values (None) become NaN
# and arithmetic on them works.
employee_counts = company_df['fullTimeEmployees']
company_df['fullTimeEmployees'] = employee_counts.astype(float)

# Every finance column except the first one ('company_id') is numeric.
columns = finance_situation_df.columns.tolist()
numeric_columns = columns[1:]
finance_situation_df[numeric_columns] = finance_situation_df[numeric_columns].astype(float)

In [133]:
# Replace each company's absolute market cap with its share (in percent) of the
# total market cap of its industry.
merged_df = pd.merge(company_df, finance_situation_df, left_on='id', right_on='company_id')
industry_market_cap = merged_df.groupby('Industry')['Market Cap'].sum()

# groupby() leaves out rows whose Industry is missing, so looking a missing
# industry up with industry_market_cap[...] raises KeyError. map() returns NaN
# for those rows instead, and is vectorised rather than a row-by-row apply().
industry_total = merged_df['Industry'].map(industry_market_cap)
market_cap_share = merged_df['Market Cap'] / industry_total * 100

# Write the result back aligned on the ticker. Assigning the merged Series
# directly would align on the positional index, which only matches
# finance_situation_df when the merge happens to keep every row in the same order.
share_by_ticker = pd.Series(market_cap_share.to_numpy(),
                            index=merged_df['company_id'].to_numpy())
# map() needs a unique index; keep the first entry if a ticker appears twice.
share_by_ticker = share_by_ticker[~share_by_ticker.index.duplicated(keep='first')]
finance_situation_df['Market Cap'] = finance_situation_df['company_id'].map(share_by_ticker)

In [137]:
# Write both tables to CSV files in the working directory, without the
# DataFrame index column.
csv_outputs = (
    (company_df, 'Company.csv'),
    (finance_situation_df, 'Finance_Situation.csv'),
)
for frame, csv_path in csv_outputs:
    frame.to_csv(csv_path, index=False)