# Financial Data

## Imports

In [None]:
import pandas as pd
import simfin as sf
from simfin.names import *
from tqdm.auto import tqdm
from dotenv import dotenv_values

## Constants

## Settings

In [None]:
# Directory handed to simfin as its data directory for the datasets it loads.
sf.set_data_dir('simfin_data/')
# Pass the key as a plain string: wrapping the lookup in braces built a
# one-element set, which is not a valid API-key value.
sf.set_api_key(api_key=dotenv_values('.env')['SIMFIN_API_KEY'])

## Utils

In [None]:
def get_financial_data_for_companies_and_years(
    csv_path: str,
    years: "list | None" = None,
    *,
    financial_out_path: str = 'data/SP500_2020_simfin_financial_data.csv',
    metadata_out_path: str = 'data/SP500_2020_simfin_financial_metadata.csv',
):
    """Export SimFin income data for the listed companies and a year-coverage table.

    Loads the annual US income dataset through ``sf.load``, keeps the rows
    whose ``Ticker`` appears in the ``Symbol`` column of ``csv_path`` and whose
    ``Fiscal Year`` is in ``years``, and writes two CSV files:

    * ``financial_out_path``: the filtered income rows.
    * ``metadata_out_path``: one row per ticker found, with a ``Symbol``
      column and one column per requested year holding ``True`` where a row
      exists for that ticker and year (empty otherwise).

    Args:
        csv_path: CSV file with a ``Symbol`` column listing the companies.
        years: Fiscal years to keep. Defaults to 2016 through 2020.
        financial_out_path: Destination of the filtered income rows.
        metadata_out_path: Destination of the coverage table.

    Returns:
        None. The results are written to the two CSV files.
    """
    # A None sentinel avoids sharing one mutable default list across calls;
    # list() also lets callers pass a tuple or other iterable.
    years = [2016, 2017, 2018, 2019, 2020] if years is None else list(years)

    companies_df = pd.read_csv(csv_path)
    income_df = sf.load(dataset='income', variant='annual', market='us')
    wanted = (
        income_df['Ticker'].isin(companies_df['Symbol'])
        & income_df['Fiscal Year'].isin(years)
    )
    income_df = income_df[wanted].reset_index(drop=True)
    income_df.to_csv(financial_out_path, index=False)

    # One dict per ticker, kept in first-appearance order, so the coverage
    # table is built in a single pass instead of rescanning a growing
    # DataFrame for every income row.
    coverage = {}
    pairs = zip(income_df['Ticker'].tolist(), income_df['Fiscal Year'].tolist())
    for symbol, year in tqdm(pairs, total=len(income_df)):
        coverage.setdefault(symbol, {'Symbol': symbol})[year] = True

    # Years missing from a ticker's dict become empty cells in the CSV.
    metadata_df = pd.DataFrame(list(coverage.values()), columns=['Symbol'] + years)
    metadata_df.to_csv(metadata_out_path, index=False)

In [None]:
def check_companies_with_no_financial_data(metadata_path: str, financial_csv_path: str, reviews_csv_path: str):
    """Display the review rows of companies that have no entry in the financial CSV.

    A company counts as missing when its ``Symbol`` in ``metadata_path`` does
    not occur in the ``Symbol`` column of ``financial_csv_path``. Review rows
    are matched to those companies through the ``Company Name`` column.
    """
    companies = pd.read_csv(metadata_path)
    financials = pd.read_csv(financial_csv_path)
    reviews = pd.read_csv(reviews_csv_path)

    # Names of the companies whose symbol never shows up in the financial CSV.
    lacks_financials = ~companies['Symbol'].isin(financials['Symbol'])
    uncovered_names = companies.loc[lacks_financials, 'Company Name']

    is_uncovered = reviews['Company Name'].isin(uncovered_names)
    display(reviews[is_uncovered])

In [None]:
def check_companies_with_few_reviews(reviews_csv_path: str):
    """Display the rows of the reviews CSV whose 'Number of Reviews' is below 30."""
    reviews = pd.read_csv(reviews_csv_path)
    too_few = reviews['Number of Reviews'] < 30
    display(reviews.loc[too_few])

## Run

In [None]:
# Company list CSV; get_financial_data_for_companies_and_years reads its 'Symbol' column.
CSV_PATH = 'data/SP500_2020_wikipedia_metadata.csv'
# Uncomment to rebuild the SimFin financial data and year-coverage CSV files.
# get_financial_data_for_companies_and_years(CSV_PATH)

In [None]:
# Company list CSV; the check reads its 'Symbol' and 'Company Name' columns.
METADATA_PATH = 'data/SP500_2020_wikipedia_metadata.csv'
# Year-coverage table written by get_financial_data_for_companies_and_years.
FINANCIAL_CSV_PATH = 'data/SP500_2020_simfin_financial_metadata.csv'
# Reviews CSV; the check matches its rows through the 'Company Name' column.
REVIEWS_CSV_PATH = 'data/SP500_2020_reviews_metadata.csv'
# Uncomment to display the review rows of companies missing from the financial table.
# check_companies_with_no_financial_data(METADATA_PATH, FINANCIAL_CSV_PATH, REVIEWS_CSV_PATH)

In [None]:
# Reviews CSV; the check filters on its 'Number of Reviews' column.
REVIEWS_CSV_PATH = 'data/SP500_2020_reviews_metadata.csv'
# Uncomment to display the companies with fewer than 30 reviews.
# check_companies_with_few_reviews(REVIEWS_CSV_PATH)