In [None]:
import yfinance as yf
import pandas as pd


# Define the ticker symbol
ticker_symbol = "AAPL"

# Get the Ticker object
company = yf.Ticker(ticker_symbol)


# Get various types of fundamental data
info = company.info
print("Company Info:")
for key, value in info.items():
    #print(f"{key}: {value}")
    print(None)
    break


In [None]:

import json

def load_json_file(file_path):
    try:
        with open(file_path, 'r') as file:
            data = json.load(file)
            return data
    except FileNotFoundError:
        print(f"File {file_path} not found.")
        return []
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return []

def extract_tickers_and_details(companies_data):
    tickers = []
    companies_details = {}
    
    for k,company in companies_data.items():
        ticker = company['ticker']
        tickers.append(ticker)
        companies_details[ticker] = company
    
    return tickers, companies_details

def main():
    file_path = 'company.json'
    companies_data = load_json_file(file_path)
    
    if companies_data:
        tickers, companies_details = extract_tickers_and_details(companies_data)
        #print(companies_details)
        df = pd.DataFrame()
        df["Ticker"] = tickers
        df["Company"] = [c["title"] for k,c in companies_details.items()]
        print(df)
        df.to_csv("company_tickers.csv")
        #print("Company Tickers:")
        #print(len(tickers),tickers)

if __name__ == "__main__":
    main()

In [None]:

# Get historical market data
historical_data = company.history(period="5y")
print("\nHistorical Market Data:")
print(historical_data.head())
#historical_data.to_csv("price_history.csv")
historical_data


In [None]:
# Get financial statements
financials = company.financials
print("\nFinancial Statements:")
print(financials)

financials.to_csv("financials.csv")


In [None]:
balance_sheet = company.balance_sheet
print("\nBalance Sheet:")
print(pd.DataFrame(balance_sheet))
balance_sheet.to_csv("balancesheet.csv")


In [None]:
cashflow = company.cashflow
print("\nCash Flow:")
print(cashflow)



In [None]:
# Get key metrics
key_metrics = company.major_holders
print("\nMajor Holders:")
print(key_metrics)



In [None]:
key_metrics = company.institutional_holders
print("\nInstitutional Holders:")
print(key_metrics)

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import datetime

def get_quarter_year(date):
    q = (date.month - 1) // 3 + 1
    y = date.year
    return str(y) + "Q" + str(q)
    
# Load the list of tickers from a CSV file (assuming you have downloaded it)
#tickers = pd.read_csv('company_tickers_subset.csv', header=None, names=['Ticker'])
tickers = pd.read_csv('company_tickers_subset.csv')

def get_fundamental_data(ticker):
    try:
        company = yf.Ticker(ticker)
        info = company.info
        info = pd.DataFrame(info)
        info["ticker"] = ticker

        historical_data = company.history(period="5y")
        historical_data = pd.DataFrame(historical_data)
        historical_data["ticker"] = ticker
        historical_data.reset_index(inplace=True, drop=False)
        historical_data = historical_data.T[~historical_data.T.index.duplicated(keep='first')].T
        
        financials = company.financials
        financials = pd.DataFrame(financials)
        financials.columns = [get_quarter_year(x) for x in financials.columns]
        financials["ticker"] = ticker
        financials.reset_index(inplace=True, drop=False)
        financials = financials.T[~financials.T.index.duplicated(keep='first')].T
        
        #balance_sheet = company.balance_sheet
        balance_sheet = company.quarterly_balance_sheet
        balance_sheet = pd.DataFrame(balance_sheet)
        balance_sheet.columns = [get_quarter_year(x) for x in balance_sheet.columns]
        balance_sheet["ticker"] = ticker
        balance_sheet.reset_index(inplace=True, drop=False)
        balance_sheet = balance_sheet.T[~balance_sheet.T.index.duplicated(keep='first')].T
        
        #cash_flow = company.cashflow
        cash_flow = company.quarterly_cashflow
        cash_flow = pd.DataFrame(cash_flow)
        cash_flow.columns = [get_quarter_year(x) for x in cash_flow.columns]
        cash_flow["ticker"] = ticker
        cash_flow.reset_index(inplace=True, drop=False)
        cash_flow = cash_flow.T[~cash_flow.T.index.duplicated(keep='first')].T

        #cash_flow = company.cashflow
        is_flow = company.quarterly_income_stmt
        is_flow = pd.DataFrame(is_flow)
        is_flow.columns = [get_quarter_year(x) for x in is_flow.columns]
        is_flow["ticker"] = ticker
        is_flow.reset_index(inplace=True, drop=False)
        is_flow = is_flow.T[~is_flow.T.index.duplicated(keep='first')].T

        # Print or save the data as needed
        """
        print(f"Ticker: {ticker}")
        print("Info:")
        print(info)
        print("\nFinancials:")
        print(financials)
        print("\nBalance Sheet:")
        print(balance_sheet)
        print("\nCash Flow:")
        print(cash_flow)
        print("\n")
        """
        return historical_data, info, financials, balance_sheet, cash_flow, is_flow

    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        e = pd.DataFrame()
        return e,e,e,e,e,e

def main():
    info_df = pd.DataFrame()
    hs_df = pd.DataFrame()
    financials_df = pd.DataFrame()
    balance_sheet_df = pd.DataFrame()
    cash_flow_df = pd.DataFrame()
    is_flow_df = pd.DataFrame()
    hs_df,info_df,financials_df,balance_sheet_df,cash_flow_df,is_flow_df = get_fundamental_data("KR")
    for ticker in tickers['Ticker']:    
        hs,i,f,b,c,is_f = get_fundamental_data(ticker)
        
        hs_df = pd.concat([hs,hs_df],ignore_index=True)
        info_df = pd.concat([i,info_df],ignore_index=True)
        financials_df = pd.concat([f,financials_df],ignore_index=True)
        balance_sheet_df = pd.concat([b,balance_sheet_df],ignore_index=True)
        cash_flow_df = pd.concat([c,cash_flow_df],ignore_index=True)
        is_flow_df = pd.concat([is_f,is_flow_df])

        print("success: ",ticker)
        
        # Add a delay to avoid rate limiting
        time.sleep(1)

    info_df.to_csv("company_info_2.csv")
    financials_df.to_csv("Q_financials_info_2.csv")
    balance_sheet_df.to_csv("Q_bs_info_2.csv")
    cash_flow_df.to_csv("Q_cash_flow_info_2.csv")
    #is_flow_df.to_csv("Q_is_info.csv")
    hs_df.to_csv("price_history_2.csv")

if __name__ == "__main__":
    import time
    main()

success:  KR
success:  IR
success:  FAST
success:  GIS
success:  CTVA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MSTR
success:  VRT
success:  EXC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  IT


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  AEM
success:  DKILY
success:  FERG
success:  IDXX
success:  WTKWY
success:  SVNDY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BSBR
success:  VRSK
success:  AME


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NWG
success:  SNOW
success:  LVS
success:  GLW


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  STLA
success:  DOW
success:  HSY
success:  YUM
success:  CTSH
success:  ABEV
success:  EA
success:  EXR
success:  CNC
success:  BKR
success:  CBRE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ATEYY
success:  SYY
success:  BIDU
success:  CCEP
success:  MRAAY
success:  DFS
success:  ED
success:  EFX
success:  DD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NUE
success:  XEL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  RKT


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ALNY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  IRM
success:  RMD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  GOLD
success:  TRGP
success:  HPQ
Error fetching data for GBTC: If using all scalar values, you must pass an index
success:  GBTC
Error fetching data for DIA: If using all scalar values, you must pass an index
success:  DIA
success:  LULU


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  HIG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  VICI
success:  DB
success:  EL
success:  VEEV


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  SLF
success:  WIT


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WDS
success:  EBAY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ANYYY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  QSR
success:  EIX


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CVE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  DAL
success:  TSCO
success:  XYL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  AXON


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ARGX
success:  MLM
success:  NVZMY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WAB


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  GWLIF
success:  PPRUY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FRFHF
success:  GRMN
success:  VMC
Error fetching data for MDY: If using all scalar values, you must pass an index
success:  MDY
success:  HEI
success:  AVB
success:  CSGP
success:  MTD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BCE
success:  LYB


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FUJIY
success:  PHG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TLGPY
success:  ON
success:  ROK


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BRO


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FER
success:  WEC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  OWL
success:  LI


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CDW
success:  PPG
success:  WTW


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MTB
success:  BEKE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CHT


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NVR
success:  PHM


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  DSCSY
success:  HUM


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BNTX
success:  TW


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NET
success:  FITB
success:  EQR
success:  HUBS
success:  ETR


VLTO: Period '5y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', 'ytd', 'max']
  info_df = pd.concat([i,info_df],ignore_index=True)


success:  VLTO
success:  ANSS


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FCNCA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  OEZVY
success:  DXCM
success:  K


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BDORY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ERIE
success:  BBD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ADM
success:  DVN


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WPM
success:  HPE
success:  CAH


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TEF
success:  ZS
success:  KEYS
success:  FTV
success:  PUBGY
success:  AWK
success:  TTWO
Error fetching data for BFA: If using all scalar values, you must pass an index
success:  BFA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  IOT
success:  HAL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BIIB


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  RBLX


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  GIB
success:  STT


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BGNE
success:  IFF
success:  FANUY
success:  DTE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  SMCI


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  VTR
success:  NTAP
success:  LYV
success:  RJF
success:  DOV


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  IX
success:  SBAC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TECK
success:  STM


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  VOD
success:  BR
success:  GPN
success:  TYL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CCL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  KB


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ERIC
success:  HST
success:  FE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  DECK


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  PBA
success:  CHD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NTDTY
success:  NOK
success:  PUK
success:  ICLR
success:  FSLR


SW: Period '5y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', 'ytd', 'max']
  info_df = pd.concat([i,info_df],ignore_index=True)


success:  SW
success:  WY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NTR


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CQP


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TU
success:  ROL
success:  PPL
success:  TROW
success:  HUBB


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CPAY
success:  RYAAY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FNV


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  DIDIY
success:  AEE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MRNA
success:  PINS
success:  ES
success:  STX
success:  EJPRY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  GDDY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WDC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BLDR


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CCJ


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  LDOS
success:  TME
success:  ILMN
success:  STE
success:  CJPRY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CVNA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TPL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  HOOD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  EQT


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FTS
success:  WRB


SMPNY: Period '5y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', 'ytd', 'max']


success:  SMPNY
success:  GFS


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CBOE
success:  LII
success:  ZTO
success:  CHKP


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TPG
success:  CSL
success:  ATO


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  HBAN
success:  PTC
success:  MKC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ASX
success:  WAT
success:  CUK
success:  ZM
success:  BBY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  PKX
success:  TER


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  RCI


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CINF
success:  COO
success:  CMS


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  SHG
success:  SGSOY
success:  RF
success:  YAHOY
success:  WST


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BAH
success:  TSN
success:  TRU
success:  ZBH
success:  INVH


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TDY


KSPI: Period '5y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', 'ytd', 'max']
  info_df = pd.concat([i,info_df],ignore_index=True)


success:  KSPI


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  EME
success:  SYF
success:  UMC
success:  MKL
success:  WSO


BAM: Period '5y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', 'ytd', 'max']
  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BAM
success:  UAL
success:  TEVA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  PFG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ARE
success:  EXPE
success:  CLX
success:  OMC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MDB


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  STLD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  EXE
success:  BALL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NRG


LINE: Period '5y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', 'ytd', 'max']
  info_df = pd.concat([i,info_df],ignore_index=True)


success:  LINE


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MT
success:  PKG
success:  CNP
success:  HDELY


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  RYAN
success:  KOF
success:  ESS


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MOH
success:  ZBRA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  DKNG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BAX
success:  WSM
success:  TLK
success:  DRI


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WBD
success:  HOLX
success:  GPC
success:  LUV


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CFG
success:  FOXA
success:  DG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  AER


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MAA
success:  NTRS


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WLK
success:  SNAP
success:  MAS
success:  YUMC
success:  SSNC


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CTRA
success:  VRSN
success:  EDPFY
success:  LH


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  EC
success:  LPLA


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TS
success:  IHG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  CRBG
success:  ALGN


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  MANH
success:  ULTA
success:  FDS
success:  AVY
success:  EXPD


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ONON


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  AVTR


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  BURL
success:  J
success:  PSTG
success:  HRL


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  L
success:  JBHT
success:  GEN
success:  DKS
success:  CG
success:  RPM


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  ENTG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  NTRA
success:  IP
success:  DGX


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  TOST


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  UDR


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  SWK


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  FNF
success:  RPRX


Could not get exchangeTimezoneName for ticker 'VIV' reason: 'chart'
$VIV: possibly delisted; no price data found  (period=5y)


success:  VIV
success:  NTNX
success:  APTV


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  EG


  info_df = pd.concat([i,info_df],ignore_index=True)


success:  WLMIY
success:  SUI
success:  AMCR


In [None]:
info_df = pd.DataFrame()
hs_df = pd.DataFrame()
financials_df = pd.DataFrame()
balance_sheet_df = pd.DataFrame()
cash_flow_df = pd.DataFrame()
is_flow_df = pd.DataFrame()
hs,i,f,b,c,is_f = get_fundamental_data("AAPL")

hs_df = pd.concat([hs_df,hs],ignore_index=True)
info_df = pd.concat([info_df,i],ignore_index=True)
financials_df = pd.concat([financials_df,f],ignore_index=True)
balance_sheet_df = pd.concat([balance_sheet_df,b],ignore_index=True)
cash_flow_df = pd.concat([cash_flow_df,c],ignore_index=True)


In [None]:
hs,i,f,b,c,is_f = get_fundamental_data("KR")

hs_df = pd.concat([hs_df,hs],ignore_index=True)
info_df = pd.concat([info_df,i],ignore_index=True)
financials_df = pd.concat([financials_df,f],ignore_index=True)
balance_sheet_df = pd.concat([balance_sheet_df,b],ignore_index=True)
#cash_flow_df = pd.concat([cash_flow_df,c],ignore_index=True)

In [None]:
b.index = np.array(b.index) + 1*90

In [None]:
np.array(b.index) + 1*90

In [None]:
b