In [None]:
import yfinance as yf
import pandas as pd


In [None]:
## Symbols to analyse and a Yahoo Finance handle for each of them

# Single source of truth for the symbols; every handle below is derived from it
# so the list and the individual handles cannot drift apart.
stock_tickers = ['MSFT', 'AMZN', 'NFLX', 'GOOG']

# One yf.Ticker per symbol, keyed by the symbol string.
ticker_by_symbol = {symbol: yf.Ticker(symbol) for symbol in stock_tickers}

# Named handles kept for the cells that refer to a specific company.
fin_msft = ticker_by_symbol['MSFT']
fin_amzn = ticker_by_symbol['AMZN']
fin_nflx = ticker_by_symbol['NFLX']
fin_goog = ticker_by_symbol['GOOG']

# fin_dict is the handle used for the single-company statement pull. It is
# MSFT, so reuse the MSFT handle instead of constructing a second one.
fin_dict = fin_msft

# Date window for the historical price pull.
# NOTE(review): the recorded output of that pull stops at 2023-11-06, which
# suggests end_date itself is not included -- confirm before relying on it.
start_date = '2023-11-01'
end_date = '2023-11-07'


## Pull Financial Data



In [None]:

# Pull the annual and quarterly statements for the company behind fin_dict.

# - income statement
income_data = fin_dict.income_stmt
quarterly_income = fin_dict.quarterly_income_stmt

# - balance sheet
balance_data = fin_dict.balance_sheet
quarterly_balance = fin_dict.quarterly_balance_sheet

# - cash flow statement
cash_flow = fin_dict.cashflow
quarterly_cash = fin_dict.quarterly_cashflow

# - news items for the same company
# NOTE(review): the news-collection cell further down rebinds fin_news inside
# its loop, so this value only survives until that cell runs.
fin_news = fin_dict.news

# - quick summary fields
# `basic_info` is understood to be a deprecated alias of `fast_info` in
# yfinance 0.2.x, so read the supported property directly.
fin_basic = fin_dict.fast_info


### Get Financial News

In [None]:
# Collect the news items (headline, publisher, link, ...) of the selected
# tickers into one table and save it as news_data.csv.

# yf.Ticker objects whose news feeds are read
fins = [fin_msft, fin_amzn, fin_nflx, fin_goog]

# Fields copied from every news item; also the column order of the table.
NEWS_FIELDS = ['title', 'publisher', 'link', 'type', 'relatedTickers']

news_data = []

for fin in fins:
    # `or []` keeps the loop going when a ticker has no feed (None or empty).
    fin_news = fin.news or []
    for news_item in fin_news:
        # .get() leaves a gap (None) when an item lacks a field instead of
        # aborting the whole collection with a KeyError.
        news_data.append({field: news_item.get(field) for field in NEWS_FIELDS})

# Passing the columns explicitly keeps the header stable even when no news
# was returned at all.
df = pd.DataFrame(news_data, columns=NEWS_FIELDS)

# Save the DataFrame to a CSV file
df.to_csv('news_data.csv', index=False)

df.head()

### Financials across tickers

In [None]:

# One yf.Ticker handle per symbol listed in stock_tickers.
# NOTE(review): these two assignments are repeated right after
# deduplicate_columns is defined; one of the two copies is redundant.
tickers = list(map(yf.Ticker, stock_tickers))

# Accumulator: the per-ticker statement frames are appended to this list.
dfs = list()
def deduplicate_columns(df):
    """Make the column labels of ``df`` unique, in place.

    The first occurrence of a label is kept unchanged. Every later occurrence
    is renamed to ``"<label>_<n>"`` with ``n`` starting at 1. If that name is
    already used by another column, ``n`` is increased until the name is free,
    so the result never contains duplicates.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns may contain repeated labels. It is modified in
        place; callers may rely on either the mutation or the return value.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Nothing to rename: leave the column index untouched.
    if not df.columns.has_duplicates:
        return df

    taken = set(df.columns)   # every label that is already in use
    seen_count = {}           # label -> occurrences met so far
    renamed = []

    for label in df.columns:
        occurrence = seen_count.get(label, 0)
        seen_count[label] = occurrence + 1

        if occurrence == 0:
            renamed.append(label)
            continue

        # f-string formatting also works for non-string labels such as
        # integers or timestamps, where `label + '_'` would raise.
        suffix = occurrence
        candidate = f"{label}_{suffix}"
        while candidate in taken:
            suffix += 1
            candidate = f"{label}_{suffix}"

        taken.add(candidate)
        renamed.append(candidate)

    df.columns = pd.Index(renamed, name=df.columns.name)
    return df

# Build one table holding the income statement, balance sheet and cash flow
# line items of every symbol in stock_tickers, indexed by (Ticker, Date).
tickers = [yf.Ticker(symbol) for symbol in stock_tickers]

dfs = []  # one reshaped frame per ticker
for ticker in tickers:
    # Stack the three statements on top of each other.
    statements = pd.concat([
        ticker.financials,
        ticker.balance_sheet,
        ticker.cashflow,
    ])

    # Transpose, then move the old index into a leading 'Date' column.
    data = statements.T.rename_axis('Date').reset_index()

    # Tag every row with the symbol it belongs to.
    data['Ticker'] = ticker.ticker

    # Repeated column labels would make the concat below fail, so make them
    # unique first. The return value is used rather than relying on the
    # helper mutating its argument.
    dfs.append(deduplicate_columns(data))

df = pd.concat(dfs, ignore_index=True).set_index(['Ticker', 'Date'])

# Optional export (orient='records' does not write the index, so turn the
# Ticker/Date index back into columns first):
# df.reset_index().to_json('output.jsonl', orient='records', lines=True)

df.head(100)

### Get Historical Market Data

In [96]:

# Download 1-minute price bars for all symbols in stock_tickers at once.
# NOTE(review): Yahoo is understood to serve 1-minute bars only for a recent
# window, so the fixed start_date / end_date may no longer return data on a
# fresh run -- confirm, or switch to a coarser interval.
tickers = yf.Tickers(stock_tickers)
tickers_hist = tickers.history(start=start_date, end=end_date, interval='1m')

# Transform the multi-level column index into a single-level set of columns:
# column level 1 is moved into the row index, the row index levels are named
# Date / Ticker, and Ticker is then turned into an ordinary column.
tickers_hist_long = (
    tickers_hist
    .stack(level=1)
    .rename_axis(['Date', 'Ticker'])
    .reset_index(level=1)
)

tickers_hist_long

[*********************100%%**********************]  4 of 4 completed


Unnamed: 0_level_0,Ticker,Close,Dividends,High,Low,Open,Stock Splits,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-11-01 09:30:00-04:00,AMZN,134.147995,0.0,134.339996,133.710007,133.960007,0.0,2202693
2023-11-01 09:30:00-04:00,GOOG,125.110001,0.0,125.339996,124.925003,125.339996,0.0,570480
2023-11-01 09:30:00-04:00,MSFT,339.815002,0.0,340.399994,339.649994,339.790009,0.0,611174
2023-11-01 09:30:00-04:00,NFLX,416.980011,0.0,417.440002,414.769989,414.769989,0.0,130979
2023-11-01 09:31:00-04:00,AMZN,134.369003,0.0,134.399994,133.990005,134.149994,0.0,293806
...,...,...,...,...,...,...,...,...
2023-11-06 15:58:00-05:00,NFLX,434.690002,0.0,434.690002,434.260010,434.450012,0.0,26387
2023-11-06 15:59:00-05:00,AMZN,139.770004,0.0,139.770004,139.639999,139.654999,0.0,908057
2023-11-06 15:59:00-05:00,GOOG,131.490005,0.0,131.559998,131.419998,131.460007,0.0,438144
2023-11-06 15:59:00-05:00,MSFT,356.640015,0.0,356.670013,356.239990,356.359985,0.0,512477
