In [1]:
# Install libraries the first time
# ! pip install -q ipynb yfinance pandas pathlib numpy

In [2]:
import yfinance as yf
import pandas as pd
from pathlib import Path
from datetime import *
import numpy as np

### Variables

In [3]:
data_folder = Path.cwd().parent / 'TrainingData'
data_name = 'stockData.csv'

time_frame = '5y'  # Options: 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max
symbols = [
    'AAPL', 'MSFT', 'AMZN', 'GOOGL', 'GOOG', 'META', 'TSLA', 'NVDA', 'PYPL', 'ADBE',
    'NFLX', 'INTC', 'CMCSA', 'PEP', 'CSCO', 'AVGO', 'COST', 'AMGN', 'TXN', 'QCOM',
    'HON', 'SBUX', 'AMD', 'INTU', 'ISRG', 'BKNG', 'MDLZ', 'ADP', 'GILD', 'MU',
    'AMAT', 'LRCX', 'ADI', 'ASML', 'MRNA', 'ZM', 'DOCU', 'ROST', 'REGN', 'VRTX',
    'EBAY', 'EXC', 'SNPS', 'WDAY', 'TEAM', 'OKTA', 'CRWD', 'SHOP', 'SQ', 'UBER',
    'TCEHY', 'TSM', 'NIO', 'JD', 'BIDU', 'PDD', 'MELI', 'SHOP', 'SE', 'NTES', 'HDB',
    'INFY', 'IBN', 'VALE', 'RIO', 'BHP', 'SCCO', 'EQNR', 'ENB', 'SU', 'CNQ',
    'BCE', 'TU', 'RY', 'TD', 'BNS', 'CM', 'MFC', 'SLF', 'BIP', 'BEP', 'TRP',
    'CNI', 'NSRGY'
]

### Functions for Data Collection and Processing

In [4]:
def get_industry(ticker_symbol):
    try:
        return yf.Ticker(ticker_symbol).info.get('industry', 'Unknown')
    except Exception as e:
        print(f"Error fetching industry for {ticker_symbol}: {e}")
        return 'Error'

In [5]:
def calculate_future_year_change(ticker_symbol, timeframe, buffer=1):
    valid_periods = ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']
    extended_timeframe = 'max' if timeframe == 'max' else valid_periods[valid_periods.index(timeframe) + buffer]
    try:
        future_change = yf.Ticker(ticker_symbol).history(period=extended_timeframe)
        future_change['Future Year Change'] = (future_change['Close'].shift(-252) / future_change['Close'] - 1)

        end_date = future_change.index[-1] - pd.DateOffset(years=1)
        start_date = end_date - pd.DateOffset(years=int(timeframe[:-1]))
        future_change = future_change.loc[start_date:end_date].dropna(subset=['Future Year Change'])
        return future_change
    except Exception as e:
        print(f"Error processing {ticker_symbol}: {e}")
        return pd.DataFrame()

In [6]:
def get_static_ev_data(ticker_symbol):
    try:
        info = yf.Ticker(ticker_symbol).info
        return info.get('totalDebt', 0), info.get('totalCash', 0), info.get('sharesOutstanding', None)
    except Exception as e:
        print(f"Error fetching EV components for {ticker_symbol}: {e}")
        return None, None, None

In [7]:
def calculate_approx_ebit(ticker_symbol):
    try:
        info = yf.Ticker(ticker_symbol).info
        revenue = info.get('totalRevenue', None)
        operating_income = info.get('operatingIncome', None)
        return operating_income if operating_income else (revenue * 0.15 if revenue else None)
    except Exception as e:
        print(f"Error calculating EBIT for {ticker_symbol}: {e}")
        return None

In [8]:
def calculate_daily_ev_ebit(hist_data):
    hist_data['EV/EBIT'] = None
    for symbol in hist_data['Ticker'].unique():
        ticker_data = hist_data[hist_data['Ticker'] == symbol].copy()
        total_debt, cash, shares_outstanding = get_static_ev_data(symbol)
        ebit = calculate_approx_ebit(symbol)

        if shares_outstanding and ebit and ebit != 0:
            ticker_data['EV'] = (ticker_data['Close'] * shares_outstanding) + total_debt - cash
            ticker_data['EV/EBIT'] = ticker_data['EV'] / ebit
            hist_data.loc[ticker_data.index, 'EV/EBIT'] = ticker_data['EV/EBIT']
    return hist_data.drop(columns=['EV'], errors='ignore')

In [9]:
def calculate_daily_roic(hist_data):
    for symbol in hist_data['Ticker'].unique():
        ticker_data = hist_data[hist_data['Ticker'] == symbol].copy()
        total_debt, cash, shares_outstanding = get_static_ev_data(symbol)
        ebit = calculate_approx_ebit(symbol)

        if shares_outstanding and ebit:
            tax_rate = 0.21
            nopat = ebit * (1 - tax_rate)
            invested_capital = total_debt + (ticker_data['Close'] * shares_outstanding) - cash
            ticker_data['ROIC'] = np.where(invested_capital != 0, nopat / invested_capital, None)
            hist_data.loc[ticker_data.index, 'ROIC'] = ticker_data['ROIC']
    return hist_data

In [10]:
def getTickerData(ticker_symbol):
    try:
        ticker = yf.Ticker(ticker_symbol)
        hist = ticker.history(period='1d')
        total_debt, cash, shares_outstanding = get_static_ev_data(ticker_symbol)
        ebit = calculate_approx_ebit(ticker_symbol)
        ev = (hist['Close'].iloc[-1] * shares_outstanding) + total_debt - cash
        ev_ebit = ev / ebit if ebit else None
        market_cap = hist['Close'].iloc[-1] * shares_outstanding
        tax_rate = 0.21
        nopat = ebit * (1 - tax_rate) if ebit else None
        invested_capital = total_debt + market_cap - cash
        roic = nopat / invested_capital if nopat and invested_capital else None
        industry = get_industry(ticker_symbol)

        return pd.DataFrame([{
            'Open': hist['Open'].iloc[-1],
            'High': hist['High'].iloc[-1],
            'Low': hist['Low'].iloc[-1],
            'Close': hist['Close'].iloc[-1],
            'Volume': hist['Volume'].iloc[-1],
            'Dividends': hist.get('Dividends', pd.Series([0.0])).iloc[-1],
            'Stock Splits': hist.get('Stock Splits', pd.Series([0.0])).iloc[-1],
            'EV/EBIT': ev_ebit,
            'Market Cap': market_cap,
            'ROIC': roic,
            'Industry': industry
        }])
    except Exception as e:
        print(f"Error fetching data for {ticker_symbol}: {e}")
        return pd.DataFrame()

In [11]:
def getTickerDataFrom1YrAgo(ticker_symbol):
    try:
        # Fetch ticker data
        ticker = yf.Ticker(ticker_symbol)

        # Define the date range: one year ago to today
        today = datetime.today()
        one_year_ago = today - timedelta(days=365)

        # Fetch historical data for one year ago
        hist = ticker.history(start=(one_year_ago - timedelta(days=1)).strftime('%Y-%m-%d'), 
                              end=(one_year_ago + timedelta(days=1)).strftime('%Y-%m-%d'))
        if hist.empty:
            raise ValueError(f"No historical data available for {ticker_symbol} around {one_year_ago.strftime('%Y-%m-%d')}.")

        # Extract the closest data point to one year ago
        row = hist.iloc[-1]  # Get the last available entry within the date range

        # Price today
        price_today = ticker.history(period='1d')['Close'].iloc[-1]

        # Calculate future price change (from one year ago to today)
        price_change_future = ((price_today - row['Close']) / row['Close']) if row['Close'] else None

        # Collect additional data
        total_debt, cash, shares_outstanding = get_static_ev_data(ticker_symbol)
        ebit = calculate_approx_ebit(ticker_symbol)
        ev = (row['Close'] * shares_outstanding) + total_debt - cash if shares_outstanding else None
        ev_ebit = ev / ebit if ebit else None
        market_cap = row['Close'] * shares_outstanding if shares_outstanding else None
        tax_rate = 0.21
        nopat = ebit * (1 - tax_rate) if ebit else None
        invested_capital = total_debt + market_cap - cash if market_cap and total_debt and cash else None
        roic = nopat / invested_capital if nopat and invested_capital else None
        industry = get_industry(ticker_symbol)

        # Return as a DataFrame
        return pd.DataFrame([{
            'Date': row.name,
            'Open': row['Open'],
            'High': row['High'],
            'Low': row['Low'],
            'Close': row['Close'],
            'Volume': row['Volume'],
            'Dividends': row.get('Dividends', 0.0),
            'Stock Splits': row.get('Stock Splits', 0.0),
            'Future Year Change': price_change_future,
            'Industry': industry,
            'EV/EBIT': ev_ebit,
            'ROIC': roic
        }])

    except Exception as e:
        print(f"Error fetching data for {ticker_symbol}: {e}")
        return pd.DataFrame()


In [12]:
hist_data = pd.DataFrame()
for symbol in symbols:
    try:
        future_change = calculate_future_year_change(symbol, time_frame)
        future_change['Ticker'] = symbol
        future_change['Industry'] = get_industry(symbol)
        hist_data = pd.concat([hist_data, future_change])
    except Exception as e:
        print(f"Error processing {symbol}: {e}")

hist_data.reset_index(inplace=True)
hist_data = calculate_daily_ev_ebit(hist_data)
hist_data = calculate_daily_roic(hist_data)
hist_data.to_csv(data_folder / data_name, index=False)

$NVDA: possibly delisted; no price data found  (period=10y)
  hist_data = pd.concat([hist_data, future_change])


Error processing NVDA: index -1 is out of bounds for axis 0 with size 0
