Michael Ricardo DS 2500 Project Trading torch

In [59]:
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
import pandas_ta as ta

In [60]:
def fetch_ticker_data(ticker, years_ago= 10):
    """
    Gets the market data for a given date and ticker.
    Fetches from yfinance library.

    Args:
        years ago (int): Representing the most recent day for stock entries 
        ticker(str): Representing the offical company stock ticker
    Returns
        df (DataFrame): DataFrame with Stock pricing data and history data
    """
    most_recent = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    start_date = '2014-12-01'
    
    
    stock_data = yf.Ticker(ticker)
    returning_data = stock_data.history(start = start_date, end= most_recent)

    return returning_data

In [61]:
'''
#13 week, 5 year, and 10 year indexes, will be going by their tickers for this project
tickers = ['^IRX', '^FVX', '^TNX']
df_master_bonds = pd.DataFrame()
for ticker in tickers:
    data = fetch_ticker_data(ticker)
    data = data.drop(columns=['Open', 'Dividends', 'Stock Splits', 'Volume'])
    data.index = pd.to_datetime(data.index).normalize()
    df_master_bonds[f'High_{ticker}'] = data['High']
    df_master_bonds[f'Low_{ticker}'] = data['Low']
    df_master_bonds[f'Close_{ticker}'] = data['Close']
'''

"\n#13 week, 5 year, and 10 year indexes, will be going by their tickers for this project\ntickers = ['^IRX', '^FVX', '^TNX']\ndf_master_bonds = pd.DataFrame()\nfor ticker in tickers:\n    data = fetch_ticker_data(ticker)\n    data = data.drop(columns=['Open', 'Dividends', 'Stock Splits', 'Volume'])\n    data.index = pd.to_datetime(data.index).normalize()\n    df_master_bonds[f'High_{ticker}'] = data['High']\n    df_master_bonds[f'Low_{ticker}'] = data['Low']\n    df_master_bonds[f'Close_{ticker}'] = data['Close']\n"

In [62]:
#Data for the IRX
ticker = '^IRX'
IRX_data = fetch_ticker_data(ticker)
IRX_data = IRX_data.drop(columns=['Open','Dividends', 'Stock Splits', 'Volume'])
IRX_data.index = pd.to_datetime(IRX_data.index).date

In [63]:
#Data for the FVX
ticker = '^FVX'
FVX_data = fetch_ticker_data(ticker)
FVX_data = FVX_data.drop(columns=['Open','Dividends', 'Stock Splits', 'Volume'])
FVX_data.index = pd.to_datetime(FVX_data.index).date

In [64]:
#Data for the TNX
ticker = '^TNX'
TNX_data = fetch_ticker_data(ticker)
TNX_data = TNX_data.drop(columns=['Open','Dividends', 'Stock Splits', 'Volume'])
TNX_data.index = pd.to_datetime(TNX_data.index).date

Adding variables not related to security itself: 

In [65]:
#Data for the S&P 500 & VIX, these will be used as variables 
ticker = '^GSPC'
SP500_data = fetch_ticker_data(ticker)
SP500_data = SP500_data.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits', 'Volume'])
SP500_data.index = SP500_data.index.tz_convert('UTC')

ticker = '^VIX'
VIX_data = fetch_ticker_data(ticker)
VIX_data = VIX_data.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits', 'Volume'])
VIX_data.index = VIX_data.index.tz_convert("UTC")

In [66]:
def external_csv(csv):
    '''
    Pulling in a csv, setting the 'Date column as the index and making it a proper datetime object
    Args:
        csv (CSV) - Data collection with Date column along with pricing data 
    Returns
        df (DataFrame): DataFrame     
    '''
    csv_read = pd.read_csv(csv)
    csv_read['Date'] = csv_read['Date'].str.replace('/', '-')
    csv_read['Date'] = pd.to_datetime(csv_read['Date'])
    csv_read = csv_read.set_index('Date')

    return csv_read

In [67]:
#CPI data CSV
cpi_csv = 'CPI_Data_10.csv'
cpi = external_csv(cpi_csv)


In [68]:
#Data for the 1 year Treasury Bill (Pulled in via CSV)
one_year_csv = '1year_bond_master.csv'
one_year_data = external_csv(one_year_csv)
one_year_data = one_year_data.drop(columns='Open')

In [69]:
#Further cleaning needed for the 1 year, flipping the rows, eliminating % sign within each
one_year_data = one_year_data.iloc[::-1] 

titles = ['High', 'Low','Close']
for title in titles:
    one_year_data[title] = one_year_data[title].str.replace('%', '')

Section will be for data curration and preperation for regression 

In [None]:
def security_editor(df):
    """
    Gaining volatility and moving index metrics from the bond in question. Yield Volatilioty represents the historical volatility for a treasury over a given
    time though its Standerd deviation. Simple moving average measures the average closing price for a week & month of a security. 

    Args:
        df (dataframe): Specific secuity df imported with Close, Volume, High, and Low data for each day of market activity 
    Returns
        df(dataframe): Added ATR. SMA_7 and SMA_30 cols for given securuity. Removing high and low cols
    """
    df['Yield_STDEV'] = ta.stdev(df['Close'], length = 14)
    df = df.drop(columns=['High', 'Low'])
 
    df['SMA_7'] = ta.sma(df['Close'], length=10) 
    df['SMA_30'] = ta.sma(df['Close'], length=50)

    return df

In [71]:
TNX_data_refined = security_editor(TNX_data).dropna()
print(TNX_data_refined)

            Close  Yield_STDEV   SMA_7   SMA_30
2015-02-11  1.988     0.106079  1.8356  2.01794
2015-02-12  1.986     0.114389  1.8591  2.01330
2015-02-13  2.021     0.124403  1.8937  2.00802
2015-02-17  2.145     0.146688  1.9409  2.00518
2015-02-18  2.066     0.148306  1.9695  2.00136
...           ...          ...     ...      ...
2024-11-21  4.432     0.057928  4.3974  4.09002
2024-11-22  4.410     0.054405  4.4078  4.10522
2024-11-25  4.265     0.058041  4.4035  4.11810
2024-11-26  4.302     0.060994  4.3905  4.13130
2024-11-27  4.242     0.070597  4.3696  4.14244

[2466 rows x 4 columns]


In [None]:
'''
def security_combine(df_main, df1, df2):
    """
    Combining the data from other indexes to be used as variabels
    Args:
        df_main (dataframe): Main secuirty which will have the others cols added
        df_1 (dataframe): Sub index which will be added to df_main, will be used for S&P 500
        df_2 (dataframe): Sub index which will be added to df_main, will be used for VIx 
    Returns:
        df_mained: Combined df_main with df1 & df2
    """

    df1 = df1.reindex(df_main.index, method='nearest')  # Align S&P 500 data
    df2 = df2.reindex(df_main.index, method='nearest')  # Align VIX data
    
    df_main = pd.merge(df_main, df1.rename(columns={"Close": "S&P 500 Data"}), 
                       left_index=True, right_index=True, how="outer")

    # Add VIX Data
    df_main = pd.merge(df_main, df2.rename(columns={"Close": "VIX"}), 
                       left_index=True, right_index=True, how="outer")
    
    return df_main
'''

In [None]:
def line_of_best_fit(X, y):
    """ 
    Finds line of best fit based off of a set of vectors
    Args:
        X (array): can be either 1-d or 2-d
        Y (array): can be either 1-d or 2-d
    Returns:
        p (array): 1d array, giving line and slope
    """

    if X.ndim == 1:
        X = add_bias_column(X).T
    else:
        X = X.T
    XTXinv = np.linalg.inv(np.matmul(X, X.T))
    p = np.matmul(XTXinv, np.matmul(X, y))
    return p

    

Regressions: