Michael Ricardo DS 2500 Project Trading torch

In [10]:
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
import pandas_ta as ta

In [11]:
def fetch_ticker_data(ticker, years_ago= 5):
    """
    Gets the market data for a given date and ticker.
    Fetches from yfinance library.

    Args:
        years ago (int): Representing the most recent day for stock entries 
        ticker(str): Representing the offical company stock ticker
    Returns
        df (DataFrame): DataFrame with Stock pricing data and history data
    """
    most_recent = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    five_years_ago = (datetime.today() - timedelta(days= years_ago * 365)).strftime('%Y-%m-%d') 
    
    
    stock_data = yf.Ticker(ticker)
    returning_data = stock_data.history(start = five_years_ago, end= most_recent)

    return returning_data

In [12]:
#Data for the Disney
ticker = 'Dis'
dis_data = fetch_ticker_data(ticker)
dis_data = dis_data.drop(columns=['Open', 'Dividends', 'Stock Splits'])
dis_data.index = dis_data.index.tz_convert('UTC')

In [13]:
#Data for the S&P 500 
ticker = '^GSPC'
SP500_data = fetch_ticker_data(ticker)
SP500_data = SP500_data.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits', 'Volume'])
SP500_data.index = SP500_data.index.tz_convert('UTC')

Adding variables not related to security itself: 

In [14]:
#Data for the S&P 500 
ticker = '^GSPC'
SP500_data = fetch_ticker_data(ticker)
SP500_data = SP500_data.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits', 'Volume'])
SP500_data.index = SP500_data.index.tz_convert('UTC')

In [15]:
#CBOE Volatility Index (VIX)
ticker = '^VIX'
VIX_data = fetch_ticker_data(ticker)
VIX_data = VIX_data.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits', 'Volume'])
VIX_data.index = VIX_data.index.tz_convert("UTC")

Section will be for data curration and preperation for regression 

In [16]:
def security_editor(df):
    """
    Gaining volume, volatility, and moving index metrics from the security in question. Averae True Range represents volatility for an asset over a weeks span/
    Simple moving average measures the average closing price for a week & month of a security. On-Balance volume takes into account volume movement within a seccurity.  

    Args:
        df (dataframe): Specific secuity df imported with Close, Volume, High, and Low data for each day of market activity 
    Returns
        df(dataframe): Added ATR. SMA_7, SMA_30, and OBV cols for given securuity. Removing high and low cols
    """
    df['ATR'] = ta.atr(df['High'], df['Low'], df['Close'], length=7)
    df = df.drop(columns=['High', 'Low'])
 
    df['SMA_7'] = ta.sma(df['Close'], length=10) 
    df['SMA_30'] = ta.sma(df['Close'], length=50)

    df['OBV'] = ta.obv(df['Close'], df['Volume'])
    return df

In [17]:
dis_refined = security_editor(dis_data)
print(dis_refined)

                                Close    Volume       ATR       SMA_7  \
Date                                                                    
2019-11-29 05:00:00+00:00  149.498077   6284900       NaN         NaN   
2019-12-02 05:00:00+00:00  148.551239  10351000       NaN         NaN   
2019-12-03 05:00:00+00:00  146.539261   9273800       NaN         NaN   
2019-12-04 05:00:00+00:00  146.243408   7684800       NaN         NaN   
2019-12-05 05:00:00+00:00  145.414932   7363300       NaN         NaN   
...                               ...       ...       ...         ...   
2024-11-19 05:00:00+00:00  112.419998  12166800  3.432818  105.157000   
2024-11-20 05:00:00+00:00  114.260002   9319300  3.356702  106.694000   
2024-11-21 05:00:00+00:00  114.720001  10261000  3.222887  108.273000   
2024-11-22 05:00:00+00:00  115.650002  10098000  3.153903  109.936001   
2024-11-25 05:00:00+00:00  116.000000  10397800  2.939060  111.450001   

                            SMA_30          OBV  


In [18]:
def security_combine(df_main, df1, df2):
    """
    Combining the data from other indexes to be used as variabels
    Args:
        df_main (dataframe): Main secuirty which will have the others cols added
        df_1 (dataframe): Sub index which will be added to df_main, will be used for S&P 500
        df_2 (dataframe): Sub index which will be added to df_main, will be used for VIx 
    Returns
        df_mained: Combined df_main with df1 & df2
    """

    df1 = df1.reindex(df_main.index, method='nearest')  # Align S&P 500 data
    df2 = df2.reindex(df_main.index, method='nearest')  # Align VIX data
    
    df_main = pd.merge(df_main, df1.rename(columns={"Close": "S&P 500 Data"}), 
                       left_index=True, right_index=True, how="outer")

    # Add VIX Data
    df_main = pd.merge(df_main, df2.rename(columns={"Close": "VIX"}), 
                       left_index=True, right_index=True, how="outer")
    
    return df_main

In [19]:
df_combined = security_combine(dis_refined, SP500_data, VIX_data)
df_combined

Unnamed: 0_level_0,Close,Volume,ATR,SMA_7,SMA_30,OBV,S&P 500 Data,VIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-11-29 05:00:00+00:00,149.498077,6284900,,,,6284900.0,3140.979980,12.620000
2019-12-02 05:00:00+00:00,148.551239,10351000,,,,-4066100.0,3113.870117,14.910000
2019-12-03 05:00:00+00:00,146.539261,9273800,,,,-13339900.0,3093.199951,15.960000
2019-12-04 05:00:00+00:00,146.243408,7684800,,,,-21024700.0,3112.760010,14.800000
2019-12-05 05:00:00+00:00,145.414932,7363300,,,,-28388000.0,3117.429932,14.520000
...,...,...,...,...,...,...,...,...
2024-11-19 05:00:00+00:00,112.419998,12166800,3.432818,105.157000,96.5488,-470908400.0,5916.979980,16.350000
2024-11-20 05:00:00+00:00,114.260002,9319300,3.356702,106.694000,97.0676,-461589100.0,5917.109863,17.160000
2024-11-21 05:00:00+00:00,114.720001,10261000,3.222887,108.273000,97.5760,-451328100.0,5948.709961,16.870001
2024-11-22 05:00:00+00:00,115.650002,10098000,3.153903,109.936001,98.0780,-441230100.0,5969.339844,15.240000
