# Import these functions

In [None]:
!pip install yahoo_fin
!pip install yoptions

In [5]:
import concurrent.futures

# Fundemental & Technical Data
import yfinance as yf 

# Options Data
import yoptions as yo
from yahoo_fin import options

# General Helper Libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta



File not found.


In [19]:
# Set option to display all columns
pd.set_option('display.max_columns', None)

# Optionally, set the max rows displayed in the output as well
pd.set_option('display.max_rows', 100)

## Technical Features (Technical Indicators known to be reliable for ML and forecasting)

In [20]:
def calculate_ema(data, period):
    alpha = 2 / (period + 1)
    return data.ewm(alpha=alpha, adjust=False).mean()

def calculate_dema(data, period):
    ema1 = calculate_ema(data, period)
    ema2 = calculate_ema(ema1, period)
    return 2 * ema1 - ema2


# Define a function to compute the moving averages according to the type.
def moving_average(df, ma_type, period):
    if ma_type == "DEMA":
        return calculate_dema(df, period)
    elif ma_type == "EMA":
        return calculate_ema(df, period)
    elif ma_type == "SMA":
        return df.rolling(window=period).mean()


In [21]:
def macd(data, short_window=12, long_window=26, signal_window=9):
    short_ema = data['Close'].ewm(span=short_window, adjust=False).mean()
    long_ema = data['Close'].ewm(span=long_window, adjust=False).mean()
    data['MACD'] = short_ema - long_ema
    data['Signal'] = data['MACD'].ewm(span=signal_window, adjust=False).mean()
    return data

def rsi(data, periods=14, ema=True):
    close_delta = data['Close'].diff()
    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)
    if ema:
        ma_up = up.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
        ma_down = down.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    else:
        ma_up = up.rolling(window=periods).mean()
        ma_down = down.rolling(window=periods).mean()
    rsi = ma_up / ma_down
    data['RSI'] = 100 - (100 / (1 + rsi))
    return data


def obv(data):
    data['OBV'] = np.where(data['Close'] > data['Close'].shift(1), data['Volume'],
                           np.where(data['Close'] < data['Close'].shift(1), -data['Volume'], 0)).cumsum()
    return data

def atr(data, window=14):
    high_low = data['High'] - data['Low']
    high_close = np.abs(data['High'] - data['Close'].shift())
    low_close = np.abs(data['Low'] - data['Close'].shift())
    ranges = pd.concat([high_low, high_close, low_close], axis=1)
    true_range = np.max(ranges, axis=1)
    data['ATR'] = true_range.rolling(window=window).mean()
    return data

def stochastic_oscillator(data, window=14):
    low_min = data['Low'].rolling(window=window).min()
    high_max = data['High'].rolling(window=window).max()
    data['%K'] = 100 * ((data['Close'] - low_min) / (high_max - low_min))
    data['%D'] = data['%K'].rolling(window=3).mean()
    return data

In [22]:
def woodie_pivots(data):
    high = data['High']
    low = data['Low']
    close = data['Close']
    pivot = (high + low + 2 * close) / 4
    data['Pivot'] = pivot
    data['R1'] = 2 * pivot - low
    data['S1'] = 2 * pivot - high
    data['R2'] = pivot + (high - low)
    data['S2'] = pivot - (high - low)
    data['R3'] = high + 2 * (pivot - low)
    data['S3'] = low - 2 * (high - pivot)
    data['R4'] = pivot + 3 * (high - low)
    data['S4'] = pivot - 3 * (high - low)
    return data

In [24]:
def bollinger_bands(data, window=20, num_std=2):
    rolling_mean = data['Close'].rolling(window=window).mean()
    rolling_std = data['Close'].rolling(window=window).std()
    data['Bollinger_High'] = rolling_mean + (rolling_std * num_std)
    data['Bollinger_Low'] = rolling_mean - (rolling_std * num_std)
    return data

# Calculate the predictive rolling mean and standard deviation
def predict_bollinger_bands(series, period, num_std, shift_periods):
    rolling_mean = series.rolling(window=period).mean()
    rolling_std = series.rolling(window=period).std()

    # Predict the future values by shifting the mean and std
    predicted_mean = rolling_mean.shift(-shift_periods)
    predicted_std = rolling_std.shift(-shift_periods)

    # Calculate the upper and lower predictive bands
    predicted_upper_band = predicted_mean + (predicted_std * num_std)
    predicted_lower_band = predicted_mean - (predicted_std * num_std)

    return predicted_upper_band, predicted_lower_band

# Calculate the predictive bands for 3 periods ahead

# Calculate the rolling mean and standard deviation for the Bollinger Bands
def calculate_bollinger_bands(series, period, num_std):
    rolling_mean = series.rolling(window=period).mean()
    rolling_std = series.rolling(window=period).std()
    upper_band = rolling_mean + (rolling_std * num_std)
    lower_band = rolling_mean - (rolling_std * num_std)
    return upper_band, rolling_mean, lower_band

# Calculate predictive Bollinger Bands by shifting the bands forward
def calculate_predictive_bands(series, period, num_std, shift_periods):
    upper_band, middle_band, lower_band = calculate_bollinger_bands(series, period, num_std)
    # Shift the calculated bands forward by the shift_periods
    predictive_upper_band = upper_band.shift(periods=shift_periods)
    predictive_lower_band = lower_band.shift(periods=shift_periods)
    # For the predicted SMA, simply use the shifted rolling mean
    predictive_middle_band = middle_band.shift(periods=shift_periods)
    return predictive_upper_band, predictive_middle_band, predictive_lower_band



In [25]:
############################################################################################################
# Process other non-stationary data
############################################################################################################
# Function to detrend time series data using a linear regression model
def detrend_data(data, column):
    # Linear regression model requires reshaped index as a feature
    X = np.arange(len(data)).reshape(-1, 1)
    y = data[column].values  # Original values to detrend
    
    # Create and fit the model
    model = LinearRegression()
    model.fit(X, y)
    
    # Predict the trend
    trend = model.predict(X)
    
    # Detrend by subtracting the trend from the original data
    detrended = y - trend
    data[f'{column}_detrended'] = detrended
    
    # Return the detrended data and the trend for further analysis
    return data, trend

def seasonal_decomposition(data, column, period):
    # Perform seasonal decomposition
    decomposition = seasonal_decompose(data[column], model='multiplicative', period=period)
    
    # Add components to DataFrame
    data['trend_component'] = decomposition.trend
    data['seasonal_component'] = decomposition.seasonal
    data['residual_component'] = decomposition.resid
    
    # Seasonally adjust the data
    data[column + '_seasonally_adjusted'] = data[column] / data['seasonal_component']
    
    return data

# Function to calculate price differences
def calculate_price_differences(data, column):
    data[f'{column}_diff'] = data[column].diff()
    return data

# Function to calculate log returns
def calculate_log_returns(data, column):
    data[f'{column}_log_return'] = np.log(data[column] / data[column].shift(1))
    return data

# Function to calculate volume changes
def calculate_volume_changes(data, volume_column):
    data[f'{volume_column}_changes'] = data[volume_column].diff()
    return data


## Data Preparation Helpers

In [28]:

def fetch_fundamentals(ticker):
    """
    Fetches comprehensive fundamental data for a given ticker, including balance sheet and cash flow,
    and returns it as a DataFrame.

    Args:
    - ticker (str): The ticker symbol of the stock.

    Returns:
    - DataFrame: DataFrame with merged market, technical, and fundamental data.
    """
    try:
        # Define start date and end date based on current date and one year ago
        end_date = datetime.now().strftime('%Y-%m-%d')
        start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')

        ticker_obj = yf.Ticker(ticker)
        
        # Fetch Beta from ticker's info
        beta_value = ticker_obj.info.get('beta', 0)
        
        balance_sheet = ticker_obj.balance_sheet
        cashflow = ticker_obj.cashflow

        balance_sheet_transposed = balance_sheet.T
        cashflow_transposed = cashflow.T

        fundamentals = pd.concat([balance_sheet_transposed, cashflow_transposed], axis=1)
        fundamentals.index.names = ['Date']
        
        # Insert Beta as the first column
        fundamentals.insert(0, 'Beta', beta_value)

        fundamentals.fillna(method='backfill', inplace=True)
        fundamentals.fillna(method='ffill', inplace=True)
        fundamentals.fillna(0, inplace=True)

        # Example of calculating growth rate of free cash flows (replace with your actual data)
        free_cash_flows = pd.Series([100, 120, 140, 160, 180])
        growth_rate = free_cash_flows.pct_change().mean()
        print("Free Cash Flow Growth Rate:", growth_rate)

        return fundamentals

    except Exception as e:
        print(f"Failed to fetch or process fundamental data for {ticker}: {e}")
        return pd.DataFrame()  # Return empty DataFrame in case of failure

# Apply the function to fetch and merge fundamental data for MSFT
fundamental_data = fetch_fundamentals('MSFT')

# Displaying the first few rows of the fundamental dataset
fundamental_data


Free Cash Flow Growth Rate: 0.15863095238095237


  fundamentals.fillna(method='backfill', inplace=True)
  fundamentals.fillna(method='backfill', inplace=True)
  fundamentals.fillna(method='ffill', inplace=True)


Unnamed: 0_level_0,Beta,Ordinary Shares Number,Share Issued,Net Debt,Total Debt,Tangible Book Value,Invested Capital,Working Capital,Net Tangible Assets,Capital Lease Obligations,Common Stock Equity,Total Capitalization,Total Equity Gross Minority Interest,Stockholders Equity,Gains Losses Not Affecting Retained Earnings,Other Equity Adjustments,Retained Earnings,Capital Stock,Common Stock,Total Liabilities Net Minority Interest,Total Non Current Liabilities Net Minority Interest,Other Non Current Liabilities,Tradeand Other Payables Non Current,Non Current Deferred Liabilities,Non Current Deferred Revenue,Non Current Deferred Taxes Liabilities,Long Term Debt And Capital Lease Obligation,Long Term Capital Lease Obligation,Long Term Debt,Current Liabilities,Other Current Liabilities,Current Deferred Liabilities,Current Deferred Revenue,Current Debt And Capital Lease Obligation,Current Debt,Pensionand Other Post Retirement Benefit Plans Current,Payables And Accrued Expenses,Payables,Total Tax Payable,Income Tax Payable,Accounts Payable,Total Assets,Total Non Current Assets,Other Non Current Assets,Investments And Advances,Long Term Equity Investment,Goodwill And Other Intangible Assets,Other Intangible Assets,Goodwill,Net PPE,Accumulated Depreciation,Gross PPE,Leases,Other Properties,Machinery Furniture Equipment,Buildings And Improvements,Land And Improvements,Properties,Current Assets,Other Current Assets,Hedging Assets Current,Inventory,Finished Goods,Work In Process,Raw Materials,Receivables,Accounts Receivable,Allowance For Doubtful Accounts Receivable,Gross Accounts Receivable,Cash Cash Equivalents And Short Term Investments,Other Short Term Investments,Cash And Cash Equivalents,Cash Equivalents,Cash Financial,Free Cash Flow,Repurchase Of Capital Stock,Repayment Of Debt,Issuance Of Debt,Issuance Of Capital Stock,Capital Expenditure,End Cash Position,Beginning Cash Position,Effect Of Exchange Rate Changes,Changes In Cash,Financing Cash Flow,Cash Flow From Continuing Financing Activities,Net Other Financing Charges,Cash Dividends Paid,Common Stock Dividend Paid,Net Common Stock Issuance,Common Stock Payments,Common Stock Issuance,Net Issuance Payments Of Debt,Net Short Term Debt Issuance,Net Long Term Debt Issuance,Long Term Debt Payments,Long Term Debt Issuance,Investing Cash Flow,Cash Flow From Continuing Investing Activities,Net Other Investing Changes,Net Investment Purchase And Sale,Sale Of Investment,Purchase Of Investment,Net Business Purchase And Sale,Purchase Of Business,Net PPE Purchase And Sale,Purchase Of PPE,Operating Cash Flow,Cash Flow From Continuing Operating Activities,Change In Working Capital,Change In Other Working Capital,Change In Other Current Liabilities,Change In Other Current Assets,Change In Payables And Accrued Expense,Change In Payable,Change In Account Payable,Change In Inventory,Change In Receivables,Changes In Account Receivables,Stock Based Compensation,Deferred Tax,Deferred Income Tax,Depreciation Amortization Depletion,Depreciation And Amortization,Depreciation,Operating Gains Losses,Gain Loss On Investment Securities,Net Income From Continuing Operations
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1
2023-06-30,0.89,7432000000.0,7432000000.0,12533000000.0,59965000000.0,128971000000.0,253460000000.0,80108000000.0,128971000000.0,12728000000.0,206223000000.0,248213000000.0,206223000000.0,206223000000.0,-6343000000.0,-6343000000.0,118848000000.0,93718000000.0,93718000000.0,205753000000.0,101604000000.0,17981000000.0,25560000000.0,3345000000.0,2912000000.0,433000000.0,54718000000.0,12728000000.0,41990000000.0,104149000000.0,14745000000.0,50901000000.0,50901000000.0,5247000000.0,5247000000.0,11009000000.0,22247000000.0,22247000000.0,4152000000.0,4152000000.0,18095000000.0,411976000000.0,227719000000.0,30601000000.0,9879000000.0,9879000000.0,77252000000.0,9366000000.0,67886000000.0,109987000000.0,-68251000000.0,178238000000.0,8537000000.0,14346000000.0,81207000000.0,68465000000.0,5683000000.0,0.0,184257000000.0,21807000000.0,6000000.0,2500000000.0,1768000000.0,23000000.0,709000000.0,48688000000.0,48688000000.0,-650000000.0,49338000000.0,111256000000.0,76552000000.0,34704000000.0,26226000000.0,8478000000.0,59475000000.0,-22245000000.0,-2750000000.0,0.0,1866000000.0,-28107000000.0,34704000000.0,13931000000.0,-194000000.0,20967000000.0,-43935000000.0,-43935000000.0,-1006000000.0,-19800000000.0,-19800000000.0,-20379000000.0,-22245000000.0,1866000000.0,-2750000000.0,0.0,-2750000000.0,-2750000000.0,0.0,-22680000000.0,-22680000000.0,-3116000000.0,10213000000.0,47864000000.0,-37651000000.0,-1670000000.0,-1670000000.0,-28107000000.0,-28107000000.0,87582000000.0,87582000000.0,-2388000000.0,5177000000.0,2825000000.0,-4824000000.0,-2721000000.0,-2721000000.0,-2721000000.0,1242000000.0,-4087000000.0,-4087000000.0,9611000000.0,-6059000000.0,-6059000000.0,13861000000.0,13861000000.0,13861000000.0,196000000.0,-219000000.0,72361000000.0
2022-06-30,0.89,7464000000.0,7464000000.0,35850000000.0,61270000000.0,87720000000.0,216323000000.0,74602000000.0,87720000000.0,11489000000.0,166542000000.0,213574000000.0,166542000000.0,166542000000.0,-4678000000.0,-4678000000.0,84281000000.0,86939000000.0,86939000000.0,198298000000.0,103216000000.0,15526000000.0,26069000000.0,3100000000.0,2870000000.0,230000000.0,58521000000.0,11489000000.0,47032000000.0,95082000000.0,13067000000.0,45538000000.0,45538000000.0,2749000000.0,2749000000.0,10661000000.0,23067000000.0,23067000000.0,4067000000.0,4067000000.0,19000000000.0,364840000000.0,195156000000.0,21897000000.0,6891000000.0,6891000000.0,78822000000.0,11298000000.0,67524000000.0,87546000000.0,-59660000000.0,147206000000.0,7819000000.0,13148000000.0,66491000000.0,55014000000.0,4734000000.0,0.0,169684000000.0,16924000000.0,8000000.0,3742000000.0,2516000000.0,82000000.0,1144000000.0,44261000000.0,44261000000.0,-633000000.0,44894000000.0,104749000000.0,90818000000.0,13931000000.0,5673000000.0,8258000000.0,65149000000.0,-32696000000.0,-9023000000.0,0.0,1841000000.0,-23886000000.0,13931000000.0,14224000000.0,-141000000.0,-152000000.0,-58876000000.0,-58876000000.0,-863000000.0,-18135000000.0,-18135000000.0,-30855000000.0,-32696000000.0,1841000000.0,-9023000000.0,0.0,-9023000000.0,-9023000000.0,0.0,-30311000000.0,-30311000000.0,-2825000000.0,18438000000.0,44894000000.0,-26456000000.0,-22038000000.0,-22038000000.0,-23886000000.0,-23886000000.0,89035000000.0,89035000000.0,446000000.0,5805000000.0,3169000000.0,-3514000000.0,2943000000.0,2943000000.0,2943000000.0,-1123000000.0,-6834000000.0,-6834000000.0,7502000000.0,-5702000000.0,-5702000000.0,14460000000.0,14460000000.0,14460000000.0,-409000000.0,-219000000.0,72738000000.0
2021-06-30,0.89,7519000000.0,7519000000.0,43922000000.0,67775000000.0,84477000000.0,200134000000.0,95749000000.0,84477000000.0,9629000000.0,141988000000.0,192062000000.0,141988000000.0,141988000000.0,1822000000.0,1822000000.0,57055000000.0,83111000000.0,83111000000.0,191791000000.0,103134000000.0,13427000000.0,27190000000.0,2814000000.0,2616000000.0,198000000.0,59703000000.0,9629000000.0,50074000000.0,88657000000.0,11666000000.0,41525000000.0,41525000000.0,8072000000.0,8072000000.0,10057000000.0,17337000000.0,17337000000.0,2174000000.0,2174000000.0,15163000000.0,333779000000.0,149373000000.0,15075000000.0,5984000000.0,5984000000.0,57511000000.0,7800000000.0,49711000000.0,70803000000.0,-51351000000.0,122154000000.0,6884000000.0,11088000000.0,56594000000.0,43928000000.0,3660000000.0,0.0,184406000000.0,13393000000.0,78000000.0,2636000000.0,1367000000.0,79000000.0,1190000000.0,38043000000.0,38043000000.0,-751000000.0,38794000000.0,130256000000.0,116032000000.0,14224000000.0,6952000000.0,7272000000.0,56118000000.0,-27385000000.0,-3750000000.0,0.0,1693000000.0,-20622000000.0,14224000000.0,13576000000.0,-29000000.0,677000000.0,-48486000000.0,-48486000000.0,-2523000000.0,-16521000000.0,-16521000000.0,-25692000000.0,-27385000000.0,1693000000.0,-3750000000.0,0.0,-3750000000.0,-3750000000.0,0.0,-27577000000.0,-27577000000.0,-922000000.0,2876000000.0,65800000000.0,-62924000000.0,-8909000000.0,-8909000000.0,-20622000000.0,-20622000000.0,76740000000.0,76740000000.0,-936000000.0,2324000000.0,5551000000.0,-4391000000.0,2798000000.0,2798000000.0,2798000000.0,-737000000.0,-6481000000.0,-6481000000.0,6118000000.0,-150000000.0,-150000000.0,11686000000.0,11686000000.0,11686000000.0,-1249000000.0,-219000000.0,61271000000.0
2020-06-30,0.89,7571000000.0,7571000000.0,49751000000.0,70998000000.0,67915000000.0,181631000000.0,109605000000.0,67915000000.0,7671000000.0,118304000000.0,177882000000.0,118304000000.0,118304000000.0,3186000000.0,3186000000.0,34566000000.0,80552000000.0,80552000000.0,183007000000.0,110697000000.0,10632000000.0,29432000000.0,3384000000.0,3180000000.0,204000000.0,67249000000.0,7671000000.0,59578000000.0,72310000000.0,10027000000.0,36000000000.0,36000000000.0,3749000000.0,3749000000.0,7874000000.0,14660000000.0,14660000000.0,2130000000.0,2130000000.0,12530000000.0,301311000000.0,119396000000.0,13138000000.0,2965000000.0,2965000000.0,50389000000.0,7038000000.0,43351000000.0,52904000000.0,-43197000000.0,96101000000.0,5487000000.0,8753000000.0,46043000000.0,33995000000.0,1823000000.0,0.0,181915000000.0,11482000000.0,0.0,1895000000.0,1112000000.0,83000000.0,700000000.0,32011000000.0,32011000000.0,-788000000.0,32799000000.0,136527000000.0,122951000000.0,13576000000.0,7666000000.0,5910000000.0,45234000000.0,-22968000000.0,-5518000000.0,0.0,1343000000.0,-15441000000.0,13576000000.0,11356000000.0,-201000000.0,2421000000.0,-46031000000.0,-46031000000.0,-3751000000.0,-15137000000.0,-15137000000.0,-21625000000.0,-22968000000.0,1343000000.0,-5518000000.0,0.0,-5518000000.0,-5518000000.0,0.0,-12223000000.0,-12223000000.0,-1241000000.0,6980000000.0,84170000000.0,-77190000000.0,-2521000000.0,-2521000000.0,-15441000000.0,-15441000000.0,60675000000.0,60675000000.0,-1483000000.0,-1419000000.0,2694000000.0,-3367000000.0,3018000000.0,3018000000.0,3018000000.0,168000000.0,-2577000000.0,-2577000000.0,5289000000.0,11000000.0,11000000.0,12796000000.0,12796000000.0,12796000000.0,-219000000.0,-219000000.0,44281000000.0


In [27]:
def fetch_technical_data(ticker='SPY', start_year=1993, end_year=None, interval='1d', calculate_indicators=False, include_fundamentals=False, export_csv=False, csv_file=None,):
    """
    Fetches data for a specified ticker from Yahoo Finance from the given start year to the current year or specified end year at specified intervals.
    
    Parameters:
        ticker (str): The ticker symbol for the asset. Defaults to 'SPY'.
        start_year (int): The year from which to start fetching the data. Defaults to 1993.
        end_year (int): The last year for which to fetch the data. Defaults to the current year if None.
        interval (str): The data interval ('1d' for daily, '1wk' for weekly, '1mo' for monthly, '1h' for hourly).
        export_csv (bool): Whether to export the data to a CSV file. Defaults to False.
        csv_file (str): The path of the CSV file to export the data to. Automatically determined if None.
        calculate_indicators (bool): Flag to calculate technical indicators.
        include_fundamentals (bool): Flag to include fundamental data.

    Returns:
        DataFrame: DataFrame containing the requested financial data.
    """
    # Adjust for hourly data to limit to the last 730 days
    if interval == '1h':
        start_date = (datetime.now() - timedelta(days=730)).strftime('%Y-%m-%d')
    else:
        start_date = f'{start_year}-01-01'
    
    if end_year is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    else:
        end_date = f"{end_year}-12-31"
    
    if csv_file is None:
        csv_file = f'{ticker}_{interval}_data_{start_date}_to_{end_date}.csv'
    
    data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)
    
    if not data.empty:
        if calculate_indicators:
            # Here you would call your indicator functions on the `data` DataFrame
            data = bollinger_bands(data)
            data = macd(data)
            data = rsi(data)
            data = woodie_pivots(data)
            data = obv(data)
            data = atr(data)
            data = stochastic_oscillator(data)

            # Non-stationary data processing
            data = calculate_price_differences(data, 'Close')  # Calculate price differences
            data = calculate_log_returns(data, 'Close')  # Calculate log returns for the 'Close' column
            data = calculate_volume_changes(data, 'Volume')  # Calculate volume changes
            # Handling NaN values by forward filling then dropping rows with NaN values
            data.ffill(inplace=True)
            data.dropna(inplace=True)
    return data

MSFT = fetch_technical_data('MSFT', interval="1d", calculate_indicators=True, include_fundamentals=True)

MSFT.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Bollinger_High,Bollinger_Low,MACD,Signal,RSI,Pivot,R1,S1,R2,S2,R3,S3,R4,S4,OBV,ATR,%K,%D,Close_diff,Close_log_return,Volume_changes
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
1993-01-29,2.734375,2.75,2.679688,2.703125,1.672746,39424000,2.864005,2.658652,0.008243,0.021679,43.908854,2.708984,2.738281,2.667969,2.779297,2.638672,2.808593,2.597657,2.919921,2.498048,-96377600,0.077846,18.918785,17.355565,-0.015625,-0.005764,-37606400.0
1993-02-01,2.695313,2.75,2.671875,2.734375,1.692085,42854400,2.85714,2.672939,0.006781,0.0187,49.769723,2.722656,2.773438,2.695312,2.800781,2.644531,2.851562,2.617188,2.957031,2.488281,-53523200,0.077846,29.729614,24.324199,0.03125,0.011494,3430400.0
1993-02-02,2.730469,2.796875,2.726563,2.78125,1.721092,70371200,2.856504,2.681387,0.009298,0.016819,57.02365,2.771484,2.816406,2.746094,2.841797,2.701172,2.886718,2.675782,2.982421,2.560548,16848000,0.07394,45.945857,31.531419,0.046875,0.016998,27516800.0
1993-02-03,2.796875,2.820313,2.75,2.761719,1.709005,71728000,2.855385,2.680552,0.009606,0.015377,53.553385,2.773438,2.796875,2.726562,2.843751,2.703125,2.867188,2.65625,2.984377,2.562499,-54880000,0.071149,58.00002,44.558497,-0.019531,-0.007047,1356800.0
1993-02-04,2.742188,2.742188,2.640625,2.65625,1.643739,124214400,2.863732,2.665175,0.001325,0.012566,39.554622,2.673828,2.707031,2.605469,2.775391,2.572265,2.808594,2.503906,2.978517,2.369139,-179094400,0.070312,8.695629,37.547168,-0.105469,-0.038938,52486400.0


# Data Preprocessing

In [None]:
"""
calculate_returns(data, periods): Calculates returns over specified periods (e.g., 1, 2, 3 months) for given stock prices.
calculate_volatility(data, periods): Computes volatility over specified periods for given stock prices.
calculate_moving_averages(data, periods): Calculates moving averages over specified periods for given stock prices.
calculate_MA_gaps(data, periods): Computes moving average gaps over specified periods to capture trends.
apply_log_transform(data, columns): Applies logarithmic transformation to specified columns to normalize data.
add_lag_features(data, columns, lags): Adds lagged features for specified columns and lag periods to capture time-series dependencies.
"""

# Advanced Feature Engineering

In [None]:
"""
encode_cyclical_features(data, column, max_val): Encodes cyclical features like days of the week or months in a year using sine and cosine transformations to preserve their cyclical nature.
calculate_technical_indicators(data, indicators): Calculates a range of technical indicators (e.g., RSI, MACD, Bollinger Bands) specified by the user for given stock prices.
apply_PCA_reduction(data, n_components): Applies PCA (Principal Component Analysis) to reduce the dimensionality of the feature space while retaining n_components principal components.
"""


# Trading Strategy Simulation and Evaluation

In [None]:
"""
simulate_trading_strategy(predictions, transaction_costs): Simulates a trading strategy based on model predictions and calculates net profit after accounting for transaction costs.
calculate_max_drawdown(returns): Calculates the maximum drawdown from a series of returns, useful for risk assessment.
backtest_strategy(time_series, strategy_function): Conducts a backtest of a trading strategy function on a historical time series dataset to evaluate performance over time.
"""



# Model Training and Evaluation




In [None]:
"""
train_test_split_time_series(data, test_ratio): Splits time-series data into training and testing sets with a specified ratio, preserving the temporal order.
cross_validate_time_series(model, data, cv_splits): Performs time-series cross-validation with a given model, data, and number of splits.
evaluate_model_performance(model, X_test, y_test, metrics): Evaluates the model performance using specified metrics (e.g., RMSE, MAE, Pearson correlation).
hyperparameter_optimization(model, param_grid, X_train, y_train): Conducts hyperparameter optimization for a given model and parameter grid.
"""



# Prediction and Ranking

In [None]:
"""
predict_and_rank(model, data, features): Generates predictions using a trained model and ranks the predictions for trading strategies.
adjust_predictions_based_on_median(data, securities_to_adjust): Dynamically adjusts predictions based on the median target value for specific securities.
calculate_spread_return_sharpe(data, portfolio_size, weight_ratio): Calculates the Sharpe ratio based on spread return for a given portfolio size and weight ratio.
"""

# Data Visualization

In [None]:
"""
plot_stock_prices(data, title): Plots stock prices or returns over time.
plot_feature_importances(model, feature_names): Plots the feature importances for a trained model.
plot_prediction_vs_actual(data, predictions, actual, title): Plots predicted vs. actual values for visual comparison.
"""


# Data Integrity and Cleaning

In [None]:
"""
detect_outliers(data, method): Detects outliers in the dataset using a specified method (e.g., IQR, Z-score).
impute_missing_values(data, imputation_strategy): Imputes missing values using a specified strategy (e.g., mean, median, k-NN).
"""


# 

# 