In [24]:
from stock_data import fetch_single_stock_data, fetch_multiple_stocks_data
from stock_features import (
    calculate_daily_returns,
    create_next_day_targets,
    add_lagged_features,
    add_price_range_features,
    calculate_true_range,
    calculate_atr,
    add_volume_features,
    calculate_obv,
    calculate_rsi,
    calculate_macd,
    add_moving_averages,
    add_bollinger_bands,
    calculate_stochastic_oscillator,
    calculate_adx,
    add_time_based_features
)
import pandas as pd


In [25]:
# Load the raw daily price history for one ticker. The arguments are passed
# positionally as (ticker, start date, end date); the helper's own log line in
# the output echoes the same three values.
df_single_stock = fetch_single_stock_data(
    "AAPL",
    "2010-01-01",
    "2024-12-31",
)

# Preview the first rows to confirm the column layout before feature engineering.
df_single_stock.head()


Fetching data for AAPL from 2010-01-01 to 2024-12-31...


  stock_data = yf.download(ticker, start=start_date, end=end_date, progress=False)


Unnamed: 0_level_0,Close_AAPL,High_AAPL,Low_AAPL,Open_AAPL,Volume_AAPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,6.431895,6.446622,6.382907,6.414464,493729600
2010-01-05,6.443017,6.479382,6.409055,6.449629,601904800
2010-01-06,6.340533,6.468564,6.333921,6.443018,552160000
2010-01-07,6.32881,6.371487,6.282827,6.363974,477131200
2010-01-08,6.370885,6.371487,6.283128,6.320394,447610800


In [26]:
# Derive the ticker symbol from the frame's column labels. The labels displayed
# by the fetch cell follow the pattern '<Field>_<TICKER>', e.g. 'Close_AAPL'.
first_column_name = df_single_stock.columns[0]  # 'Close_AAPL' for this download

# Split on the FIRST underscore only, so a ticker that itself contains an
# underscore is kept whole instead of being cut off at its first segment.
parts = first_column_name.split('_', 1)  # ['Close', 'AAPL']

# Fail loudly with a readable message when the label does not match the
# expected pattern, rather than surfacing an opaque IndexError or silently
# continuing with an empty prefix.
if len(parts) != 2 or not parts[1]:
    raise ValueError(
        f"Cannot extract a ticker from column {first_column_name!r}; "
        "expected the form '<Field>_<TICKER>'."
    )

# Everything after the first underscore is the ticker prefix.
ticker_prefix = parts[1]

print(f"The extracted ticker prefix is: {ticker_prefix}")


The extracted ticker prefix is: AAPL


In [27]:
# First batch of engineered features: thread the raw frame through the four
# helpers in the same order as before, handing each one the ticker prefix.
# DataFrame.pipe(func, arg) calls func(frame, arg), so every step receives the
# frame returned by the previous step.
df = (
    df_single_stock
    .pipe(add_price_range_features, ticker_prefix)
    .pipe(calculate_true_range, ticker_prefix)
    .pipe(add_volume_features, ticker_prefix)
    .pipe(calculate_obv, ticker_prefix)
)

# Inspect the leading rows; the new feature columns appear after the raw
# price/volume columns in the displayed output.
df.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[f'{ticker_prefix}_Close_to_Range_Ratio'].fillna(0.5, inplace=True) # If range is 0, assume middle
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[f'{ticker_prefix}_Volume_MA_Ratio'].replace([np.inf, -np.inf], np.nan, inplace=True)
You are setting values through chained assi

Unnamed: 0_level_0,Close_AAPL,High_AAPL,Low_AAPL,Open_AAPL,Volume_AAPL,AAPL_HighLow_Range,AAPL_OpenClose_Range,AAPL_Close_to_Range_Ratio,AAPL_True_Range,AAPL_Volume_Daily_Change,AAPL_Volume_MA_20D,AAPL_Volume_MA_Ratio,AAPL_OBV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-01-04,6.431895,6.446622,6.382907,6.414464,493729600,0.063715,0.017431,0.768868,,,493729600.0,1.0,0.0
2010-01-05,6.443017,6.479382,6.409055,6.449629,601904800,0.070327,-0.006612,0.482909,0.070327,21.909807,547817200.0,1.098733,601904800.0
2010-01-06,6.340533,6.468564,6.333921,6.443018,552160000,0.134643,-0.102485,0.049108,0.134643,-8.264563,549264800.0,1.005271,49744800.0
2010-01-07,6.32881,6.371487,6.282827,6.363974,477131200,0.08866,-0.035164,0.51864,0.08866,-13.588235,531231400.0,0.898161,-427386400.0
2010-01-08,6.370885,6.371487,6.283128,6.320394,447610800,0.088359,0.050491,0.993193,0.088359,-6.187061,514507280.0,0.86998,20224400.0


In [28]:
# Append the RSI feature. The helper logs window=14 and the displayed frame
# gains an 'AAPL_RSI14' column.
df = df.pipe(calculate_rsi, ticker_prefix)

# The first rows shown are empty for the new column -- presumably the
# indicator's warm-up period; confirm against stock_features.calculate_rsi.
df.head()

  - Calculating RSI for AAPL (window=14)...


Unnamed: 0_level_0,Close_AAPL,High_AAPL,Low_AAPL,Open_AAPL,Volume_AAPL,AAPL_HighLow_Range,AAPL_OpenClose_Range,AAPL_Close_to_Range_Ratio,AAPL_True_Range,AAPL_Volume_Daily_Change,AAPL_Volume_MA_20D,AAPL_Volume_MA_Ratio,AAPL_OBV,AAPL_RSI14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-01-04,6.431895,6.446622,6.382907,6.414464,493729600,0.063715,0.017431,0.768868,,,493729600.0,1.0,0.0,
2010-01-05,6.443017,6.479382,6.409055,6.449629,601904800,0.070327,-0.006612,0.482909,0.070327,21.909807,547817200.0,1.098733,601904800.0,
2010-01-06,6.340533,6.468564,6.333921,6.443018,552160000,0.134643,-0.102485,0.049108,0.134643,-8.264563,549264800.0,1.005271,49744800.0,
2010-01-07,6.32881,6.371487,6.282827,6.363974,477131200,0.08866,-0.035164,0.51864,0.08866,-13.588235,531231400.0,0.898161,-427386400.0,
2010-01-08,6.370885,6.371487,6.283128,6.320394,447610800,0.088359,0.050491,0.993193,0.088359,-6.187061,514507280.0,0.86998,20224400.0,


In [29]:
# Append the MACD features. The helper logs fast=12, slow=26, signal=9 and the
# displayed frame gains 'AAPL_MACD_Line', 'AAPL_MACD_Signal' and
# 'AAPL_MACD_Hist' columns.
df = df.pipe(calculate_macd, ticker_prefix)

# The first rows shown are empty for all three new columns -- presumably a
# warm-up period; confirm against stock_features.calculate_macd.
df.head()

  - Calculating MACD for AAPL (fast=12, slow=26, signal=9)...


Unnamed: 0_level_0,Close_AAPL,High_AAPL,Low_AAPL,Open_AAPL,Volume_AAPL,AAPL_HighLow_Range,AAPL_OpenClose_Range,AAPL_Close_to_Range_Ratio,AAPL_True_Range,AAPL_Volume_Daily_Change,AAPL_Volume_MA_20D,AAPL_Volume_MA_Ratio,AAPL_OBV,AAPL_RSI14,AAPL_MACD_Line,AAPL_MACD_Signal,AAPL_MACD_Hist
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2010-01-04,6.431895,6.446622,6.382907,6.414464,493729600,0.063715,0.017431,0.768868,,,493729600.0,1.0,0.0,,,,
2010-01-05,6.443017,6.479382,6.409055,6.449629,601904800,0.070327,-0.006612,0.482909,0.070327,21.909807,547817200.0,1.098733,601904800.0,,,,
2010-01-06,6.340533,6.468564,6.333921,6.443018,552160000,0.134643,-0.102485,0.049108,0.134643,-8.264563,549264800.0,1.005271,49744800.0,,,,
2010-01-07,6.32881,6.371487,6.282827,6.363974,477131200,0.08866,-0.035164,0.51864,0.08866,-13.588235,531231400.0,0.898161,-427386400.0,,,,
2010-01-08,6.370885,6.371487,6.283128,6.320394,447610800,0.088359,0.050491,0.993193,0.088359,-6.187061,514507280.0,0.86998,20224400.0,,,,
