# Technical Indicators

## 0. Imports and Data

In [38]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv("TSLA.csv")
data['Volume'] = data['Volume'].astype(float)
data = data.drop('Adj Close', axis=1)
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

In [3]:
data.dtypes

Open      float64
High      float64
Low       float64
Close     float64
Volume    float64
dtype: object

In [4]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0
...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0


In [5]:
df = data

## 1. Description & Code

### A. Moving Average (MA(5) & MA(20))

In [6]:
def moving_averages(df, column_name='Close', window_sizes=[5, 20]):
    """
    Add Moving Averages (MA) columns to the DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which moving averages are calculated.
    - window_sizes (list): List of window sizes for moving averages. Default is [5, 20].

    Returns:
    - pd.DataFrame: DataFrame with added MA columns.
    """
    for window_size in window_sizes:
        ma_column_name = f'MA_{window_size}'
        df[ma_column_name] = df[column_name].rolling(window=window_size).mean()

    return df

In [7]:
moving_averages(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,
...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002


### B. Bollinger Band (BB up & BB down)

In [8]:
def bollinger_bands(df, column_name='Close', window_size=20, num_std_dev=2):
    """
    Calculate Bollinger Bands for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which Bollinger Bands are calculated.
    - window_size (int): Window size for the moving average. Default is 20.
    - num_std_dev (int): Number of standard deviations for the upper and lower bands. Default is 2.

    Returns:
    - pd.DataFrame: DataFrame with added columns for Bollinger Bands (BB up, BB down).
    """
    # Calculate the rolling mean (middle band)
    df['MiddleBand'] = df[column_name].rolling(window=window_size).mean()

    # Calculate the rolling standard deviation
    df['StdDev'] = df[column_name].rolling(window=window_size).std()

    # Calculate Bollinger Bands
    df['BB_Up'] = df['MiddleBand'] + num_std_dev * df['StdDev']
    df['BB_Down'] = df['MiddleBand'] - num_std_dev * df['StdDev']

    # Drop intermediate columns
    df.drop(['MiddleBand', 'StdDev'], axis=1, inplace=True)

    return df

In [9]:
bollinger_bands(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,
...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893


### C. Relative Difference in the Percentage of the price (RDP(1))

In [10]:
def rdp(df, column_name='Close'):
    """
    Calculate Relative Difference in the Percentage of the price (RDP(1)) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which RDP(1) is calculated.

    Returns:
    - pd.DataFrame: DataFrame with an added column for RDP(1).
    """
    # Calculate RDP(1)
    df['RDP_1'] = df[column_name].pct_change() * 100

    return df

In [11]:
rdp(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749
...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074


### D. Bias Ratio (BIAS(6), BIAS(12) & BIAS(24))

In [12]:
def bias(df, column_name='Close', ma_windows=[6, 12, 24]):
    """
    Calculate Bias Ratios (BIAS) for specified moving average windows for a column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which BIAS is calculated.
    - ma_windows (list): List of moving average window sizes. Default is [6, 12, 24].

    Returns:
    - pd.DataFrame: DataFrame with added columns for BIAS(6), BIAS(12), and BIAS(24).
    """
    for window_size in ma_windows:
        ma_column_name = f'MA_{window_size}'
        bias_column_name = f'BIAS_{window_size}'

        # Calculate the moving average
        df[ma_column_name] = df[column_name].rolling(window=window_size).mean()

        # Calculate BIAS
        df[bias_column_name] = ((df[column_name] - df[ma_column_name]) / df[ma_column_name]) * 100

        # Drop intermediate columns
        df.drop(ma_column_name, axis=1, inplace=True)

    return df

In [13]:
bias(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,BIAS_6,BIAS_12,BIAS_24
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,,,
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,,,
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,,,
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,0.671858,5.271958,15.612383
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,2.152384,6.198360,16.879208
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,10.365869,15.099888,26.625400
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,9.581680,14.915302,26.270638


### E. Relative Strength Index (RSI)

In [14]:
def rsi(df, column_name='Close', window=14):
    """
    Calculate the Relative Strength Index (RSI) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which RSI is calculated. Default is 'Close'.
    - window (int): Window size for RSI calculation. Default is 14.

    Returns:
    - pd.DataFrame: DataFrame with an added column for RSI.
    """
    # Calculate daily price changes
    df['PriceChange'] = df[column_name].diff()

    # Calculate the average gain and average loss over the specified window
    df['Gain'] = df['PriceChange'].apply(lambda x: x if x > 0 else 0).rolling(window=window, min_periods=1).mean()
    df['Loss'] = -df['PriceChange'].apply(lambda x: x if x < 0 else 0).rolling(window=window, min_periods=1).mean()

    # Calculate relative strength (RS)
    df['RS'] = df['Gain'] / df['Loss']

    # Calculate RSI
    df['RSI'] = 100 - (100 / (1 + df['RS']))

    # Drop intermediate columns
    df.drop(['PriceChange', 'Gain', 'Loss', 'RS'], axis=1, inplace=True)

    return df

In [15]:
rsi(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,BIAS_6,BIAS_12,BIAS_24,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,,,,0.000000
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,,,,0.000000
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,,,,0.000000
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,,,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,0.671858,5.271958,15.612383,73.404463
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,2.152384,6.198360,16.879208,72.209166
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,10.365869,15.099888,26.625400,82.015666
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,9.581680,14.915302,26.270638,83.725850


### F. Exponential Moving Average (EMA(12) & EMA(26))

In [16]:
def ema(df, column_name='Close', ema_short=12, ema_long=26):
    """
    Calculate Exponential Moving Averages (EMA) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which EMA is calculated. Default is 'Close'.
    - ema_short (int): Short-term EMA window size. Default is 12.
    - ema_long (int): Long-term EMA window size. Default is 26.

    Returns:
    - pd.DataFrame: DataFrame with added columns for EMA(12) and EMA(26).
    """
    # Calculate EMA(12)
    df['EMA_12'] = df[column_name].ewm(span=ema_short, adjust=False).mean()

    # Calculate EMA(26)
    df['EMA_26'] = df[column_name].ewm(span=ema_long, adjust=False).mean()

    return df

In [17]:
ema(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,BIAS_6,BIAS_12,BIAS_24,RSI,EMA_12,EMA_26
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,,,,,23.889999,23.889999
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,,,,0.000000,23.880768,23.885555
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,,,,0.000000,23.585265,23.742921
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,,,,0.000000,22.910609,23.406408
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,,,,0.000000,21.864362,22.865934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,0.671858,5.271958,15.612383,73.404463,536.273009,491.428204
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,2.152384,6.198360,16.879208,72.209166,543.152545,498.062411
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,10.365869,15.099888,26.625400,82.015666,558.176768,508.636306
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,9.581680,14.915302,26.270638,83.725850,572.391113,519.149913


### G. Moving Average Convergence/Divergence (MACD)

In [18]:
def macd(df, column_name='Close', ema_short=12, ema_long=26, signal_period=9):
    """
    Calculate Moving Average Convergence Divergence (MACD) and its signal line for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which MACD is calculated. Default is 'Close'.
    - ema_short (int): Short-term EMA window size. Default is 12.
    - ema_long (int): Long-term EMA window size. Default is 26.
    - signal_period (int): Signal line EMA window size. Default is 9.

    Returns:
    - pd.DataFrame: DataFrame with added columns for MACD, Signal Line, and MACD Histogram.
    """
    # Calculate short-term EMA
    df['EMA_short'] = df[column_name].ewm(span=ema_short, adjust=False).mean()

    # Calculate long-term EMA
    df['EMA_long'] = df[column_name].ewm(span=ema_long, adjust=False).mean()

    # Calculate MACD Line
    df['DIF'] = df['EMA_short'] - df['EMA_long']

    # Calculate Signal Line
    df['Signal_Line'] = df['DIF'].ewm(span=signal_period, adjust=False).mean()

    # Calculate MACD Histogram
    df['OSC'] = df['DIF'] - df['Signal_Line']

    # Drop intermediate columns
    df.drop(['EMA_short', 'EMA_long'], axis=1, inplace=True)

    return df

In [19]:
macd(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,BIAS_6,BIAS_12,BIAS_24,RSI,EMA_12,EMA_26,DIF,Signal_Line,OSC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,,,,,23.889999,23.889999,0.000000,0.000000,0.000000
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,,,,0.000000,23.880768,23.885555,-0.004786,-0.000957,-0.003829
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,,,,0.000000,23.585265,23.742921,-0.157655,-0.032297,-0.125359
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,,,,0.000000,22.910609,23.406408,-0.495799,-0.124997,-0.370802
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,,,,0.000000,21.864362,22.865934,-1.001572,-0.300312,-0.701260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,0.671858,5.271958,15.612383,73.404463,536.273009,491.428204,44.844805,42.681788,2.163018
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,2.152384,6.198360,16.879208,72.209166,543.152545,498.062411,45.090134,43.163457,1.926677
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,10.365869,15.099888,26.625400,82.015666,558.176768,508.636306,49.540463,44.438858,5.101604
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,9.581680,14.915302,26.270638,83.725850,572.391113,519.149913,53.241199,46.199326,7.041873


### H. Psychological Line (PSY(12) & PSY(24))

In [20]:
def psy(df, column_name='Close', psy_short=12, psy_long=24):
    """
    Calculate Psychological Line (PSY) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which PSY is calculated. Default is 'Close'.
    - psy_short (int): Short-term PSY window size. Default is 12.
    - psy_long (int): Long-term PSY window size. Default is 24.

    Returns:
    - pd.DataFrame: DataFrame with added columns for PSY(12) and PSY(24).
    """
    # Calculate the percentage of days where the closing price is higher than the previous day's closing price
    df['PriceUp'] = df[column_name].diff() > 0

    # Calculate PSY(12)
    df['PSY_12'] = df['PriceUp'].rolling(window=psy_short).mean() * 100

    # Calculate PSY(24)
    df['PSY_24'] = df['PriceUp'].rolling(window=psy_long).mean() * 100

    # Drop intermediate columns
    df.drop(['PriceUp'], axis=1, inplace=True)

    return df

In [21]:
psy(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,BIAS_12,BIAS_24,RSI,EMA_12,EMA_26,DIF,Signal_Line,OSC,PSY_12,PSY_24
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,...,,,,23.889999,23.889999,0.000000,0.000000,0.000000,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,...,,,0.000000,23.880768,23.885555,-0.004786,-0.000957,-0.003829,,
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,...,,,0.000000,23.585265,23.742921,-0.157655,-0.032297,-0.125359,,
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,...,,,0.000000,22.910609,23.406408,-0.495799,-0.124997,-0.370802,,
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,...,,,0.000000,21.864362,22.865934,-1.001572,-0.300312,-0.701260,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,5.271958,15.612383,73.404463,536.273009,491.428204,44.844805,42.681788,2.163018,50.000000,62.500000
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,6.198360,16.879208,72.209166,543.152545,498.062411,45.090134,43.163457,1.926677,58.333333,62.500000
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,15.099888,26.625400,82.015666,558.176768,508.636306,49.540463,44.438858,5.101604,58.333333,62.500000
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,14.915302,26.270638,83.725850,572.391113,519.149913,53.241199,46.199326,7.041873,58.333333,62.500000


### I. Williams %R (WMS%R)

In [22]:
def williams_percent_r(df, high_column='High', low_column='Low', close_column='Close', window=14):
    """
    Calculate Williams %R for a specified high, low, and close columns in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - high_column (str): Name of the column containing high prices. Default is 'High'.
    - low_column (str): Name of the column containing low prices. Default is 'Low'.
    - close_column (str): Name of the column containing close prices. Default is 'Close'.
    - window (int): Window size for Williams %R calculation. Default is 14.

    Returns:
    - pd.DataFrame: DataFrame with an added column for Williams %R.
    """
    # Calculate highest high and lowest low over the specified window
    df['HH'] = df[high_column].rolling(window=window).max()
    df['LL'] = df[low_column].rolling(window=window).min()

    # Calculate Williams %R
    df['Williams_%R'] = (df['HH'] - df[close_column]) / (df['HH'] - df['LL']) * -100

    # Drop intermediate columns
    df.drop(['HH', 'LL'], axis=1, inplace=True)

    return df

In [23]:
williams_percent_r(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,BIAS_24,RSI,EMA_12,EMA_26,DIF,Signal_Line,OSC,PSY_12,PSY_24,Williams_%R
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,...,,,23.889999,23.889999,0.000000,0.000000,0.000000,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,...,,0.000000,23.880768,23.885555,-0.004786,-0.000957,-0.003829,,,
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,...,,0.000000,23.585265,23.742921,-0.157655,-0.032297,-0.125359,,,
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,...,,0.000000,22.910609,23.406408,-0.495799,-0.124997,-0.370802,,,
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,...,,0.000000,21.864362,22.865934,-1.001572,-0.300312,-0.701260,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,15.612383,73.404463,536.273009,491.428204,44.844805,42.681788,2.163018,50.000000,62.500000,-21.857906
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,16.879208,72.209166,543.152545,498.062411,45.090134,43.163457,1.926677,58.333333,62.500000,-11.107465
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,26.625400,82.015666,558.176768,508.636306,49.540463,44.438858,5.101604,58.333333,62.500000,-5.683490
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,26.270638,83.725850,572.391113,519.149913,53.241199,46.199326,7.041873,58.333333,62.500000,-1.509312


### J. Stochastic Oscillator (Stochastic%K & Stochastic%D)

In [24]:
def stochastic_oscillator(df, high_column='High', low_column='Low', close_column='Close', k_window=14, d_window=3):
    """
    Calculate Stochastic Oscillator (%K and %D) for specified high, low, and close columns in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - high_column (str): Name of the column containing high prices. Default is 'High'.
    - low_column (str): Name of the column containing low prices. Default is 'Low'.
    - close_column (str): Name of the column containing close prices. Default is 'Close'.
    - k_window (int): Window size for %K calculation. Default is 14.
    - d_window (int): Window size for %D calculation. Default is 3.

    Returns:
    - pd.DataFrame: DataFrame with added columns for Stochastic %K and %D.
    """
    # Calculate lowest low and highest high over the specified window
    df['LL'] = df[low_column].rolling(window=k_window).min()
    df['HH'] = df[high_column].rolling(window=k_window).max()

    # Calculate Stochastic %K
    df['Stochastic_%K'] = ((df[close_column] - df['LL']) / (df['HH'] - df['LL'])) * 100

    # Calculate Stochastic %D (3-day simple moving average of %K)
    df['Stochastic_%D'] = df['Stochastic_%K'].rolling(window=d_window).mean()

    # Drop intermediate columns
    df.drop(['LL', 'HH'], axis=1, inplace=True)

    return df

In [25]:
stochastic_oscillator(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,EMA_12,EMA_26,DIF,Signal_Line,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,...,23.889999,23.889999,0.000000,0.000000,0.000000,,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,...,23.880768,23.885555,-0.004786,-0.000957,-0.003829,,,,,
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,...,23.585265,23.742921,-0.157655,-0.032297,-0.125359,,,,,
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,...,22.910609,23.406408,-0.495799,-0.124997,-0.370802,,,,,
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,...,21.864362,22.865934,-1.001572,-0.300312,-0.701260,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,536.273009,491.428204,44.844805,42.681788,2.163018,50.000000,62.500000,-21.857906,78.142094,77.695027
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,543.152545,498.062411,45.090134,43.163457,1.926677,58.333333,62.500000,-11.107465,88.892535,80.395990
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,558.176768,508.636306,49.540463,44.438858,5.101604,58.333333,62.500000,-5.683490,94.316510,87.117046
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,572.391113,519.149913,53.241199,46.199326,7.041873,58.333333,62.500000,-1.509312,98.490688,93.899911


### K. Percentage of Price Change (PROC)

In [26]:
def proc(df, column_name='Close', window=1):
    """
    Calculate Percentage of Price Change (PROC) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which PROC is calculated. Default is 'Close'.
    - window (int): Window size for PROC calculation. Default is 1.

    Returns:
    - pd.DataFrame: DataFrame with an added column for PROC.
    """
    # Calculate the percentage change in price using rolling window
    df['PROC'] = df[column_name].pct_change().rolling(window=window).mean() * 100

    return df

In [27]:
proc(df)
df.head(25)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,EMA_26,DIF,Signal_Line,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D,PROC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.0,25.0,17.540001,23.889999,18766300.0,,,,,,...,23.889999,0.0,0.0,0.0,,,,,,
2010-06-30,25.790001,30.42,23.299999,23.83,17187100.0,,,,,-0.251147,...,23.885555,-0.004786,-0.000957,-0.003829,,,,,,-0.251147
2010-07-01,25.0,25.92,20.27,21.959999,8218800.0,,,,,-7.847256,...,23.742921,-0.157655,-0.032297,-0.125359,,,,,,-7.847256
2010-07-02,23.0,23.1,18.709999,19.200001,5139800.0,,,,,-12.568297,...,23.406408,-0.495799,-0.124997,-0.370802,,,,,,-12.568297
2010-07-06,20.0,20.0,15.83,16.110001,6866900.0,20.998,,,,-16.093749,...,22.865934,-1.001572,-0.300312,-0.70126,,,,,,-16.093749
2010-07-07,16.4,16.629999,14.98,15.8,6921700.0,19.38,,,,-1.924277,...,22.342531,-1.411148,-0.522479,-0.888669,,,,,,-1.924277
2010-07-08,16.139999,17.52,15.57,17.459999,7711400.0,18.106,,,,10.506323,...,21.980862,-1.583538,-0.734691,-0.848847,,,,,,10.506323
2010-07-09,17.58,17.9,16.549999,17.4,4050600.0,17.194,,,,-0.343637,...,21.641539,-1.705342,-0.928821,-0.776521,,,,,,-0.343637
2010-07-12,17.950001,18.07,17.0,17.049999,2202500.0,16.764,,,,-2.0115,...,21.301425,-1.809258,-1.104909,-0.70435,,,,,,-2.0115
2010-07-13,17.389999,18.639999,16.9,18.139999,2680100.0,17.169999,,,,6.392962,...,21.067245,-1.783104,-1.240548,-0.542557,,,,,,6.392962


### L. Momentum (MO(1))

In [28]:
def momentum(df, column_name='Close', window=1):
    """
    Calculate Momentum (MO) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which Momentum is calculated. Default is 'Close'.
    - window (int): Window size for Momentum calculation. Default is 1.

    Returns:
    - pd.DataFrame: DataFrame with an added column for Momentum.
    """
    # Calculate the difference in price over the specified window
    df['Momentum'] = df[column_name].diff(window)

    return df

In [29]:
momentum(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,DIF,Signal_Line,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D,PROC,Momentum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,...,0.000000,0.000000,0.000000,,,,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,...,-0.004786,-0.000957,-0.003829,,,,,,-0.251147,-0.059999
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,...,-0.157655,-0.032297,-0.125359,,,,,,-7.847256,-1.870001
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,...,-0.495799,-0.124997,-0.370802,,,,,,-12.568297,-2.759998
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,...,-1.001572,-0.300312,-0.701260,,,,,,-16.093749,-3.090000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,44.844805,42.681788,2.163018,50.000000,62.500000,-21.857906,78.142094,77.695027,1.591341,8.880004
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,45.090134,43.163457,1.926677,58.333333,62.500000,-11.107465,88.892535,80.395990,2.485441,14.089966
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,49.540463,44.438858,5.101604,58.333333,62.500000,-5.683490,94.316510,87.117046,10.296220,59.820008
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,53.241199,46.199326,7.041873,58.333333,62.500000,-1.509312,98.490688,93.899911,1.523074,9.760009


### M. First-Order Lag (LAG(1))

In [30]:
def first_order_lag(df, column_name='Close', lag=1):
    """
    Calculate First-Order Lag (LAG(1)) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - column_name (str): Name of the column for which the lag is calculated. Default is 'Close'.
    - lag (int): Number of periods to lag. Default is 1.

    Returns:
    - pd.DataFrame: DataFrame with an added column for the First-Order Lag.
    """
    # Calculate the First-Order Lag using the shift() method
    df[f'LAG_{lag}'] = df[column_name].shift(lag)

    return df

In [31]:
first_order_lag(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,Signal_Line,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D,PROC,Momentum,LAG_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,...,0.000000,0.000000,,,,,,,,
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,...,-0.000957,-0.003829,,,,,,-0.251147,-0.059999,23.889999
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,...,-0.032297,-0.125359,,,,,,-7.847256,-1.870001,23.830000
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,...,-0.124997,-0.370802,,,,,,-12.568297,-2.759998,21.959999
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,...,-0.300312,-0.701260,,,,,,-16.093749,-3.090000,19.200001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,42.681788,2.163018,50.000000,62.500000,-21.857906,78.142094,77.695027,1.591341,8.880004,558.020020
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,43.163457,1.926677,58.333333,62.500000,-11.107465,88.892535,80.395990,2.485441,14.089966,566.900024
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,44.438858,5.101604,58.333333,62.500000,-5.683490,94.316510,87.117046,10.296220,59.820008,580.989990
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,46.199326,7.041873,58.333333,62.500000,-1.509312,98.490688,93.899911,1.523074,9.760009,640.809998


### N. Trading Volume (VOL)

In [32]:
def trading_volume(df, volume_column='Volume'):
    """
    Calculate Trading Volume (VOL) for a specified column in a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - volume_column (str): Name of the column containing trading volume. Default is 'Volume'.

    Returns:
    - pd.DataFrame: DataFrame with an added column for Trading Volume.
    """
    df['VOL'] = df[volume_column]

    return df

In [33]:
trading_volume(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D,PROC,Momentum,LAG_1,VOL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,19.000000,25.000000,17.540001,23.889999,18766300.0,,,,,,...,0.000000,,,,,,,,,18766300.0
2010-06-30,25.790001,30.420000,23.299999,23.830000,17187100.0,,,,,-0.251147,...,-0.003829,,,,,,-0.251147,-0.059999,23.889999,17187100.0
2010-07-01,25.000000,25.920000,20.270000,21.959999,8218800.0,,,,,-7.847256,...,-0.125359,,,,,,-7.847256,-1.870001,23.830000,8218800.0
2010-07-02,23.000000,23.100000,18.709999,19.200001,5139800.0,,,,,-12.568297,...,-0.370802,,,,,,-12.568297,-2.759998,21.959999,5139800.0
2010-07-06,20.000000,20.000000,15.830000,16.110001,6866900.0,20.998000,,,,-16.093749,...,-0.701260,,,,,,-16.093749,-3.090000,19.200001,6866900.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,2.163018,50.000000,62.500000,-21.857906,78.142094,77.695027,1.591341,8.880004,558.020020,11788500.0
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,1.926677,58.333333,62.500000,-11.107465,88.892535,80.395990,2.485441,14.089966,566.900024,17801500.0
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,5.101604,58.333333,62.500000,-5.683490,94.316510,87.117046,10.296220,59.820008,580.989990,29005700.0
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,7.041873,58.333333,62.500000,-1.509312,98.490688,93.899911,1.523074,9.760009,640.809998,15719300.0


## 2. Finishing touches

Does not work

In [34]:
def drop_columns(df, columns_to_drop=['Open', 'High', 'Low', 'Close', 'Volume']):
    """
    Drop specified columns from a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.
    - columns_to_drop (list): List of column names to drop. Default is ['Open', 'High', 'Low', 'Close', 'Volume'].

    Returns:
    - pd.DataFrame: DataFrame with specified columns dropped.
    """
    # Drop specified columns
    df = df.drop(columns=columns_to_drop, errors='ignore')

    return df

In [35]:
drop_columns(df)

Unnamed: 0_level_0,MA_5,MA_20,BB_Up,BB_Down,RDP_1,BIAS_6,BIAS_12,BIAS_24,RSI,EMA_12,...,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D,PROC,Momentum,LAG_1,VOL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-06-29,,,,,,,,,,23.889999,...,0.000000,,,,,,,,,18766300.0
2010-06-30,,,,,-0.251147,,,,0.000000,23.880768,...,-0.003829,,,,,,-0.251147,-0.059999,23.889999,17187100.0
2010-07-01,,,,,-7.847256,,,,0.000000,23.585265,...,-0.125359,,,,,,-7.847256,-1.870001,23.830000,8218800.0
2010-07-02,,,,,-12.568297,,,,0.000000,22.910609,...,-0.370802,,,,,,-12.568297,-2.759998,21.959999,5139800.0
2010-07-06,20.998000,,,,-16.093749,,,,0.000000,21.864362,...,-0.701260,,,,,,-16.093749,-3.090000,19.200001,6866900.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,566.300012,503.125003,608.824291,397.425715,1.591341,0.671858,5.271958,15.612383,73.404463,536.273009,...,2.163018,50.000000,62.500000,-21.857906,78.142094,77.695027,1.591341,8.880004,558.020020,11788500.0
2020-01-29,568.586011,511.439502,613.965465,408.913539,2.485441,2.152384,6.198360,16.879208,72.209166,543.152545,...,1.926677,58.333333,62.500000,-11.107465,88.892535,80.395990,2.485441,14.089966,566.900024,17801500.0
2020-01-30,582.308008,522.563503,630.678887,414.448118,10.296220,10.365869,15.099888,26.625400,82.015666,558.176768,...,5.101604,58.333333,62.500000,-5.683490,94.316510,87.117046,10.296220,59.820008,580.989990,29005700.0
2020-01-31,599.458008,533.579002,646.866112,420.291893,1.523074,9.581680,14.915302,26.270638,83.725850,572.391113,...,7.041873,58.333333,62.500000,-1.509312,98.490688,93.899911,1.523074,9.760009,640.809998,15719300.0


In [44]:
def drop_nan_rows_with_indices(df):
    """
    Drop all rows with NaN values from a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.

    Returns:
    - pd.DataFrame: DataFrame with NaN rows dropped.
    - list: List of indices corresponding to dropped rows.
    """
    # Find the indices of rows with NaN values
    nan_indices = df.index[df.isna().any(axis=1)].tolist()

    # Drop rows with NaN values
    cleaned_df = df.dropna()

    return cleaned_df, nan_indices

In [47]:
drop_nan_rows_with_indices(df)[0]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_5,MA_20,BB_Up,BB_Down,RDP_1,...,OSC,PSY_12,PSY_24,Williams_%R,Stochastic_%K,Stochastic_%D,PROC,Momentum,LAG_1,VOL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-08-02,20.500000,20.969999,20.330000,20.920000,718100.0,20.496000,19.524000,23.171871,15.876129,4.914739,...,0.206277,50.000000,41.666667,-29.621381,70.378619,63.895683,4.914739,0.979999,19.940001,718100.0
2010-08-03,21.000000,21.950001,20.820000,21.950001,1230500.0,20.776000,19.816000,23.241382,16.390618,4.923523,...,0.275763,50.000000,45.833333,-9.230738,90.769262,72.656776,4.923523,1.030001,20.920000,1230500.0
2010-08-04,21.950001,22.180000,20.850000,21.260000,913000.0,20.884000,20.089000,22.998121,17.179878,-3.143512,...,0.264709,41.666667,45.833333,-36.000000,64.000000,75.049294,-3.143512,-0.690001,21.950001,913000.0
2010-08-05,21.540001,21.549999,20.049999,20.450001,796200.0,20.904001,20.238500,22.873122,17.603877,-3.809967,...,0.195492,41.666667,45.833333,-65.454509,34.545491,63.104917,-3.809967,-0.809999,21.260000,796200.0
2010-08-06,20.100000,20.160000,19.520000,19.590000,741900.0,20.834000,20.348000,22.646491,18.049509,-4.205384,...,0.089811,41.666667,45.833333,-96.641791,3.358209,33.967900,-4.205384,-0.860001,20.450001,741900.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-28,568.489990,576.809998,558.080017,566.900024,11788500.0,566.300012,503.125003,608.824291,397.425715,1.591341,...,2.163018,50.000000,62.500000,-21.857906,78.142094,77.695027,1.591341,8.880004,558.020020,11788500.0
2020-01-29,575.690002,589.799988,567.429993,580.989990,17801500.0,568.586011,511.439502,613.965465,408.913539,2.485441,...,1.926677,58.333333,62.500000,-11.107465,88.892535,80.395990,2.485441,14.089966,566.900024,17801500.0
2020-01-30,632.419983,650.880005,618.000000,640.809998,29005700.0,582.308008,522.563503,630.678887,414.448118,10.296220,...,5.101604,58.333333,62.500000,-5.683490,94.316510,87.117046,10.296220,59.820008,580.989990,29005700.0
2020-01-31,640.000000,653.000000,632.520020,650.570007,15719300.0,599.458008,533.579002,646.866112,420.291893,1.523074,...,7.041873,58.333333,62.500000,-1.509312,98.490688,93.899911,1.523074,9.760009,640.809998,15719300.0


In [45]:
def scale_dataframe(df):
    """
    Scale a DataFrame using Standard scaling.

    Parameters:
    - df (pd.DataFrame): Input DataFrame.

    Returns:
    - pd.DataFrame: Scaled DataFrame.
    """
    # Scale the selected columns
    scaler = StandardScaler()
    df = scaler.fit_transform(df)

    return df

In [48]:
scale_dataframe(drop_nan_rows_with_indices(df)[0])

array([[-1.41645939, -1.41490465, -1.41182331, ...,  0.09336538,
        -1.42177232, -0.97581202],
       [-1.41222778, -1.40675896, -1.40760923, ...,  0.10041337,
        -1.4134661 , -0.87306063],
       [-1.40418772, -1.40484722, -1.40735123, ..., -0.14202814,
        -1.40473607, -0.9367288 ],
       ...,
       [ 3.76235069,  3.82085469,  3.72823177, ...,  8.38711165,
         3.33354289,  4.69669028],
       [ 3.826502  ,  3.83847591,  3.85310621, ...,  1.33094464,
         3.84056187,  2.03237324],
       [ 4.11162771,  4.94512459,  4.20571259, ..., 18.19893125,
         3.92328519,  8.31811525]])