## Stock Market Prediction Using Different Machine Learning Algorithms


In [1]:
#importing the necessary libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#Importing the dataset
df=pd.read_csv("Stock_ABDI1.csv")
df['datetime'] = pd.to_datetime(df['<DTYYYYMMDD>'], format='%Y%m%d')
df.index = df['datetime']
df.dropna()
df_new = df[["<FIRST>","<HIGH>","<LOW>","<CLOSE>","<VALUE>","<VOL>"]]
df_new.head()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-03-25,26000.0,26000.0,24885.0,24885.0,112766576,4520
2001-03-26,24885.0,24885.0,24885.0,24885.0,1119825,45
2001-04-08,25000.0,25000.0,25000.0,25000.0,50000000,2000
2001-04-10,25003.0,25003.0,25003.0,25003.0,1750210,70
2001-04-15,25010.0,25010.0,25010.0,25010.0,3001200,120


## Calculating the technical indicators

In [3]:
#1. Simple n day moving average
def moving_average(df, n):
    """Calculate the moving average for the given data.
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    MA = pd.Series(df['<CLOSE>'].rolling(n, min_periods=n).mean(), name='MA_' + str(n))
    df = df.join(MA)
    return df
df_new = moving_average(df_new,10)
df_new.tail()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9


In [5]:
#2. Weighted Moving Average (WMA)
def weighted_moving_average(df, n):
    """
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    WMA = pd.Series(df['<CLOSE>'].ewm(span=n, min_periods=n).mean(), name='WMA_' + str(n))
    df = df.join(WMA)
    return df
df_new = weighted_moving_average(df_new,10)
df_new.tail()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10,WMA_10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5,55002.149356
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9,54921.394928
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7,55226.777668
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1,55390.636274
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9,55937.611497


In [7]:
#3. Momentum
def momentum(df, n):
    """
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    M = pd.Series(df['<CLOSE>'].diff(n), name='MOM_' + str(n))
    df = df.join(M)
    return df
df_new = momentum(df_new,10)
df_new.tail()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10,WMA_10,MOM_10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5,55002.149356,414.0
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9,54921.394928,484.0
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7,55226.777668,5218.0
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1,55390.636274,5944.0
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9,55937.611497,10388.0


In [10]:
#4. Stochastic K%
def stochastic_oscillator_k(df):
    """Calculate stochastic oscillator %K for given data.
    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    SOk = pd.Series((df['<CLOSE>'] - df['<LOW>']) / (df['<HIGH>'] - df['<LOW>']), name='SO_k')
    df = df.join(SOk)
    return df
df_new = stochastic_oscillator_k(df_new)
df_new.tail()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10,WMA_10,MOM_10,SO_k
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5,55002.149356,414.0,0.085597
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9,54921.394928,484.0,0.143197
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7,55226.777668,5218.0,0.700525
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1,55390.636274,5944.0,0.462247
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9,55937.611497,10388.0,0.843476


In [12]:
#5. Stochastic D%
def stochastic_oscillator_d(df, n):
    """Calculate stochastic oscillator %D for given data.
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    SOk = pd.Series((df['<CLOSE>'] - df['<LOW>']) / (df['<HIGH>'] - df['<LOW>']), name='SO%k')
    SOd = pd.Series(SOk.ewm(span=n, min_periods=n).mean(), name='SO_' + str(n))
    df = df.join(SOd)
    return df
df_new = stochastic_oscillator_d(df_new,10)
df_new.tail()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10,WMA_10,MOM_10,SO_k,SO_10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5,55002.149356,414.0,0.085597,0.497998
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9,54921.394928,484.0,0.143197,0.418632
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7,55226.777668,5218.0,0.700525,0.479155
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1,55390.636274,5944.0,0.462247,0.47564
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9,55937.611497,10388.0,0.843476,0.550159


In [15]:
#6. Relative Strength Index
#Error
"""
def relative_strength_index(df, n):
    Calculate Relative Strength Index(RSI) for given data.
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    i = df.index[0]
    UpI = [0]
    DoI = [0]
    while i + 1 <= df.index[-1]:
        UpMove = float(df.loc[i + 1, 'high']) - float(df.loc[i, 'high'])
        DoMove = float(df.loc[i, 'low']) - float(df.loc[i + 1, 'low'])
        if UpMove > DoMove and UpMove > 0:
            UpD = UpMove
        else:
            UpD = 0
        UpI.append(UpD)
        if DoMove > UpMove and DoMove > 0:
            DoD = DoMove
        else:
            DoD = 0
        DoI.append(DoD)
        i = i + 1
    UpI = pd.Series(UpI)

    DoI = pd.Series(DoI)
    PosDI = pd.Series(UpI.ewm(span=n, min_periods=n).mean())
    NegDI = pd.Series(DoI.ewm(span=n, min_periods=n).mean())

    # rsi = pd.Series(PosDI / (PosDI + NegDI), name='RSI_' + str(n))
    rsi = pd.DataFrame(PosDI / (PosDI + NegDI), columns=['RSI_' + str(n)])
    rsi = rsi.set_index(df.index)
    df = df.join(rsi)
    return df
"""

"\ndef relative_strength_index(df, n):\n    Calculate Relative Strength Index(RSI) for given data.\n    :param df: pandas.DataFrame\n    :param n:\n    :return: pandas.DataFrame\n    i = df.index[0]\n    UpI = [0]\n    DoI = [0]\n    while i + 1 <= df.index[-1]:\n        UpMove = float(df.loc[i + 1, 'high']) - float(df.loc[i, 'high'])\n        DoMove = float(df.loc[i, 'low']) - float(df.loc[i + 1, 'low'])\n        if UpMove > DoMove and UpMove > 0:\n            UpD = UpMove\n        else:\n            UpD = 0\n        UpI.append(UpD)\n        if DoMove > UpMove and DoMove > 0:\n            DoD = DoMove\n        else:\n            DoD = 0\n        DoI.append(DoD)\n        i = i + 1\n    UpI = pd.Series(UpI)\n\n    DoI = pd.Series(DoI)\n    PosDI = pd.Series(UpI.ewm(span=n, min_periods=n).mean())\n    NegDI = pd.Series(DoI.ewm(span=n, min_periods=n).mean())\n\n    # rsi = pd.Series(PosDI / (PosDI + NegDI), name='RSI_' + str(n))\n    rsi = pd.DataFrame(PosDI / (PosDI + NegDI), columns=['R

In [16]:
#7. Signal
#Value of n_fast and n_slow ?? 
"""
  def macd(df, n_fast, n_slow):
    Calculate MACD, MACD Signal and MACD difference
    :param df: pandas.DataFrame
    :param n_fast:
    :param n_slow:
    :return: pandas.DataFrame
    EMAfast = pd.Series(df['close'].ewm(span=n_fast, min_periods=n_slow).mean())
    EMAslow = pd.Series(df['close'].ewm(span=n_slow, min_periods=n_slow).mean())
    MACD = pd.Series(EMAfast - EMAslow, name='MACD_' + str(n_fast) + '_' + str(n_slow))
    MACDsign = pd.Series(MACD.ewm(span=9, min_periods=9).mean(), name='MACDsign_' + str(n_fast) + '_' + str(n_slow))
    MACDdiff = pd.Series(MACD - MACDsign, name='MACDdiff_' + str(n_fast) + '_' + str(n_slow))
    df = df.join(MACD)
    df = df.join(MACDsign)
    df = df.join(MACDdiff)
    return df
"""

"\n  def macd(df, n_fast, n_slow):\n    Calculate MACD, MACD Signal and MACD difference\n    :param df: pandas.DataFrame\n    :param n_fast:\n    :param n_slow:\n    :return: pandas.DataFrame\n    EMAfast = pd.Series(df['close'].ewm(span=n_fast, min_periods=n_slow).mean())\n    EMAslow = pd.Series(df['close'].ewm(span=n_slow, min_periods=n_slow).mean())\n    MACD = pd.Series(EMAfast - EMAslow, name='MACD_' + str(n_fast) + '_' + str(n_slow))\n    MACDsign = pd.Series(MACD.ewm(span=9, min_periods=9).mean(), name='MACDsign_' + str(n_fast) + '_' + str(n_slow))\n    MACDdiff = pd.Series(MACD - MACDsign, name='MACDdiff_' + str(n_fast) + '_' + str(n_slow))\n    df = df.join(MACD)\n    df = df.join(MACDsign)\n    df = df.join(MACDdiff)\n    return df\n"

In [17]:
#8. Larry Williams R%
#lEFT

In [19]:
#9. Accumulation / Distribution
def accumulation_distribution(df, n):
    """Calculate Accumulation/Distribution for given data.
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    ad = (2 * df['<CLOSE>'] - df['<HIGH>'] - df['<LOW>']) / (df['<HIGH>'] - df['<LOW>']) * df['<VOL>']
    M = ad.diff(n - 1)
    N = ad.shift(n - 1)
    ROC = M / N
    AD = pd.Series(ROC, name='Acc/Dist_ROC_' + str(n))
    df = df.join(AD)
    return df
df_new = accumulation_distribution(df_new,10)
df_new.tail()

  out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]


Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10,WMA_10,MOM_10,SO_k,SO_10,Acc/Dist_ROC_10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5,55002.149356,414.0,0.085597,0.497998,-0.208579
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9,54921.394928,484.0,0.143197,0.418632,0.180189
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7,55226.777668,5218.0,0.700525,0.479155,-3.989948
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1,55390.636274,5944.0,0.462247,0.47564,-0.928358
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9,55937.611497,10388.0,0.843476,0.550159,-0.353527


In [20]:
#10. Commodity Channel Index
def commodity_channel_index(df, n):
    """Calculate Commodity Channel Index for given data.
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    PP = (df['<HIGH>'] + df['<LOW>'] + df['<CLOSE>']) / 3
    CCI = pd.Series((PP - PP.rolling(n, min_periods=n).mean()) / PP.rolling(n, min_periods=n).std(),
                    name='CCI_' + str(n))
    df = df.join(CCI)
    return df
df_new = commodity_channel_index(df_new,10)
df_new.tail()

Unnamed: 0_level_0,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,MA_10,WMA_10,MOM_10,SO_k,SO_10,Acc/Dist_ROC_10,CCI_10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-05-26,56893.0,60313.0,56890.0,57183.0,63584582573,1112100,53564.5,55002.149356,414.0,0.085597,0.497998,-0.208579,1.149889
2020-05-27,54317.0,56000.0,54317.0,54558.0,158334486285,2902152,53612.9,54921.394928,484.0,0.143197,0.418632,0.180189,0.325225
2020-05-30,55001.0,57285.0,55001.0,56601.0,91570928155,1617823,54134.7,55226.777668,5218.0,0.700525,0.479155,-3.989948,0.556432
2020-05-31,58870.0,58870.0,53771.0,56128.0,68818222651,1226100,54729.1,55390.636274,5944.0,0.462247,0.47564,-0.928358,0.413202
2020-06-01,58450.0,58934.0,55516.0,58399.0,109955845548,1882827,55767.9,55937.611497,10388.0,0.843476,0.550159,-0.353527,0.663146
