In [103]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [104]:
data = pd.read_csv('..\sensex_for_ml.csv', index_col=0)
data

Unnamed: 0,Volume,EMA9,EMA20,EMA50,EMA100,EMA200,EMA500,MACD,RSI,Target
0,37000,3726.137652,3707.398215,3708.732188,3713.176797,3716.517658,3718.942305,17.017902,65.594419,1
1,26000,3743.972133,3717.675534,3712.911712,3715.199238,3717.500667,3719.327007,19.346568,68.202714,1
2,19800,3761.677707,3728.611197,3717.601449,3717.522025,3718.644939,3719.778795,20.909415,70.054109,-1
3,26000,3762.506179,3732.154899,3719.492375,3718.478422,3719.114343,3719.962593,16.510298,56.348537,-1
4,29000,3758.336926,3733.060138,3720.361691,3718.937461,3719.338677,3720.049209,11.280829,52.351981,1
...,...,...,...,...,...,...,...,...,...,...
5456,10900,83002.439162,82349.352752,81830.527411,81352.040260,80352.213679,76230.129737,276.890376,71.833191,0
5457,3400,83287.220079,82547.161419,81932.324130,81412.917556,80392.752287,76262.849154,293.978806,72.178588,0
5458,25700,83541.055751,82738.517325,82035.229005,81475.164703,80434.181602,76295.957135,294.142724,72.917185,-1
5459,13600,83675.221163,82878.837848,82120.587978,81529.357140,80471.770669,76327.557637,252.733714,67.783603,1


The first step is to separate the features and the target variable.

In [105]:
features = data.drop(['Target','EMA200'], axis=1)
target = data['Target']
features

Unnamed: 0,Volume,EMA9,EMA20,EMA50,EMA100,EMA500,MACD,RSI
0,37000,3726.137652,3707.398215,3708.732188,3713.176797,3718.942305,17.017902,65.594419
1,26000,3743.972133,3717.675534,3712.911712,3715.199238,3719.327007,19.346568,68.202714
2,19800,3761.677707,3728.611197,3717.601449,3717.522025,3719.778795,20.909415,70.054109
3,26000,3762.506179,3732.154899,3719.492375,3718.478422,3719.962593,16.510298,56.348537
4,29000,3758.336926,3733.060138,3720.361691,3718.937461,3720.049209,11.280829,52.351981
...,...,...,...,...,...,...,...,...
5456,10900,83002.439162,82349.352752,81830.527411,81352.040260,76230.129737,276.890376,71.833191
5457,3400,83287.220079,82547.161419,81932.324130,81412.917556,76262.849154,293.978806,72.178588
5458,25700,83541.055751,82738.517325,82035.229005,81475.164703,76295.957135,294.142724,72.917185
5459,13600,83675.221163,82878.837848,82120.587978,81529.357140,76327.557637,252.733714,67.783603


Now lets split the data into training sets and test sets.

In [106]:
np.random.seed(18)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, shuffle=False)
X_train

Unnamed: 0,Volume,EMA9,EMA20,EMA50,EMA100,EMA500,MACD,RSI
0,37000,3726.137652,3707.398215,3708.732188,3713.176797,3718.942305,17.017902,65.594419
1,26000,3743.972133,3717.675534,3712.911712,3715.199238,3719.327007,19.346568,68.202714
2,19800,3761.677707,3728.611197,3717.601449,3717.522025,3719.778795,20.909415,70.054109
3,26000,3762.506179,3732.154899,3719.492375,3718.478422,3719.962593,16.510298,56.348537
4,29000,3758.336926,3733.060138,3720.361691,3718.937461,3720.049209,11.280829,52.351981
...,...,...,...,...,...,...,...,...
4363,13200,48745.277187,48889.097813,49105.857703,48034.781264,41178.504675,57.951082,43.987837
4364,12700,48731.731905,48868.950476,49089.061353,48047.509373,41208.440987,54.022318,47.529429
4365,9500,48775.337868,48876.646785,49083.598622,48065.375756,41239.344463,68.564883,49.726964
4366,12200,48920.752326,48936.243297,49100.022604,48093.831883,41272.330753,111.663165,53.944239


X_train and X_test has some very large values and some very small values, hence we will standardize the data for better results.

In [107]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)


Use RandomForestClassifer to train and predict the values.

In [108]:
clf = RandomForestClassifier(n_estimators=100, random_state=18)
clf.fit(X_train_scaled, y_train)
y_pred = clf.predict(X_test_scaled)

Now lets evaluate the models performance

In [109]:
clf.score(X_test_scaled, y_test)

0.39249771271729184