In [13]:
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import accuracy_score

from scipy import stats

In [14]:
df = pd.read_csv("New NSEI.csv")

In [None]:
df.drop(columns="Adj Close")

In [16]:
df['Date'] = pd.to_datetime(df['Date'])

In [17]:
def ruleOne(row):
    if row['Open'] > row['Close']:
        return 1
    return 0


def ruleTwo(row):
    if row['Open'] == row['Low']:
        return 0
    elif row['Open'] == row['High']:
        return 1
    elif row['Open'] > np.mean([row['High'], row['Low']]):
        return 0
    return 1


def ruleThree(row):
    if row['TP'] < row['TP'].shift(1):
        return 1
    return 0

def tp(row):
    return row[['High', 'Low', 'Close']].mean(axis=1)

In [19]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Rule 1,Rule 2
0,2025-03-07,22508.65,22633.8,22464.75,22552.5,22552.5,289800.0,0,1
1,2025-03-06,22476.35,22556.45,22245.85,22544.7,22544.7,372100.0,0,0
2,2025-03-05,22073.05,22394.9,22067.8,22337.3,22337.3,305900.0,0,1
3,2025-03-04,21974.45,22105.05,21964.6,22082.65,22082.65,253300.0,0,1
4,2025-03-03,22194.55,22261.55,22004.7,22119.3,22119.3,282400.0,1,0


In [22]:
df['Rule 1'] = df.apply(ruleOne, axis=1)
df['Rule 2'] = df.apply(ruleTwo, axis=1)
df['TP'] = df[['High', 'Low', 'Close']].mean(axis=1)
df['Rule 3'] = (df['TP'] < df['TP'].shift(1)).astype(int)
df['classifier'] = stats.mode(df[['Rule 1', 'Rule 2', 'Rule 3']], axis=1).mode

In [None]:
df.head()

In [None]:
# df.to_csv("NSEI (Rules).csv", index=False)

In [None]:
dateSplit = pd.to_datetime('01-01-2025', format="%d-%m-%Y")
newNSEI = df[df['Date'] >= dateSplit]
oldNSEI = df[df['Date'] < dateSplit]

In [None]:
oldNSEI[['Rule 1', 'Rule 2', 'TP', 'Rule 3']].head()

#### Hyper Parameter Tuning

In [None]:
params = [
	{
		"model": DecisionTreeClassifier(),
		"params": {
			"criterion": ["gini", "entropy"],
			"splitter": ["best", "random"],
			"max_depth": [2, 3, 4, 5, 6, 7, 8, 9, 10],
			"min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
			"min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
		},
	},
	{
		"model": LogisticRegression(max_iter=1000),
		"params": {
			"penalty": ["l1", "l2"],
			"C": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
			"solver": ["liblinear", "saga"],
			"max_iter": [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
		},
	},
	{
		"model": KNeighborsClassifier(),
		"params": {
			"n_neighbors": [5, 7, 9, 11, 13, 15],
			"weights": ["uniform", "distance"],
			"metric": ["minkowski", "euclidean", "manhattan"]
		},
	},
	{
		"model": SVC(),
		"params": {
			"C": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
			"kernel": ["linear", "poly", "rbf", "sigmoid"],
			"gamma": ["scale", "auto"],
			"decision_function_shape": ["ovo", "ovr"]
		}
	}
]

In [None]:
bestParams = []
tune = False
if tune:
	for param in params:
		gs = GridSearchCV(param['model'], param['params'], cv=3)
		gs.fit(newNSEI[['Open', 'High', 'Low', 'Close']], newNSEI['classifier'])
		bestParams.append({'param': gs.best_params_, 'score': gs.best_score_, 'model': param['model'] })

#### Model fitting

In [None]:
models = []
for i, param in enumerate(params):
    models.append(param['model'])
    models[i].fit(oldNSEI[['Open', 'High', 'Low', 'Close']], oldNSEI['classifier'])

In [None]:
for model in models:
    newNSEI[str(model)] = model.predict(newNSEI[['Open', 'High', 'Low', 'Close']])

In [None]:
newNSEI[['classifier', str(models[0]), str(models[1]), str(models[2]), str(models[3])]]

In [None]:
for model in models:
    print(model, (newNSEI[str(model)] == newNSEI['classifier']).sum() / newNSEI['classifier'].count(), sep=": ")
print()
for model in models:
    print(model, accuracy_score(newNSEI[str(model)], newNSEI['classifier']), sep=": ")