In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from scipy import stats

In [2]:
# Load the raw index price history from the CSV sitting next to the notebook.
df = pd.read_csv(filepath_or_buffer="NSEI.csv")

In [3]:
# Replace the header, by position, with the seven column names used in the
# rest of the notebook.
df.columns = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]

# The preview goes last: a notebook cell only renders its final expression,
# so a bare df.head() placed before the assignment is silently discarded.
df.head()

In [4]:
#          Date      Open      High       Low     Close  Adj Close    Volume
# 0  2025-02-14  23096.45  23133.70  22774.85  22929.25   22929.25  254500.0
# 1  2025-02-13  23055.75  23235.50  22992.20  23031.40   23031.40  265700.0
# 2  2025-02-12  23050.80  23144.70  22798.35  23045.25   23045.25  279700.0
# 3  2025-02-11  23383.55  23390.05  22986.65  23071.80   23071.80  268000.0
# 4  2025-02-10  23543.80  23568.60  23316.30  23381.60   23381.60  234200.0

In [5]:
# Rule 1 flag: 1 when the session opened above its close, otherwise 0.
df["Rule 1"] = df['Open'].gt(df['Close']).astype(int)

In [6]:
#          Date      Open      High       Low     Close  Adj Close    Volume  \
# 0  2025-02-14  23096.45  23133.70  22774.85  22929.25   22929.25  254500.0   
# 1  2025-02-13  23055.75  23235.50  22992.20  23031.40   23031.40  265700.0   
# 2  2025-02-12  23050.80  23144.70  22798.35  23045.25   23045.25  279700.0   
# 3  2025-02-11  23383.55  23390.05  22986.65  23071.80   23071.80  268000.0   
# 4  2025-02-10  23543.80  23568.60  23316.30  23381.60   23381.60  234200.0   

#    Rule 1  
# 0    True  
# 1    True  
# 2    True  
# 3    True  
# 4    True  

In [7]:
def row_sum(row):
    """Return the Rule 2 flag (0 or 1) for one price row.

    ``row`` is any mapping-like object exposing 'Open', 'High' and 'Low'.

    * Open equal to Low  -> 0 (checked first, so a session where Open, High
      and Low all coincide also yields 0).
    * Open equal to High, or Open strictly above the midpoint of High and
      Low -> 1.
    * Anything else -> 0.
    """
    opening = row['Open']

    # An open sitting on the session low never fires the rule.
    if opening == row['Low']:
        return 0

    # `or` short-circuits, so the midpoint is only computed when the open is
    # not already sitting on the session high.
    if opening == row['High'] or opening > (row['High'] + row['Low']) / 2:
        return 1

    return 0

# Rule 2 flag: evaluate row_sum once per row (axis="columns" hands each row to it).
df['Rule 2'] = df.apply(func=row_sum, axis="columns")

In [8]:
#          Date      Open      High       Low     Close  Adj Close    Volume  \
# 0  2025-02-14  23096.45  23133.70  22774.85  22929.25   22929.25  254500.0   
# 1  2025-02-13  23055.75  23235.50  22992.20  23031.40   23031.40  265700.0   
# 2  2025-02-12  23050.80  23144.70  22798.35  23045.25   23045.25  279700.0   
# 3  2025-02-11  23383.55  23390.05  22986.65  23071.80   23071.80  268000.0   
# 4  2025-02-10  23543.80  23568.60  23316.30  23381.60   23381.60  234200.0   

#    Rule 1  Rule 2  
# 0    True       1  
# 1    True       0  
# 2    True       1  
# 3    True       1  
# 4    True       1  

In [9]:
# TP: the plain average of each session's High, Low and Close.
df['TP'] = (
    df['High']
    .add(df['Low'])
    .add(df['Close'])
    .div(3)
)

In [10]:
# Show the first five rows, including the rule and TP columns added so far.
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Rule 1,Rule 2,TP
0,2025-02-19,22847.25,23049.95,22814.85,22932.9,22932.9,207000.0,0,0,22932.566667
1,2025-02-18,22963.65,22992.5,22801.5,22945.3,22945.3,210600.0,1,1,22913.1
2,2025-02-17,22809.9,22974.2,22725.45,22959.5,22959.5,207400.0,0,0,22886.383333
3,2025-02-14,23096.45,23133.7,22774.85,22929.25,22929.25,254500.0,1,1,22945.933333
4,2025-02-13,23055.75,23235.5,22992.2,23031.4,23031.4,265700.0,1,0,23086.366667


In [11]:
#          Date      Open      High       Low     Close  Adj Close    Volume  \
# 0  2025-02-14  23096.45  23133.70  22774.85  22929.25   22929.25  254500.0   
# 1  2025-02-13  23055.75  23235.50  22992.20  23031.40   23031.40  265700.0   
# 2  2025-02-12  23050.80  23144.70  22798.35  23045.25   23045.25  279700.0   
# 3  2025-02-11  23383.55  23390.05  22986.65  23071.80   23071.80  268000.0   
# 4  2025-02-10  23543.80  23568.60  23316.30  23381.60   23381.60  234200.0   

#    Rule 1  Rule 2            TP  
# 0    True       1  22945.933333  
# 1    True       0  23086.366667  
# 2    True       1  22996.100000  
# 3    True       1  23149.500000  
# 4    True       1  23422.166667  

In [12]:
# Rule 3 flag: 1 when this row's TP is below the TP of the row directly above
# it; the first row has nothing above it and therefore gets 0.
# NOTE(review): the displayed rows are ordered newest-first, so "the row above"
# is the following trading day - confirm that is the intended comparison.
df['Rule 3'] = df['TP'].lt(df['TP'].shift(periods=1)).astype(int)

In [13]:
#          Date      Open      High       Low     Close  Adj Close    Volume  \
# 0  2025-02-14  23096.45  23133.70  22774.85  22929.25   22929.25  254500.0   
# 1  2025-02-13  23055.75  23235.50  22992.20  23031.40   23031.40  265700.0   
# 2  2025-02-12  23050.80  23144.70  22798.35  23045.25   23045.25  279700.0   
# 3  2025-02-11  23383.55  23390.05  22986.65  23071.80   23071.80  268000.0   
# 4  2025-02-10  23543.80  23568.60  23316.30  23381.60   23381.60  234200.0   

#    Rule 1  Rule 2            TP  Rule 3  
# 0    True       1  22945.933333   False  
# 1    True       0  23086.366667    True  
# 2    True       1  22996.100000   False  
# 3    True       1  23149.500000    True  
# 4    True       1  23422.166667    True  

In [14]:
# Majority vote of the three 0/1 rule flags.  With exactly three binary votes
# the most frequent value is 1 precisely when at least two rules fire, so the
# row sum gives the same label as a row-wise mode.  Computing it this way
# always yields a 1-D column and does not depend on the return shape of
# scipy.stats.mode, which differs between SciPy releases.
df['classifier'] = (df[['Rule 1', 'Rule 2', 'Rule 3']].sum(axis=1) >= 2).astype(int)

In [15]:
# Persist the frame with its rule columns and label; the row index is not written.
df.to_csv(path_or_buf="NSEI (Rules).csv", index=False)

In [16]:
# The rule-labelled frame has been written to disk at this point; the cells
# below fit classifiers on those labels.

In [None]:
#          Date      Open      High       Low     Close  Adj Close    Volume  \
# 0  2025-02-14  23096.45  23133.70  22774.85  22929.25   22929.25  254500.0   
# 1  2025-02-13  23055.75  23235.50  22992.20  23031.40   23031.40  265700.0   
# 2  2025-02-12  23050.80  23144.70  22798.35  23045.25   23045.25  279700.0   
# 3  2025-02-11  23383.55  23390.05  22986.65  23071.80   23071.80  268000.0   
# 4  2025-02-10  23543.80  23568.60  23316.30  23381.60   23381.60  234200.0   

#    Rule 1  Rule 2            TP  Rule 3  classifier  
# 0       1       1  22945.933333       0           1  
# 1       1       0  23086.366667       0           0  
# 2       1       1  22996.100000       1           1  
# 3       1       1  23149.500000       0           1  
# 4       1       1  23422.166667       0           1  

In [None]:
# Independent variables: the four raw price columns of each session.
x = df[["Open", "High", "Low", "Close"]]

# Dependent variable: the majority-vote label derived from Rules 1-3.
y = df['classifier']

# LogisticRegression
# The default limit of 100 solver iterations is exhausted on these unscaled
# prices (scikit-learn emits a ConvergenceWarning), so give the solver more room.
# NOTE(review): standardising the features is the other documented remedy if
# the warning persists.
lr = LogisticRegression(max_iter=10000)
lr.fit(x, y)

# DecisionTreeClassifier
# A fixed seed keeps the fitted tree identical across Restart & Run All.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x, y)

# KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(x, y)

# SVC
svc = SVC()
svc.fit(x, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Classify a single session; values follow the fitted column order
# Open, High, Low, Close.
lr.predict([
    [23560.6, 23689.85, 23460.45, 23644.8],
])



array([0])

In [None]:
# Load the second, already-labelled dataset and replace its header, by
# position, with the column names used throughout this notebook.
dfNew = pd.read_csv(filepath_or_buffer="NESI DATASET JAN 2.csv")
dfNew.columns = [
    "Date", "Open", "High", "Low", "Close", "Volume",
    "Rule 1", "Rule 2", "TP", "Rule 3", "classifier",
]

In [None]:
# Independent variables (the four price columns) and the dependent variable
# (the 'classifier' label), both taken from the second dataset.
x, y = dfNew[["Open", "High", "Low", "Close"]], dfNew['classifier']