## Explanation of Each Indicator:
SMA: The average closing price over a specified number of periods.
EMA: A weighted average that gives more importance to recent prices.
RSI: A momentum oscillator that measures the speed and change of price movements. Ranges from 0 to 100.
MACD: A trend-following momentum indicator that shows the relationship between two EMAs.
Bollinger Bands: A volatility indicator that uses standard deviations to create bands around the SMA.
ATR: Measures market volatility by decomposing the entire range of an asset for a given period.
VWAP: The average price a security has traded at throughout the day, based on both volume and price.
OBV: Uses volume flow to predict changes in stock price.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
import datetime
from sklearn.model_selection import train_test_split
import sklearn.datasets
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns

In [3]:
company = "INFY"
start_date = datetime.datetime(2020, 1, 1)
end_date = datetime.datetime(2024, 9, 1)
ticker = yf.Ticker(company)
ohlc_data = ticker.history(start=start_date,end=end_date)

data= ohlc_data.drop(columns=['Dividends','Stock Splits'],axis=1)

In [4]:
data.describe()

Unnamed: 0,Open,High,Low,Close,Volume
count,1174.0,1174.0,1174.0,1174.0,1174.0
mean,16.689469,16.831364,16.55641,16.69869,9306645.0
std,3.936089,3.957231,3.918478,3.939357,5115846.0
min,6.185953,6.427313,6.042926,6.203831,1320600.0
25%,15.178055,15.325602,15.075725,15.202704,5878425.0
50%,17.171953,17.322604,17.01973,17.168737,7979950.0
75%,19.111702,19.256939,18.964908,19.140412,11328150.0
max,24.421582,24.645719,23.889256,24.468277,42686600.0


In [6]:
data['change in close price'] = data['Close'].pct_change() *100

## deciding value of dependent variable(decision)

In [7]:
# Buy: if change > 0.8%
data['decision'] = 2  # Default all values to Buy
data.loc[data['change in close price'] < -0.7, 'decision'] = 0  # Sell: if change < -0.63%
data.loc[(data['change in close price'] >= -0.7) & (data['change in close price'] <= 0.84), 'decision'] = 1  # Hold: if -0.63% <= change <= 0.8%


In [21]:
# data['decision_EMA'] = 2  # Default all values to Buy
# data.loc[data['ema_indicator'] < 0, 'decision_EMA'] = 0  

threshold = 0.01

# Create a decision variable using 2 for Buy, 1 for Hold, and 0 for Sell
data['Decision_EMA'] = np.where(data['ema_indicator'] > (data['EMA'] * threshold), 2, 
                                np.where(data['ema_indicator'] < -(data['EMA'] * threshold), 0, 1))

In [22]:
data['decision_RSI'] = 1  # Default to hold
data.loc[data['RSI'] <= 30, 'decision_RSI'] = 2  # Buy if RSI <= 30
data.loc[data['RSI'] >= 65, 'decision_RSI'] = 0  # Sell if RSI >= 65
 

In [23]:
data['decision_MACD'] = 0  
data.loc[data['MACD'] > data['Signal_Line'], 'decision_MACD'] = 2 

In [39]:
# data['VWAP'] = (data['Close'] * data['Volume']).cumsum() / data['Volume'].cumsum()

# Initialize a column for VWAP-based decision (2 = Buy, 1 = Hold, 0 = Sell)
data['decision_VWAP'] = 1  # Default to Hold

# Decision rules based on VWAP
# Buy (2): If Close price is below VWAP by a significant margin (indicating undervaluation)
data.loc[data['Close'] < data['VWAP'] * 0.98, 'decision_VWAP'] = 2

# Sell (0): If Close price is above VWAP by a significant margin (indicating overvaluation)
data.loc[data['Close'] > data['VWAP'] * 1.02, 'decision_VWAP'] = 0

In [25]:
data['Decision_OBV'] = np.where(data['OBV'] > data['OBV'].shift(1), 2, 
                            np.where(data['OBV'] < data['OBV'].shift(1), 0, 1))

In [26]:
data['Decision_Band'] = np.where(data['Close'] <= data['Lower_Band'], 2, 
                                 np.where(data['Close'] >= data['Upper_Band'], 0, 1))

In [27]:
data['RSI'].describe()

count    1161.000000
mean       53.736776
std        17.474660
min         3.791507
25%        42.446047
50%        54.458609
75%        65.432113
max        96.594399
Name: RSI, dtype: float64

In [43]:
data['Decision_EMA'].value_counts()

Decision_EMA
0    632
2    353
1    189
Name: count, dtype: int64

In [53]:
data['decision_combined'].value_counts()

decision_combined
1    1163
2       7
0       4
Name: count, dtype: int64

In [51]:
# Initialize decision column with Hold (1) as default
data['decision_combined'] = 1

# Define Buy (2) condition:
# - RSI is below 30 (oversold)
# - MACD > Signal_Line (uptrend indication)
# - Close price is below a recent average (e.g., 20-period moving average)
data.loc[
    (data['RSI'] < 30) & 
    (data['MACD'] > data['Signal_Line']) & 
    (data['Close'] < data['Close'].rolling(window=20).mean()), 
    'decision_combined'
] = 2

# Define Sell (0) condition:
# - RSI is above 70 (overbought)
# - MACD < Signal_Line (downtrend indication)
# - Close price is above a recent average (e.g., 20-period moving average)
data.loc[
    (data['RSI'] > 70) & 
    (data['MACD'] < data['Signal_Line']) & 
    (data['Close'] > data['Close'].rolling(window=20).mean()), 
    'decision_combined'
] = 0

# Display the combined decision column to verify
print(data[['RSI', 'MACD', 'Signal_Line', 'Close', 'decision_combined']])


                                 RSI      MACD  Signal_Line      Close  \
Date                                                                     
2020-01-02 00:00:00-05:00  54.458609  0.000000     0.000000   9.198477   
2020-01-03 00:00:00-05:00  54.458609  0.001426     0.000285   9.216355   
2020-01-06 00:00:00-05:00  54.458609 -0.004604    -0.000693   9.126964   
2020-01-07 00:00:00-05:00  54.458609 -0.017119    -0.003978   9.028632   
2020-01-08 00:00:00-05:00  54.458609 -0.027443    -0.008671   9.019693   
...                              ...       ...          ...        ...   
2024-08-26 00:00:00-04:00  87.179492  0.456187     0.463866  22.440001   
2024-08-27 00:00:00-04:00  89.956348  0.478230     0.466738  22.799999   
2024-08-28 00:00:00-04:00  90.086225  0.506004     0.474592  23.000000   
2024-08-29 00:00:00-04:00  90.254259  0.537952     0.487264  23.200001   
2024-08-30 00:00:00-04:00  90.534999  0.563235     0.502458  23.280001   

                           decision_c

In [54]:
data.tail(50)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,change in close price,decision,EMA,RSI,EMA_12,...,SMA,ema_indicator,Decision_EMA,decision_RSI,decision_MACD,decision_VWAP,Decision_OBV,Decision_Band,f1,decision_combined
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-06-21 00:00:00-04:00,18.209999,18.24,17.9,18.059999,22822100,-1.149431,0,17.422111,76.518174,17.778326,...,17.003236,-0.637888,0,0,2,0,0,1,2,1
2024-06-24 00:00:00-04:00,17.99,18.190001,17.959999,18.139999,5758800,0.442967,1,17.468427,76.326482,17.833968,...,17.014361,-0.671573,0,0,2,0,2,1,2,1
2024-06-25 00:00:00-04:00,18.26,18.360001,18.16,18.34,8733100,1.10254,2,17.524657,79.615376,17.911819,...,17.033799,-0.815343,0,0,2,0,2,1,2,1
2024-06-26 00:00:00-04:00,18.290001,18.35,18.16,18.219999,8693400,-0.654312,1,17.569518,72.573807,17.959232,...,17.059657,-0.650481,0,0,2,0,0,1,2,1
2024-06-27 00:00:00-04:00,18.620001,18.700001,18.549999,18.629999,8949300,2.250274,2,17.637936,72.803298,18.062427,...,17.098029,-0.992063,0,0,2,0,2,1,2,1
2024-06-28 00:00:00-04:00,18.690001,18.74,18.540001,18.620001,9520300,-0.053668,1,17.701295,65.803122,18.148207,...,17.138161,-0.918706,0,0,2,0,0,1,2,1
2024-07-01 00:00:00-04:00,18.879999,18.950001,18.709999,18.76,8064200,0.751876,1,17.769599,74.210533,18.242329,...,17.189718,-0.990402,0,0,2,0,2,1,2,1
2024-07-02 00:00:00-04:00,19.1,19.26,19.040001,19.23,10531700,2.505327,2,17.863818,81.034457,18.394279,...,17.244794,-1.366181,0,0,2,0,2,0,2,1
2024-07-03 00:00:00-04:00,19.290001,19.43,19.280001,19.370001,7252400,0.728036,1,17.960991,81.589946,18.54439,...,17.293065,-1.40901,0,0,2,0,2,0,2,1
2024-07-05 00:00:00-04:00,19.450001,19.57,19.35,19.43,7709600,0.309755,1,18.055766,83.750023,18.680637,...,17.34528,-1.374234,0,0,2,0,2,0,2,1


In [48]:
# Set 'f1' to 2 by default
data['f1'] = 2

# Apply condition based on weighted sum of 'decision_RSI' and 'decision_MACD'
data.loc[(data['decision_RSI'] * 5 + data['decision_MACD'] * 2) == 0, 'f1'] = 0


In [191]:
# print(data.columns)


In [9]:
# # Convert the 'Date' column to datetime format
# data['Date'] = pd.to_datetime(data['Date'])

# # Set the 'Date' column as the index
# data.set_index('Date', inplace=True)


# Calculate EMA
span = 30  # Lookback period (e.g., 3 days)
data['EMA'] = data['Close'].ewm(span=span, adjust=False).mean()

In [10]:
# Calculate RSI
delta = data['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
data['RSI'] = 100 - (100 / (1 + rs))


In [11]:
data['EMA_12'] = data['Close'].ewm(span=12, adjust=False).mean()
data['EMA_26'] = data['Close'].ewm(span=26, adjust=False).mean()
data['MACD'] = data['EMA_12'] - data['EMA_26']
data['Signal_Line'] = data['MACD'].ewm(span=9, adjust=False).mean()

In [12]:
# Calculate Bollinger Bands
data['Middle_Band'] = data['Close'].rolling(window=20).mean()
data['Upper_Band'] = data['Middle_Band'] + (data['Close'].rolling(window=20).std() * 2)
data['Lower_Band'] = data['Middle_Band'] - (data['Close'].rolling(window=20).std() * 2)


In [13]:
# Calculate ATR
high_low = data['High'] - data['Low']
high_close = (data['High'] - data['Close'].shift()).abs()
low_close = (data['Low'] - data['Close'].shift()).abs()
true_range = high_low.to_frame().join(high_close.to_frame().rename(columns={0: 'High_Close'})).join(low_close.to_frame().rename(columns={0: 'Low_Close'})).max(axis=1)
data['ATR'] = true_range.rolling(window=14).mean()


In [14]:
# Calculate VWAP
data['Cumulative_Volume'] = data['Volume'].cumsum()
data['Cumulative_Volume_Price'] = (data['Close'] * data['Volume']).cumsum()
data['VWAP'] = data['Cumulative_Volume_Price'] / data['Cumulative_Volume']


In [15]:
# Calculate OBV
data['OBV'] = (data['Volume'].where(data['Close'] > data['Close'].shift(), -data['Volume'])).cumsum()


In [16]:
window_period = 50

# Calculate the SMA
data['SMA'] = data['Close'].rolling(window=window_period).mean()

In [17]:
data['ema_indicator'] = data['EMA'] - data['Close']

In [18]:
data['EMA'].describe()

count    1174.000000
mean       16.542214
std         3.891153
min         7.652722
25%        15.097863
50%        17.095940
75%        18.789921
max        22.954394
Name: EMA, dtype: float64

In [36]:
# data.tail(50)

In [19]:
data['RSI'].describe()

count    1161.000000
mean       53.736776
std        17.474660
min         3.791507
25%        42.446047
50%        54.458609
75%        65.432113
max        96.594399
Name: RSI, dtype: float64

In [86]:
corr_matrix = data_2.corr()
corr_matrix['decision'].sort_values(ascending = False)

decision         1.000000
Decision_OBV     0.769517
RSI              0.234974
decision_MACD    0.138822
MACD             0.059639
OBV              0.038748
Signal_Line      0.016755
Close            0.005677
Lower_Band      -0.043733
Middle_Band     -0.045696
Upper_Band      -0.046564
decision_VWAP   -0.048620
Volume          -0.050010
VWAP            -0.050799
SMA             -0.071827
decision_RSI    -0.208432
Decision_EMA    -0.264080
ema_indicator   -0.283070
Decision_Band   -0.340974
Name: decision, dtype: float64

In [64]:
reduced_columns = ['EMA_12','EMA_26','change in close price','EMA','Cumulative_Volume_Price','Cumulative_Volume','EMA']

In [65]:
data = data.drop(columns=reduced_columns,axis=1)

In [76]:
data_2 = data.drop(columns=['decision_combined','f1','ATR','High','Low','Open'])

In [87]:
data_2 = data_2.drop(columns=['Lower_Band','Middle_Band','Upper_Band'])

In [66]:
data.isnull().sum()

Open                 0
High                 0
Low                  0
Close                0
Volume               0
decision             0
RSI                  0
MACD                 0
Signal_Line          0
Middle_Band          0
Upper_Band           0
Lower_Band           0
ATR                  0
VWAP                 0
OBV                  0
SMA                  0
ema_indicator        0
Decision_EMA         0
decision_RSI         0
decision_MACD        0
decision_VWAP        0
Decision_OBV         0
Decision_Band        0
f1                   0
decision_combined    0
dtype: int64

In [88]:
x = data_2.drop(columns = 'decision')
y = data_2['decision']

In [89]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.1,random_state=42)

In [79]:
# plt.figure(figsize=(12, 6))
# plt.plot(data['RSI'], color='blue')  # Replace 'date' with your actual time column name
# plt.title('RSI Fluctuations Over Time')
# plt.xlabel('Close')
# plt.ylabel('RSI Values')
# plt.grid()
# plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
# plt.show()

In [32]:
from sklearn.impute import SimpleImputer     #the imputer put median to the vacant place automatically
imputer_median = SimpleImputer(strategy = "median")
median_columns = ['RSI','Middle_Band', 'Upper_Band', 'Lower_Band', 
                  'ATR','SMA']

data[median_columns] = imputer_median.fit_transform(data[median_columns])

In [90]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
x_train_std = scaler.fit_transform(x_train)
x_test_std = scaler.transform(x_test)

In [91]:
x_train_std

array([[0.34390293, 0.23853406, 0.55738456, ..., 0.        , 0.        ,
        0.5       ],
       [0.42226915, 0.13938742, 0.56595733, ..., 0.        , 1.        ,
        0.5       ],
       [0.0306146 , 0.46256588, 0.26106731, ..., 1.        , 1.        ,
        0.5       ],
       ...,
       [0.68468725, 0.16302277, 0.75880207, ..., 0.        , 1.        ,
        0.5       ],
       [0.49284085, 0.15135377, 0.78141298, ..., 0.5       , 0.        ,
        0.5       ],
       [0.66148736, 0.17919306, 0.8170421 , ..., 0.        , 1.        ,
        0.5       ]])

In [82]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier 
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

In [112]:
model_1 = RandomForestClassifier(n_estimators=200)
model_1.fit(x_train_std,y_train)


In [113]:
test_1 = model_1.predict(x_test_std)
print(f" accurace : {accuracy_score(y_test,test_1)}")

 accurace : 0.7033898305084746
