In [1]:
# Import Libraries
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Load the ADANIPORTS price history from a local CSV file (nothing is downloaded in this cell).
# Raw string so the backslashes of the Windows path are taken literally; in a normal
# string literal, sequences such as "\p", "\d" and "\A" are invalid escapes.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
stock_data = pd.read_csv(r"D:\python programs\dspl\archive\ADANIPORTS.csv")

In [3]:
# Work on a one-column frame holding just the 'Close' prices
df = stock_data['Close'].to_frame()

In [4]:
# 'Target' on each row is the 'Close' of the following row (shift by -1);
# the final row has no following row, so its 'Target' is NaN.
df = df.assign(Target=df['Close'].shift(periods=-1))

In [5]:
# Remove the final row: after the shift its 'Target' is NaN.
# The row is selected by its index label, then dropped into a new frame.
df = df.drop(index=df.index[-1:])

In [6]:
# Label each row: 1 when the next close is strictly higher than the current close, otherwise 0.
# A flat or lower next close maps to 0, and so does a NaN difference (NaN > 0 is False).
# The comparison is vectorised over the whole column; 'int64' keeps the labels as plain integers.
price_change = df['Target'] - df['Close']
df['Action'] = price_change.gt(0).astype('int64')

In [7]:
# 'Target' was only needed to derive 'Action'; remove it so it cannot end up among the features
df = df.drop(columns=['Target'])

In [8]:
# Features: every column except the last one. Labels: the last column.
# Both are handed to scikit-learn as NumPy arrays.
y = df.iloc[:, -1].to_numpy()
X = df.iloc[:, :-1].to_numpy()

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): shuffle=True is the train_test_split default and is only spelled out here.
# The rows are presumably ordered by date, so test rows are drawn at random from the
# whole history rather than taken from the end — confirm a shuffled split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    shuffle=True,
)

In [10]:
# Random forest: 100 trees, entropy split criterion, fixed seed for repeatable results.
# The settings are gathered in one dict so they can be read (and tuned) at a glance.
forest_params = {
    'n_estimators': 100,
    'criterion': 'entropy',
    'random_state': 0,
}
rfc = RandomForestClassifier(**forest_params)
# Kept as the last expression so the notebook still echoes the fitted estimator
rfc.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', random_state=0)

In [11]:
# Single decision tree with the entropy split criterion and a fixed seed for repeatable results
tree_params = {
    'criterion': 'entropy',
    'random_state': 0,
}
dtc = DecisionTreeClassifier(**tree_params)
# Kept as the last expression so the notebook still echoes the fitted estimator
dtc.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [12]:
# Run both fitted classifiers over the held-out features to get their predicted 'Action' labels
# (random forest first, then decision tree)
y_pred_rfc, y_pred_dtc = (clf.predict(X_test) for clf in (rfc, dtc))

In [13]:
# Score the random forest on the held-out rows.
# accuracy_score gives the fraction of test labels predicted correctly;
# confusion_matrix counts true labels along the rows and predicted labels along the columns.
rfc_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_rfc)
rfc_conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_rfc)

# Report
print("Random Forest Classifier:")
print("Accuracy score:", rfc_accuracy)
print("Confusion matrix:\n", rfc_conf_matrix)

Random Forest Classifier:
Accuracy score: 0.45112781954887216
Confusion matrix:
 [[162 174]
 [191 138]]


In [14]:
# Score the decision tree on the held-out rows.
# accuracy_score gives the fraction of test labels predicted correctly;
# confusion_matrix counts true labels along the rows and predicted labels along the columns.
dtc_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_dtc)
dtc_conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_dtc)

# Report
print("Decision Tree Classifier:")
print("Accuracy score:", dtc_accuracy)
print("Confusion matrix:\n", dtc_conf_matrix)

Decision Tree Classifier:
Accuracy score: 0.4631578947368421
Confusion matrix:
 [[178 158]
 [199 130]]
