In [8]:
# Q15 – Classification models: Logistic Regression, KNN, Naive Bayes, Decision Tree

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

#  Load your ADNOC dataset (if not already loaded)
df = pd.read_csv("ADNOC_Stock_historical_Data.csv")

# Create classification target: 1 if next day's Close > today's Close, else 0
df['Price_Up_Next'] = (df['Close'].shift(-1) > df['Close']).astype(int)

#  Drop last row (because it has no "next day")
df = df.dropna(subset=['Price_Up_Next'])

#  Select features (independent variables) and target (dependent variable)
X = df[['Open', 'High', 'Low', 'Volume', 'Average_Price', 'Percentage_Change', 'Range']]
y = df['Price_Up_Next']



In [9]:
#  Train–test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, shuffle=True
)


In [10]:

#  Feature scaling
sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)


In [11]:

#  Logistic Regression
classifier_lr = LogisticRegression(random_state=0, max_iter=1000)
classifier_lr.fit(X_train_scaled, y_train)
y_pred_lr = classifier_lr.predict(X_test_scaled)



In [12]:
#  K-Nearest Neighbors
classifier_knn = KNeighborsClassifier(n_neighbors=5)
classifier_knn.fit(X_train_scaled, y_train)
y_pred_knn = classifier_knn.predict(X_test_scaled)


In [13]:


#  Naive Bayes
classifier_nb = GaussianNB()
classifier_nb.fit(X_train_scaled, y_train)
y_pred_nb = classifier_nb.predict(X_test_scaled)


In [14]:

#  Decision Tree (no scaling needed)
classifier_dt = DecisionTreeClassifier(random_state=0)
classifier_dt.fit(X_train, y_train)
y_pred_dt = classifier_dt.predict(X_test)


In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix

print("First 10 predicted values:", y_pred_lr[:10])
print("First 10 true values     :", y_test[:10])

accuracy = accuracy_score(y_test, y_pred_lr)
cm = confusion_matrix(y_test, y_pred_lr)

print("Logistic Regression Accuracy:", accuracy)
print("Confusion Matrix:\n", cm)


First 10 predicted values: [0 1 0 1 0 1 1 0 1 0]
First 10 true values     : 1258    0
489     1
328     0
240     1
759     0
312     0
869     0
571     1
1009    1
211     0
Name: Price_Up_Next, dtype: int64
Logistic Regression Accuracy: 0.7945736434108527
Confusion Matrix:
 [[131  16]
 [ 37  74]]


In [15]:
# Q16) Evaluate the performance of each model using confusion matrix and accuracy
from sklearn.metrics import confusion_matrix, accuracy_score

#  Logistic Regression
print("Logistic Regression")
cm_lr = confusion_matrix(y_test, y_pred_lr)
acc_lr = accuracy_score(y_test, y_pred_lr)
print("Confusion Matrix:\n", cm_lr)
print("Accuracy:", acc_lr)
print("-" * 60)

#  KNN 
print("K-Nearest Neighbors (KNN)")
cm_knn = confusion_matrix(y_test, y_pred_knn)
acc_knn = accuracy_score(y_test, y_pred_knn)
print("Confusion Matrix:\n", cm_knn)
print("Accuracy:", acc_knn)
print("-" * 60)

#  Naive Bayes 
print("Naive Bayes")
cm_nb = confusion_matrix(y_test, y_pred_nb)
acc_nb = accuracy_score(y_test, y_pred_nb)
print("Confusion Matrix:\n", cm_nb)
print("Accuracy:", acc_nb)
print("-" * 60)

#  Decision Tree 
print("Decision Tree")
cm_dt = confusion_matrix(y_test, y_pred_dt)
acc_dt = accuracy_score(y_test, y_pred_dt)
print("Confusion Matrix:\n", cm_dt)
print("Accuracy:", acc_dt)
print("-" * 60)

#  Identify best-fit classifier 
accuracies = [
    ("Logistic Regression", acc_lr),
    ("KNN", acc_knn),
    ("Naive Bayes", acc_nb),
    ("Decision Tree", acc_dt)
]

best_model_name, best_acc = max(accuracies, key=lambda x: x[1])
print("Best-fit classifier based on accuracy:", best_model_name)
print("Best accuracy:", best_acc)


Logistic Regression
Confusion Matrix:
 [[131  16]
 [ 37  74]]
Accuracy: 0.7945736434108527
------------------------------------------------------------
K-Nearest Neighbors (KNN)
Confusion Matrix:
 [[125  22]
 [ 36  75]]
Accuracy: 0.7751937984496124
------------------------------------------------------------
Naive Bayes
Confusion Matrix:
 [[120  27]
 [ 25  86]]
Accuracy: 0.7984496124031008
------------------------------------------------------------
Decision Tree
Confusion Matrix:
 [[118  29]
 [ 35  76]]
Accuracy: 0.751937984496124
------------------------------------------------------------
Best-fit classifier based on accuracy: Naive Bayes
Best accuracy: 0.7984496124031008


In [18]:
# 17) Predict the dependent variable using the best-fit classifier (Naive Bayes)
y_pred_best = classifier_nb.predict(X_test_scaled)

print("Predictions of best model (Naive Bayes) for the test set:")
print(y_pred_best[:20])  # show first 20 predictions


Predictions of best model (Naive Bayes) for the test set:
[0 1 1 1 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0 0]
