In [5]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from Database.MongoDB_Connection import start_db, close_db
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb
import joblib

In [7]:
# Pull every usable document from the Finance_Data collection into a DataFrame.
# The connection should normally already have been opened via the MongoDB_Operations
# file; if this notebook is run on its own, start_db() makes the connection now.

db = start_db()
print(db)

# Only documents that carry every one of these fields are fetched.
# NOTE(review): the split cell also reads "Open", "High", "Low", "Volume" and
# "Sentiment", which this filter does not require - confirm every document has them.
required_fields = ["Ticker", "SMA", "EMA", "MACD", "RSI", "ATR", "WILLR", "CCI", "Success"]
get_data = db.Finance_Data.find(
    {field: {"$exists": True} for field in required_fields},
    {"_id": 0},  # projection: leave the Mongo _id out of the returned documents
)

df = pd.DataFrame(list(get_data))

# Fail fast with a clear message: an empty result would otherwise only surface
# later as a KeyError when the feature columns are selected.
if df.empty:
    raise ValueError(
        "No Finance_Data documents matched the required fields; nothing to train on."
    )

C:\Users\CRTITSS\Desktop\Projects\Backend_Content
Test entry exists
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'C964_Database')
<class 'pandas.core.frame.DataFrame'>


In [None]:
# Separate the feature matrix from the prediction target, then hold out a test set.

feature_columns = [
    "Open", "High", "Low", "Volume", "Sentiment",
    "SMA", "EMA", "MACD", "RSI", "ATR", "WILLR", "CCI",
]
X = df[feature_columns]
y = df["Success"]

# 25% of the rows are held out for testing; the fixed random_state keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=471189,
)

In [None]:
# Train the model.

# XGBoost classifier settings:
#   use_label_encoder=False - safeguards against a deprecated label-encoding
#                             function in the scikit-learn integration.
#   eval_metric='logloss'   - log loss is the evaluation metric. It is 0 for perfect
#                             predictions (which usually signals overfitting) and has
#                             no upper bound; lower is better.
#                             NOTE(review): no eval_set is passed to fit() below, so
#                             presumably nothing is reported during training - confirm.
#   n_jobs=-1               - number of parallel training threads; -1 uses every
#                             available thread to speed up training.
xgb_params = {
    "use_label_encoder": False,
    "eval_metric": "logloss",
    "n_jobs": -1,
}
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# Save the fitted model so it can be used later without retraining.
joblib.dump(model, "C964_model.joblib")

In [8]:
# Score the held-out test set and print the results.

y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(model.classes_)  # class labels held by the fitted model
print(f" Accuracy: {acc:.2f}")
print(report)

[0 1]
 Accuracy: 0.74
              precision    recall  f1-score   support

           0       0.74      0.75      0.75     57848
           1       0.75      0.73      0.74     57225

    accuracy                           0.74    115073
   macro avg       0.74      0.74      0.74    115073
weighted avg       0.74      0.74      0.74    115073



In [8]:
# Disconnect from the database. Once the model is deployed, predictions will
# no longer need database access.
close_db()

Database connection closed.
