In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [11]:
# Load the raw price table.
stock_data = pd.read_csv('stock_data_trend_based.csv')

# Positional snapshot of the untouched frame: every column but the last as
# the feature matrix, the last column as the target.
# NOTE(review): X and y are taken before any feature is (re)computed and are
# not referenced again in this cell -- confirm whether they are still needed.
X = stock_data.iloc[:, :-1].values
y = stock_data.iloc[:, -1].values

# Per-row price features and short rolling statistics of the close.
stock_data = stock_data.assign(
    # Open-to-close move, as a fraction of the open and in absolute terms.
    Daily_Return=lambda df: (df['Close'] - df['Open']) / df['Open'],
    Price_Change=lambda df: df['Close'] - df['Open'],
    # 5-row rolling standard deviation of the close.
    Volatility=lambda df: df['Close'].rolling(window=5).std(),
    # Simple moving averages of the close over 5 and 10 rows.
    SMA_5=lambda df: df['Close'].rolling(window=5).mean(),
    SMA_10=lambda df: df['Close'].rolling(window=10).mean(),
)

# Relative Strength Index built from plain 14-row rolling means of the
# positive and negative close-to-close differences.
delta = stock_data['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss

stock_data = stock_data.assign(
    RSI=100 - (100 / (1 + rs)),
    # MACD line: 12-span EMA minus 26-span EMA of the close (adjust=False).
    EMA_12=lambda df: df['Close'].ewm(span=12, adjust=False).mean(),
    EMA_26=lambda df: df['Close'].ewm(span=26, adjust=False).mean(),
    MACD=lambda df: df['EMA_12'] - df['EMA_26'],
    # Bollinger Bands: 20-row mean of the close +/- two 20-row standard deviations.
    BB_Mid=lambda df: df['Close'].rolling(window=20).mean(),
    BB_Upper=lambda df: df['BB_Mid'] + (2 * df['Close'].rolling(window=20).std()),
    BB_Lower=lambda df: df['BB_Mid'] - (2 * df['Close'].rolling(window=20).std()),
)

# Discard every row that has a NaN in any column; the rolling windows yield
# NaN until a full window of rows is available.
stock_data = stock_data.dropna()

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Columns kept for modelling: 'Signal' is the target, the rest are predictors.
# EMA_12, EMA_26 and BB_Mid are intentionally not in this list.
updated_features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'Daily_Return', 'Price_Change', 'Volatility',
    'SMA_5', 'SMA_10',
    'RSI', 'MACD',
    'BB_Upper', 'BB_Lower',
    'Signal',
]
updated_data = stock_data[updated_features]

# Separate the target from the predictors.
y_updated = updated_data['Signal']
X_updated = updated_data.drop(columns=['Signal'])

# 80/20 hold-out split, stratified on the target and seeded for repeatability.
# NOTE(review): train_test_split shuffles rows by default, while the features
# are rolling windows over row order, so neighbouring (overlapping) rows can
# land on both sides of the split -- confirm that a random split is intended
# rather than a chronological one.
X_train, X_test, y_train, y_test = train_test_split(
    X_updated,
    y_updated,
    test_size=0.2,
    random_state=42,
    stratify=y_updated,
)

# Standardise using statistics learned from the training rows only, then
# apply the same transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for the random forest: 3 x 3 x 3 = 27 candidates.
rf_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Exhaustive search with 3-fold cross-validation on the training partition,
# scored by accuracy and run on all available cores.
rf_grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    rf_params,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
)
rf_grid.fit(X_train_scaled, y_train)

# Evaluate the best estimator found by the search on the hold-out partition.
best_rf = rf_grid.best_estimator_
y_pred_rf = best_rf.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
classification_rep_rf = classification_report(y_test, y_pred_rf)

# classification_report returns a multi-line string. Showing it as part of a
# tuple renders its repr (one long line full of "\n" escapes), so print it to
# get the formatted table and leave the accuracy as the cell's displayed value.
print(classification_rep_rf)
accuracy_rf


(0.7066326530612245,
 '              precision    recall  f1-score   support\n\n           0       0.71      0.69      0.70       194\n           1       0.70      0.72      0.71       198\n\n    accuracy                           0.71       392\n   macro avg       0.71      0.71      0.71       392\nweighted avg       0.71      0.71      0.71       392\n')

In [16]:
# Echo the hold-out accuracy computed in the training cell.
print(accuracy_rf)

0.7066326530612245


In [12]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the hold-out predictions: rows are the true classes,
# columns are the predicted classes.
cf = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)
cf

array([[134,  60],
       [ 55, 143]])