In [30]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_X_y



In [31]:

# class HyperTreeSVM:
#     def __init__(self, tree_depth, svm_kernel, svm_C):
#         self.tree_depth = tree_depth
#         self.svm_kernel = svm_kernel
#         self.svm_C = svm_C

#     def fit(self, X, y):
#         self.clf_tree = DecisionTreeClassifier(max_depth=self.tree_depth)
#         self.clf_tree.fit(X, y)

#         leaf_indices = np.where(self.clf_tree.apply(X) == -1)[0]
#         self.leaves_id = {i: leaf_id for leaf_id, i in enumerate(leaf_indices)}
#         self.leaves_counter = len(self.leaves_id)

#         self.clf_svm = []
#         for leaf_id in range(self.leaves_counter):
#             node_id = list(self.leaves_id.values())[leaf_id]
#             leaf_indices = np.where(self.clf_tree.apply(X) == node_id)
#             X_leaf, y_leaf = X[leaf_indices], y[leaf_indices]
            
#             clf_svm_leaf = SVC(kernel=self.svm_kernel, C=self.svm_C)
#             clf_svm_leaf.fit(X_leaf, y_leaf)
#             self.clf_svm.append(clf_svm_leaf)

#     def predict(self, X):
#         y_pred = np.zeros(X.shape[0])
#         for i in range(self.leaves_counter):
#             node_id = list(self.leaves_id.values())[i]
#             leaf_indices = np.where(self.clf_tree.apply(X) == node_id)
#             y_pred[leaf_indices] = self.clf_svm[i].predict(X[leaf_indices])
#         return y_pred


In [32]:

class HybridTreeSVM(BaseEstimator, ClassifierMixin):
    def __init__(self, tree_params=None, svm_params=None, lr_params=None):
        self.tree_params = tree_params or {}
        self.svm_params = svm_params or {'kernel': 'linear', 'C': 1, 'decision_function_shape': 'ovr'}
        self.lr_params = lr_params or {'random_state': 42, 'max_iter': 1000}
        
        self.tree_classifier = DecisionTreeClassifier(**self.tree_params)
        self.svm_classifier = SVC(**self.svm_params)
        self.lr_classifier = LogisticRegression(**self.lr_params)

    def fit(self, X, y):
        X, y = check_X_y(X, y)
        
        # Train Decision Tree
        self.tree_classifier.fit(X, y)
        dt_predictions = self.tree_classifier.predict(X)

        # Train SVM
        self.svm_classifier.fit(X, y)
        svm_predictions = self.svm_classifier.predict(X)

        # Combine predictions
        combined_predictions = np.column_stack((dt_predictions, svm_predictions))

        # Train Logistic Regression model on combined predictions
        self.lr_classifier.fit(combined_predictions, y)

        return self

    def predict(self, X):
        dt_predictions = self.tree_classifier.predict(X)
        svm_predictions = self.svm_classifier.predict(X)

        # Combine predictions
        combined_predictions = np.column_stack((dt_predictions, svm_predictions))

        # Make final predictions using Logistic Regression
        final_predictions = self.lr_classifier.predict(combined_predictions)

        return final_predictions

    def score(self, X, y):
        # Score the model using accuracy
        predictions = self.predict(X)
        accuracy = np.mean(predictions == y)
        return accuracy


In [33]:
# Load the updated dataset
df = pd.read_csv('processed_stock_data.csv')

# Extract independent variables (X) and target variable (y)
X = df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
y = df['signal']


In [34]:
# DecisionTreeClassifier does not accept missing values encoded as NaN.
# Handle missing values in X
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)


In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

# Create and fit the hybrid model
model = HybridTreeSVM()


In [36]:
model.fit(X_train, y_train)

In [37]:
# Make predictions and evaluate accuracy
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred)*100)

Accuracy: 40.51282051282051


In [38]:
hksvm_conf_matrix = confusion_matrix(y_test, y_pred)
print("\nHKSVM Confusion Matrix:")
print(hksvm_conf_matrix)


HKSVM Confusion Matrix:
[[23 33  9]
 [32 41 12]
 [10 20 15]]
