In [2]:
import numpy as np
import pandas as pd

from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# --- Data loading -----------------------------------------------------------
# The label column is "future_trend"; every other column is kept in X.
df = pd.read_csv("quantvision_financial_dataset_200.csv")

y = df["future_trend"]
X = df.drop(columns=["future_trend"])

# --- Feature groups ---------------------------------------------------------
# Categorical columns are one-hot encoded; numerical columns are standardized.
categorical_features = ["asset_type", "market_regime"]
numerical_features = [
    "lookback_days", "high_volatility",
    "trend_continuation", "technical_score",
    "edge_density", "slope_strength",
    "candlestick_variance", "pattern_symmetry",
]

# --- Preprocessing ----------------------------------------------------------
# drop="first" removes one dummy column per categorical feature.
# Columns of X that appear in neither list are discarded, because
# ColumnTransformer's default is remainder="drop".
preprocessor = ColumnTransformer(transformers=[
    ("cat", OneHotEncoder(drop="first"), categorical_features),
    ("num", StandardScaler(), numerical_features),
])

# --- Train/test split -------------------------------------------------------
# 20% of the rows are held out; stratify=y keeps the class proportions of the
# label the same in both parts, and random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Baseline: preprocessing followed by logistic regression. max_iter is raised
# to 1000 to give the solver more iterations to converge.
log_reg_pipeline = Pipeline([
    ("preprocess", preprocessor),
    ("model", LogisticRegression(max_iter=1000)),
])
log_reg_pipeline.fit(X_train, y_train)

# Predictions on the held-out rows.
y_pred_lr = log_reg_pipeline.predict(X_test)

# Report each metric in turn; labels are padded so the values line up.
print("===== Logistic Regression Results =====")
for metric_label, metric_fn in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_lr))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_lr))
print()

# Neural-network model: preprocessing followed by a multi-layer perceptron
# with two hidden layers (64 and 32 units), ReLU activations and the Adam
# solver. random_state pins the weight initialisation for reproducibility.
#
# The preprocessor is cloned rather than passed directly: Pipeline keeps the
# step objects it is given, so reusing the same `preprocessor` instance that
# `log_reg_pipeline` holds would refit that pipeline's transformer whenever
# this one is fitted. clone() yields an unfitted copy with the same
# parameters, so each pipeline owns its own fitted encoder/scaler.
mlp_pipeline = Pipeline(steps=[
    ("preprocess", clone(preprocessor)),
    ("model", MLPClassifier(
        hidden_layer_sizes=(64, 32),
        activation="relu",
        solver="adam",
        max_iter=500,
        random_state=42
    ))
])

mlp_pipeline.fit(X_train, y_train)

# Predictions on the held-out rows.
y_pred_nn = mlp_pipeline.predict(X_test)

# Same metrics and layout as the logistic-regression report so the two
# result blocks can be compared line by line.
print("===== Neural Network Results =====")
print("Accuracy :", accuracy_score(y_test, y_pred_nn))
print("Precision:", precision_score(y_test, y_pred_nn))
print("Recall   :", recall_score(y_test, y_pred_nn))
print("F1-score :", f1_score(y_test, y_pred_nn))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_nn))

===== Logistic Regression Results =====
Accuracy : 0.925
Precision: 0.9473684210526315
Recall   : 0.972972972972973
F1-score : 0.96
Confusion Matrix:
 [[ 1  2]
 [ 1 36]]

===== Neural Network Results =====
Accuracy : 0.95
Precision: 0.972972972972973
Recall   : 0.972972972972973
F1-score : 0.972972972972973
Confusion Matrix:
 [[ 2  1]
 [ 1 36]]


# Logistic Regression

Logistic Regression is a linear classification model commonly used as a baseline in machine learning.

# Advantages

Simple and fast to train

Easy to interpret (coefficients show feature importance)

Works well when data is linearly separable

Less risk of overfitting on small datasets

# Limitations

Cannot capture non-linear relationships

Performs poorly when features interact in complex ways

Assumes a linear decision boundary

# Neural Networks


Neural Networks are non-linear models capable of learning complex relationships between features.

# Advantages

Captures non-linear interactions between indicators

Learns complex market behavior

Performs better on noisy and volatile data

Adapts well to changing market regimes

# Limitations

Requires more data and computation

Less interpretable than Logistic Regression

Higher risk of overfitting if not tuned properly

# Which is better?

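Logistic Regression provides a strong and interpretable baseline, but it cannot model complex, non-linear market dynamics. In this experiment the Neural Network outperformed Logistic Regression (accuracy 0.95 vs. 0.925; F1-score 0.973 vs. 0.96), which suggests that it captures non-linear relationships among the technical indicators and is more suitable for predicting future price movements in volatile and dynamic financial markets. Note, however, that the gap corresponds to a single additional correct prediction on a 40-sample test set containing only 3 negative examples, so the result should be confirmed with cross-validation or a larger dataset before it is treated as conclusive.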