In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [16]:
# 1. Data Preprocessing :
df = pd.read_csv('quantvision_financial_dataset_200.csv')

# Encoding categorical variables :
label_encoders = {}
categorical_columns = ['asset_type', 'market_regime']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# Preparing features and target :
X = df.drop('future_trend', axis=1)
y = df['future_trend']

# Scaling numerical features :
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting data :
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

In [17]:
# 2. Model Training :

lr_model = LogisticRegression(random_state = 42, max_iter = 1000)
lr_model.fit(X_train, y_train)
lr_prediction = lr_model.predict(X_test)


nn_model = MLPClassifier(
    hidden_layer_sizes = (64, 32),
    activation = 'relu',
    solver = 'adam',
    max_iter = 1000,
    random_state = 42
)

nn_model.fit(X_train, y_train)
nn_prediction = nn_model.predict(X_test)

In [None]:
# 3. Model Evaluation :
def print_metrics(y_true, y_pred, model_name):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    

    print(f"\n{model_name} Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    

    return accuracy, precision, recall, f1

print("\nTraining Logistic Regression...")
# Logistic Regression metrics
lr_accuracy, lr_precision, lr_recall, lr_f1 = print_metrics(y_test, lr_prediction, "Logistic Regression")

print("\nTraining Neural Network...")
# Neural Network metrics
nn_accuracy, nn_precision, nn_recall, nn_f1 = print_metrics(y_test, nn_prediction, "Neural Network")

# Confusion Matrices :
print("\nConfusion Matrices:")
lr_cm = confusion_matrix(y_test, lr_prediction)
nn_cm = confusion_matrix(y_test, nn_prediction)

print("Logistic Regression Confusion Matrix:")
print(lr_cm)
print("\nNeural Network Confusion Matrix:")
print(nn_cm)


Training Logistic Regression...

Logistic Regression Results:
Accuracy: 0.9250
Precision: 0.9474
Recall: 0.9730
F1-Score: 0.9600

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.33      0.40         3
           1       0.95      0.97      0.96        37

    accuracy                           0.93        40
   macro avg       0.72      0.65      0.68        40
weighted avg       0.91      0.93      0.92        40


Training Neural Network...

Neural Network Results:
Accuracy: 0.9250
Precision: 0.9250
Recall: 1.0000
F1-Score: 0.9610

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.93      1.00      0.96        37

    accuracy                           0.93        40
   macro avg       0.46      0.50      0.48        40
weighted avg       0.86      0.93      0.89        40


Confusion Matrices:
Logistic Regression Confusi

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
# 4. Analysis & Financial Interpretation :
"""
-Why Logistic Regression performs reasonably good or bad??

Logistic Regression is a linear model, so it performs reasonably good when the relationship between
the features (like technical_score, slope_strength, etc.) and future_trend is close to linear or
monotonic.

-Why Neural Networks performs better of worse??

Neural Network can model non‑linear interactions, so it will typically perform better when the
dataset contains complex interactions between volatility, technical_score, trend_continuation and
other features.

-The effect of volatility on predictions??

High_volatility = 1 corresponds to more noise and sudden price reversals, so both models
tend to make more errors on these rows.
When high_volatility = 0, technical_score and slope_strength are more reliable,
so the models produce more stable and accurate predictions

-Role of trend continuation??

When trend_continuation = 1 and aligns with a bullish regime, the probability of future_trend = 1
is higher, so both models gain predictive power from this feature
When trend_continuation = 0, the models rely more on other indicators (technical_score,
candlestick_variance, pattern_symmetry), and their confidence usually decreases.

-Situation where the model fails and why??

Sideways or regime‑change zones (market_regime = sideways, or sudden switches between bullish
and bearish) are where both models typically fail, because price direction is inherently uncertain
and random.
The models also tend to fail on assets or samples where technical_score is extreme but volatility
is high (e.g., strong technical signal but choppy price action)

"""

'\n-Why Logistic Regression performs reasonably good or bad??\n\nLogistic Regression is a linear model, so it performs reasonably good when the relationship between\nthe features (like technical_score, slope_strength, etc.) and future_trend is close to linear or\nmonotonic.\n\n-Why Neural Networks performs better of worse??\n\nNeural Network can model non‑linear interactions, so it will typically perform better when the\ndataset contains complex interactions between volatility, technical_score, trend_continuation and\nother features.\n\n-The effect of volatility on predictions??\n\nHigh_volatility = 1 corresponds to more noise and sudden price reversals, so both models\ntend to make more errors on these rows.\nWhen high_volatility = 0, technical_score and slope_strength are more reliable,\nso the models produce more stable and accurate predictions\n\n-Role of trend continuation??\n\nWhen trend_continuation = 1 and aligns with a bullish regime, the probability of future_trend = 1\nis hi