In [33]:

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report


In [34]:

# Load the raw dataset and preview the first rows.
# NOTE(review): this is an absolute, environment-specific path; the notebook
# will only run where the file exists at exactly this location.
csv_path = "/content/quantvision_financial_dataset_200.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head()


Unnamed: 0,lookback_days,asset_type,market_regime,high_volatility,trend_continuation,technical_score,edge_density,slope_strength,candlestick_variance,pattern_symmetry,future_trend
0,48,equity,bullish,0,1,59.99,0.504,0.298,1.572,0.768,1
1,38,index,bullish,1,1,78.54,0.559,0.037,0.692,0.538,1
2,24,equity,bullish,1,0,56.03,0.617,0.212,1.419,0.301,1
3,52,equity,bullish,0,0,66.51,0.36,0.347,0.699,0.498,1
4,17,equity,bullish,1,1,61.21,0.492,0.144,2.52,0.828,1


In [35]:

# Print each column's dtype and non-null count to check for missing values
# and to see which columns are numeric versus object (string) typed.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   lookback_days         200 non-null    int64  
 1   asset_type            200 non-null    object 
 2   market_regime         200 non-null    object 
 3   high_volatility       200 non-null    int64  
 4   trend_continuation    200 non-null    int64  
 5   technical_score       200 non-null    float64
 6   edge_density          200 non-null    float64
 7   slope_strength        200 non-null    float64
 8   candlestick_variance  200 non-null    float64
 9   pattern_symmetry      200 non-null    float64
 10  future_trend          200 non-null    int64  
dtypes: float64(5), int64(4), object(2)
memory usage: 17.3+ KB


In [24]:

# Separate the label from the predictors: y is the column to predict,
# X is every remaining column of df.
target_column = "future_trend"
y = df[target_column]
X = df.drop(columns=[target_column])


In [25]:
# Group the feature columns into three categories, because each category
# gets a different preprocessing step.
numerical_features = [
    "lookback_days", "technical_score", "edge_density",
    "slope_strength", "candlestick_variance", "pattern_symmetry",
]
categorical_features = ["asset_type", "market_regime"]
binary_features = ["high_volatility", "trend_continuation"]

# One (name, transformer, columns) entry per category:
#   num - standardise the numeric columns
#   cat - one-hot encode the string columns, dropping the first level of each
#   bin - pass the binary flag columns through unchanged
column_steps = [
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(drop="first"), categorical_features),
    ("bin", "passthrough", binary_features),
]
preprocessor = ColumnTransformer(transformers=column_steps)


In [75]:
# Hold out 20% of the rows for evaluation.
# stratify=y keeps the class proportions of the target the same in the train
# and test parts; random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=4,
    stratify=y
)


In [27]:

# Fit the preprocessor on the training rows only, then apply the already
# fitted transformer to the test rows so that no statistics are learned
# from the held-out data.
X_train_processed = preprocessor.fit_transform(X=X_train)
X_test_processed = preprocessor.transform(X=X_test)


In [66]:

# Baseline model: logistic regression trained on the preprocessed training
# features (fit returns the estimator itself, so construction and fitting
# can be chained), followed by predictions for the held-out rows.
log_reg = LogisticRegression(max_iter=100).fit(X_train_processed, y_train)
y_pred_lr = log_reg.predict(X_test_processed)


In [76]:

# Report the logistic regression's test-set scores, one line per metric,
# followed by the confusion matrix (rows: true class, columns: predicted).
lr_scorers = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for lr_label, lr_scorer in lr_scorers:
    print(lr_label, lr_scorer(y_test, y_pred_lr))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_lr))


Accuracy: 0.925
Precision: 0.925
Recall: 1.0
F1 Score: 0.961038961038961
Confusion Matrix:
 [[ 0  3]
 [ 0 37]]


In [77]:

# Neural-network model: a multi-layer perceptron with two hidden layers
# (64 then 32 units) and ReLU activations. random_state fixes the weight
# initialisation so repeated runs give the same model.
mlp_params = {
    "hidden_layer_sizes": (64, 32),
    "activation": "relu",
    "max_iter": 1000,
    "random_state": 42,
}
mlp = MLPClassifier(**mlp_params).fit(X_train_processed, y_train)
y_pred_mlp = mlp.predict(X_test_processed)


In [55]:

# Report the neural network's test-set scores, one line per metric,
# followed by the confusion matrix (rows: true class, columns: predicted).
mlp_scorers = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for mlp_label, mlp_scorer in mlp_scorers:
    print(mlp_label, mlp_scorer(y_test, y_pred_mlp))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_mlp))


Accuracy: 0.9
Precision: 0.9230769230769231
Recall: 0.972972972972973
F1 Score: 0.9473684210526315
Confusion Matrix:
 [[ 0  3]
 [ 1 36]]


In [51]:

# Per-class precision / recall / F1 for both models.
# zero_division=0 makes the handling of undefined metrics explicit: when a
# model never predicts a class, that class's precision is 0/0. It is reported
# as 0.0 (the same value the default produces) but without emitting an
# UndefinedMetricWarning for every affected average.
# NOTE(review): a 0.00 row for class 0 means the minority class is not being
# detected at all, so the overall accuracy should be read with that in mind.
print("Logistic Regression Report")
print(classification_report(y_test, y_pred_lr, zero_division=0))

print("Neural Network Report")
print(classification_report(y_test, y_pred_mlp, zero_division=0))


Logistic Regression Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.93      1.00      0.96        37

    accuracy                           0.93        40
   macro avg       0.46      0.50      0.48        40
weighted avg       0.86      0.93      0.89        40

Neural Network Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.92      0.97      0.95        37

    accuracy                           0.90        40
   macro avg       0.46      0.49      0.47        40
weighted avg       0.85      0.90      0.88        40



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
