<a href="https://colab.research.google.com/github/MPM552/PineScripting/blob/main/PineBacktesterRandomForest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# 1. Data Acquisition with Index Flattening
def get_data(ticker="SPY", period="2y", interval="1d"):
    """Download price history for *ticker* and attach the VIX close to it.

    Both series are requested from yfinance with the same ``period`` and
    ``interval`` and with ``auto_adjust=True``.

    Args:
        ticker: Symbol to download.
        period: Look-back period string passed through to ``yf.download``.
        interval: Bar interval string passed through to ``yf.download``.

    Returns:
        The downloaded frame for *ticker* with single-level column names and
        an extra ``'VIX'`` column holding the ``'Close'`` of ``^VIX``.
    """
    download_kwargs = dict(period=period, interval=interval, auto_adjust=True)
    data = yf.download(ticker, **download_kwargs)
    vix = yf.download("^VIX", **download_kwargs)

    # A download may come back with MultiIndex columns; keep only the first
    # level so that plain names such as 'Close' can be used below.
    for frame in (data, vix):
        if isinstance(frame.columns, pd.MultiIndex):
            frame.columns = frame.columns.get_level_values(0)

    # Assignment aligns on the index, so dates missing from the VIX frame
    # end up as NaN in the new column.
    data['VIX'] = vix['Close']
    return data

In [None]:
# 2. Replicating Pine Script Logic (Feature Engineering)
def apply_zones(df, pivot_len=5, smooth_len=30, vix_mult=0.25):
    """Derive support/resistance zone features from OHLC + VIX data.

    A bar is treated as a pivot high (low) when its ``High`` (``Low``) is the
    maximum (minimum) of the ``2 * pivot_len + 1`` bar window centred on it.
    Because that window includes ``pivot_len`` bars *after* the pivot, the
    level is only made available ``pivot_len`` bars later, on the bar where
    the pivot is confirmed. Every output column at row ``t`` therefore depends
    only on rows ``<= t`` (no look-ahead leakage into the features).
    NOTE(review): this is intended to mirror a left/right-confirmed pivot as
    used in the Pine Script being replicated; tie handling on equal
    highs/lows may differ from it - confirm against the original script.

    Args:
        df: Frame with ``'High'``, ``'Low'``, ``'Close'`` and ``'VIX'``
            columns. It is not modified; a copy is used internally.
        pivot_len: Number of bars required on each side of a pivot.
        smooth_len: Span of the EMA applied to the structure levels.
        vix_mult: Multiplier applied to VIX when sizing the zone.

    Returns:
        A new frame with the added columns ``PH``, ``PL``, ``structRes``,
        ``structSup``, ``zoneSize``, ``resMid``, ``supMid``, ``midZone``,
        ``dist_to_res``, ``dist_to_sup`` and ``dist_to_mid``; rows containing
        any NaN (e.g. before the first confirmed pivots) are dropped.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    window = pivot_len * 2 + 1

    # Centred rolling extremes are used only to *detect* pivots.
    rolling_high = df['High'].rolling(window=window, center=True).max()
    rolling_low = df['Low'].rolling(window=window, center=True).min()

    # Keep the price only on pivot bars, then move it to the confirmation bar
    # (pivot_len bars later) so no future information reaches earlier rows.
    pivot_high = df['High'].where(df['High'] == rolling_high).shift(pivot_len)
    pivot_low = df['Low'].where(df['Low'] == rolling_low).shift(pivot_len)

    # Last confirmed pivot levels. PH/PL are stored forward-filled (dense) so
    # that the final dropna() only removes the warm-up rows.
    df['PH'] = pivot_high.ffill()
    df['PL'] = pivot_low.ffill()

    # "Last known structure" levels.
    df['structRes'] = df['PH']
    df['structSup'] = df['PL']

    # VIX-based Zone Thickness
    # Note: mintick is approximated as 0.01 for liquid stocks
    df['zoneSize'] = df['VIX'] * vix_mult * 0.01

    # Apply Smoothing (EMA)
    df['resMid'] = df['structRes'].ewm(span=smooth_len).mean()
    df['supMid'] = df['structSup'].ewm(span=smooth_len).mean()
    df['midZone'] = (df['resMid'] + df['supMid']) / 2

    # Features: Distance from zones
    df['dist_to_res'] = df['Close'] - df['resMid']
    df['dist_to_sup'] = df['Close'] - df['supMid']
    df['dist_to_mid'] = df['Close'] - df['midZone']

    return df.dropna()

In [None]:
# 3. Target Labeling (The "Optimal" Entry)
def label_data(df, horizon=5, threshold=0.02):
    """Attach a forward-return column and a 3-class trade label.

    Labels:
        1 (long):  Close rises by more than ``threshold`` over ``horizon`` rows.
        2 (short): Close falls by more than ``threshold`` over ``horizon`` rows.
        0 (no trade): anything in between.

    Args:
        df: Frame with a ``'Close'`` column. It is not modified; a copy is
            used internally.
        horizon: Number of rows to look ahead when computing the return.
        threshold: Absolute fractional return that triggers a long/short
            label (``0.02`` means 2%).

    Returns:
        A new frame with ``'future_return'`` and ``'target'`` columns added.
        Rows containing any NaN are dropped, which includes the last
        ``horizon`` rows because their forward return is undefined.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # Fractional change of Close between this row and the row `horizon` ahead.
    df['future_return'] = df['Close'].shift(-horizon) / df['Close'] - 1

    conditions = [
        df['future_return'] > threshold,   # Long
        df['future_return'] < -threshold,  # Short
    ]
    # NaN forward returns match neither condition and would get 0 here, but
    # those rows are removed by the dropna() below.
    df['target'] = np.select(conditions, [1, 2], default=0)
    return df.dropna()

In [None]:
# 4. Model Training
# Build the labelled feature table: download -> zone features -> forward-return labels.
data = get_data()
data = apply_zones(data)
data = label_data(data)

# Model inputs: distance of Close from each smoothed zone level, plus the VIX level.
features = ['dist_to_res', 'dist_to_sup', 'dist_to_mid', 'VIX']
X = data[features]
y = data['target']

# shuffle=False keeps the row order, so the last 20% of rows become the test set.
# NOTE(review): labels look `horizon` rows ahead, so the final training rows are
# labelled using prices that fall inside the test period - consider leaving a
# gap of `horizon` rows between the two sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fixed random_state so repeated runs on the same data build the same forest.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 5. Prediction
# Per-class precision / recall / F1 on the held-out rows
# (0 = no trade, 1 = long, 2 = short, as assigned in label_data).
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

# Precision	"Quality of Signal." When the model says "Buy" (1), how often is it actually a winning trade? Your Long precision is 0.43 (43%).
# Recall	"Opportunity Capture." Out of all the great "Long" opportunities that actually happened, how many did the model actually catch? Your Long recall is 0.27 (27%).
# F1-Score	The balance between Precision and Recall. It's a "general health" score for that specific signal.
# Support	The number of actual occurrences in your testing data. (e.g., there were 81 "Hold" days but only 11 "Long" and 6 "Short" days).

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


              precision    recall  f1-score   support

           0       0.87      0.95      0.91        81
           1       0.43      0.27      0.33        11
           2       0.50      0.17      0.25         6

    accuracy                           0.83        98
   macro avg       0.60      0.46      0.50        98
weighted avg       0.79      0.83      0.80        98

