In [47]:
import numpy as np # type: ignore
import pandas as pd # type: ignore
import json

# Load the JSON file
with open('data.json', 'r') as f:
    json_data = json.load(f)

# Convert the JSON data to a DataFrame
# Assuming the JSON structure is a dictionary where the values are lists of metrics
data = pd.DataFrame(json_data)

# Define the list of features you want to use
features = ['Volatility_20', 'SMA_20', 'EMA_20', 'Upper_Band', 'Lower_Band', 
            'ATR_14', 'Return_Lag_1', 'Return_Lag_5', 'Return_Lag_10']

# Ensure the data contains the necessary columns
missing_features = [col for col in features if col not in data.columns]
if missing_features:
    raise ValueError(f"The following required features are missing from the data: {missing_features}")

# Select the input features (X)
X = data[features]

# Define a threshold for the target variable based on Rolling Volatility
# Assuming 'threshold' is a value you've defined earlier in the code
threshold = 0.05  # Example threshold (replace with your actual threshold)

# Create the binary target variable (y)
y = (data['Volatility_20'] > threshold).astype(int)

# Display the first few rows of the DataFrame for validation
print(X.head())
print(y.head())


   Volatility_20    SMA_20    EMA_20  Upper_Band  Lower_Band    ATR_14  \
0       0.054365  0.917634  0.940645    1.026364    0.808904  0.069794   
1       0.051084  0.915988  0.935084    1.018155    0.813820  0.067323   
2       0.052027  0.915681  0.933879    1.019736    0.811626  0.063417   
3       0.048601  0.921484  0.936775    1.018687    0.824282  0.064174   
4       0.049017  0.927985  0.944551    1.026019    0.829952  0.065928   

   Return_Lag_1  Return_Lag_5  Return_Lag_10  
0     -0.033735     -0.106904      -0.035478  
1     -0.014339     -0.103234      -0.072728  
2      0.045541     -0.060796      -0.089758  
3      0.045372      0.062731      -0.029759  
4      0.056134      0.099398       0.073529  
0    1
1    1
2    1
3    0
4    0
Name: Volatility_20, dtype: int64


In [48]:
from sklearn.model_selection import train_test_split # type: ignore

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [49]:
from sklearn.linear_model import LogisticRegression

model_lr = LogisticRegression()
model_lr.fit(X_train, y_train)

lr_pred_train = model_lr.predict_proba(X_train)[:, 1]  # Probability for class 1
lr_pred_test = model_lr.predict_proba(X_test)[:, 1]


In [50]:
from sklearn.ensemble import RandomForestClassifier

model_rf = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
model_rf.fit(X_train, y_train)

rf_pred_train = model_rf.predict_proba(X_train)[:, 1]
rf_pred_test = model_rf.predict_proba(X_test)[:, 1]


In [51]:
from sklearn.ensemble import GradientBoostingClassifier

model_gbm = GradientBoostingClassifier(n_estimators=200, learning_rate=0.1, max_depth=5, random_state=42)
model_gbm.fit(X_train, y_train)

gbm_pred_train = model_gbm.predict_proba(X_train)[:, 1]
gbm_pred_test = model_gbm.predict_proba(X_test)[:, 1]


In [52]:
train_stack = np.column_stack((lr_pred_train, rf_pred_train, gbm_pred_train))
test_stack = np.column_stack((lr_pred_test, rf_pred_test, gbm_pred_test))


In [53]:
# Train the meta-model
model_meta = LogisticRegression()
model_meta.fit(train_stack, y_train)

# Final predictions on the test set
final_pred = model_meta.predict_proba(test_stack)[:, 1]


In [54]:
from sklearn.metrics import accuracy_score, roc_auc_score

# Convert probabilities to binary predictions (0 or 1)
final_pred_binary = (final_pred > 0.5).astype(int)

# Evaluate performance
accuracy = accuracy_score(y_test, final_pred_binary)
roc_auc = roc_auc_score(y_test, final_pred)

print(f"Accuracy: {accuracy}")
print(f"ROC-AUC: {roc_auc}")


Accuracy: 1.0
ROC-AUC: 1.0


In [55]:
# Generate trading signals based on final predictions
data['Ensemble_Signal'] = 0  # Default to hold/no action
data.loc[X_test.index, 'Ensemble_Signal'] = final_pred_binary  # Update with model's prediction

# 1 = Buy, 0 = Hold/Sell (depending on how you set up the strategy)


In [56]:
def backtest_strategy(signals, prices):
    initial_cash = 100000000
    cash = initial_cash
    position = 0  # Number of shares held

    for i in range(len(signals)):
        if signals[i] == 1:  # Buy signal
            position += cash / prices[i]  # Buy as many shares as possible
            cash = 0  # All cash used to buy shares
        elif signals[i] == 0 and position > 0:  # Sell signal (assuming 0 = sell)
            cash += position * prices[i]  # Sell all shares
            position = 0  # No shares left

    # Final portfolio value
    final_value = cash + position * prices[len(signals) - 1]
    print(f'position end: {position}')
    return final_value

# Assuming 'Close' prices are available in the 'data' DataFrame
final_portfolio_value = backtest_strategy(data['Ensemble_Signal'], data['Close'])
print(f"Final Portfolio Value: ${final_portfolio_value:.2f}")


position end: 0
Final Portfolio Value: $42825126.42
