In [None]:
#Install libraries
# !pip install xgboost
# !pip install lightgbm

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn import neighbors
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB as NB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier as forest

from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn import metrics
from sklearn.metrics import roc_curve, auc, ConfusionMatrixDisplay
from sklearn.model_selection import learning_curve, TimeSeriesSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score

In [None]:

# Load the dataset
# The code below reads the columns log_return, Open, High, Low, Close and Volume.
df = pd.read_csv('../data/CL_F_data.csv')

window = 5 # set window to the past 5 days

# Intraday log price ratios, computed once and reused by the range-based estimators
log_hl = np.log(df['High'] / df['Low'])
log_co = np.log(df['Close'] / df['Open'])
log_ho = np.log(df['High'] / df['Open'])
log_lo = np.log(df['Low']  / df['Open'])

# Compute volatility over window -------------------------------------------------
ret_window = df['log_return'].rolling(window)
df['realized_vol'] = ret_window.std() * np.sqrt(252)  # rolling std scaled by sqrt(252)
df['rolling_mean'] = ret_window.mean() # Rolling mean
df['rolling_std'] = ret_window.std() # Rolling standard deviation (unscaled)

# Add different established volatility estimators -------------------------------------------------

## 1) Parkinson volatility
df['parkinson_vol'] = np.sqrt((1 / (4 * np.log(2))) * (log_hl ** 2))

## 2) Garman–Klass
df['garman_klass'] = np.sqrt(
    0.5 * (log_hl ** 2)
  - (2 * np.log(2) - 1) * (log_co ** 2)
)

## 3) Rogers–Satchell
df['rogers_satchell'] = np.sqrt(
    (log_ho * (log_ho - log_co))
  + (log_lo * (log_lo - log_co))
)

## 4) Yang–Zhang
###   a) Overnight & open-to-close returns
df['overnight_ret']    = np.log(df['Open'] / df['Close'].shift(1))
df['open_close_ret']   = log_co

###   b) rolling variances
# NOTE(review): rs_var is the single-day Rogers–Satchell value squared, not a
# window average, and k is a fixed constant — confirm this is the intended variant.
k = 0.34
ov_var = df['overnight_ret'].rolling(window).var()
oc_var = df['open_close_ret'].rolling(window).var()
rs_var = df['rogers_satchell']**2

###   c) combine and annualize by sqrt(252)
yz_var = ov_var + k * oc_var + (1 - k) * rs_var
df['yang_zhang'] = np.sqrt(yz_var * 252)

## 5) Volume dynamics: daily percentage change in trading volume -------------------------------------------------
df['volume_change'] = df['Volume'].pct_change()

# .copy() gives an independent frame, so the column assignments below do not
# write to an object pandas tracks as derived from the pre-dropna frame.
df = df.dropna().copy()

# Label: 1 when realized volatility `window` rows ahead exceeds the current value
df['future_vol'] = df['realized_vol'].shift(-window)
df = df.dropna().copy()  # removes the trailing rows that have no future value
df['target'] = (df['future_vol'] > df['realized_vol']).astype(int)

## 6) Add new features to improve AUC scores -------------------------------------------------
df['return_lag1'] = df['log_return'].shift(1)
df['volume_lag1'] = df['Volume'].shift(1)

def rolling_stats(series, window):
    """Return the rolling mean and rolling standard deviation of ``series``.

    Both results are computed over the same ``window``-row rolling window and
    returned as a ``(mean, std)`` tuple.
    """
    roller = series.rolling(window)
    return roller.mean(), roller.std()

# Rolling mean / std of the log returns over 5- and 10-row windows
log_ret = df['log_return']
for span in (5, 10):
    df[f'ma_{span}'], df[f'std_{span}'] = rolling_stats(log_ret, span)

# Change in log return versus five rows earlier, and 5-row rolling std
df['momentum_5'] = log_ret - log_ret.shift(5)
df['volatility_5'] = log_ret.rolling(5).std()

# Remove the rows left incomplete by the shifts and rolling windows above
df.dropna(inplace=True)

In [None]:
# Define predictors and target
feature_cols = ['rolling_std', 'yang_zhang', 'std_5', 'std_10', 'volatility_5']
predictors = df[feature_cols]
target = df['target']

# Split the data into training and testing sets.
# shuffle=False keeps row order, so the last fifth of the rows forms the test set.
split_parts = train_test_split(predictors, target, test_size=1/5, shuffle=False)
X_train, X_test, y_train, y_test = split_parts

# Scale the features: statistics are learned on the training rows only and
# then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
tscv = TimeSeriesSplit(n_splits=5) # A time series split allows for cross-validation that respects temporal order

# Cap KNN's n_neighbors at the smallest training fold the splitter produces.
# With default settings each test fold holds n // (n_splits + 1) rows and the
# first training fold is whatever precedes the first test fold.
n_train_rows = len(X_train)
fold_test_rows = n_train_rows // (tscv.n_splits + 1)
max_n_neighbors = n_train_rows - tscv.n_splits * fold_test_rows  # Smallest training fold size

# Keep only the candidates that fit in every fold; fall back to the largest
# feasible value so the grid is never empty.
knn_candidates = [n for n in [5, 10, 20, 30] if n <= max_n_neighbors] or [max(1, max_n_neighbors)]

# Define models and small hyperparameter grids
model_grids = {
    'Decision Tree': {
        'model': DecisionTreeClassifier(random_state=42, class_weight='balanced'),
        'grid':  {'max_depth': [3, 10, None]} # max_depth caps the number of splits between the root and a leaf;
                                              # it controls the tree's complexity: shallow trees have high bias, deep trees high variance
    },
    'Random Forest': {
        'model': forest(random_state=42, class_weight='balanced'),
        'grid':  {'n_estimators': [50, 100, 200]}
    },
    'Logistic Regression': {
        'model': LogisticRegression(max_iter=1000, class_weight='balanced'),
        'grid':  {'C': [0.1, 1]}
    },
    'KNN': {
        'model': neighbors.KNeighborsClassifier(weights='distance'), # A distance weight treats closer points with greater influence
        'grid':  {'n_neighbors': knn_candidates}
    },
    'SVM': {
        'model': SVC(kernel='rbf',max_iter= 2000, probability=True, random_state=42),
        'grid': {'C': [0.1, 1, 10], 'gamma': ['scale', 0.01, 0.1, 1, 'auto']}
    },
    'XGBoost': {
        'model': XGBClassifier(eval_metric='logloss', random_state=42),
        'grid': {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7], 'learning_rate': [0.01, 0.1, 0.2]}
    },
    'LightGBM': {
        'model': LGBMClassifier(max_depth= -1, random_state=42, verbosity=-1,),
        'grid': {'n_estimators': [50, 100, 200, 500], 'num_leaves': [31, 63, 128], 'learning_rate': [0.01, 0.1]}
    }
}

best_models = {}

# Grid‐search each model, selecting hyperparameters by ROC AUC across the time-series folds
for name, cfg in model_grids.items():
    gs = GridSearchCV(cfg['model'], cfg['grid'], cv=tscv, scoring='roc_auc')
    # scale the inputs where needed
    if name in ('Logistic Regression','KNN', 'SVM' ):
        gs.fit(X_train_scaled, y_train)
    else:
        gs.fit(X_train, y_train)
    best_models[name] = gs.best_estimator_
    print(f'{name} -- best parameter: {gs.best_params_}')

In [None]:
# Train models
# Each tuned estimator is fitted on the training data; logistic regression,
# KNN and SVM receive the standardised features, the others the raw ones.

## Decision Tree model
tree = best_models['Decision Tree'].fit(X_train, y_train)

## Logistic Regression model
logr = best_models['Logistic Regression'].fit(X_train_scaled, y_train)

## Random Forest model
rf = best_models['Random Forest'].fit(X_train, y_train)

## KNN model
knn = best_models['KNN'].fit(X_train_scaled, y_train)

## SVM model
svm = best_models['SVM'].fit(X_train_scaled, y_train)

## XGBoost model
xg = best_models['XGBoost'].fit(X_train, y_train)

## LightGBM model
gbm = best_models['LightGBM'].fit(X_train, y_train)

## Naive Bayes model for baseline (default settings, no tuning)
nb_model = NB().fit(X_train, y_train)

## Linear discriminant analysis model for baseline (default settings, no tuning)
lda_model = LDA().fit(X_train, y_train)

In [None]:
# Function to score models
def getScore (X_test, y_test, model, show_confusion_matrix=False):
    """Score a fitted classifier on a hold-out set and save the results to ../out.

    Parameters
    ----------
    X_test : feature matrix passed to ``model.predict``.
    y_test : true labels, coded 0 (negative) / 1 (positive).
    model : fitted estimator exposing ``predict``. Models known to this
        notebook get a short title; any other model is titled by its class name.
    show_confusion_matrix : bool, default False
        When True, also draw the confusion matrix for the predictions.

    Returns
    -------
    str
        The text classification report, which is also written to
        ``../out/<title>_scores.txt`` together with accuracy and macro-averaged
        precision and recall.
    """
    # Check which model is being scored and choose title
    model_titles = {
        tree: "DT", logr: "LR", nb_model: "NB",
        lda_model: "LDA", rf: "Forest", svm: "SVM",
        xg: "XG Boost", gbm: "LightGBM", knn: "KNN"
    }

    title = model_titles.get(model, type(model).__name__)

    names = ['negative', 'positive']
    class_labels = [0, 1]
    y_pred = model.predict(X_test)

    if show_confusion_matrix:
        cm = metrics.confusion_matrix(y_test, y_pred, labels=class_labels)
        disp = ConfusionMatrixDisplay(cm, display_labels=names)
        fig, ax = plt.subplots(figsize=(8, 6))
        disp.plot(cmap=plt.cm.Greys, ax=ax)
        ax.set_title(f'{title} Confusion Matrix')
        plt.show()

    # Calculating classification report, accuracy, average precision and recall across all classes.
    # labels= pins both classes so a hold-out set containing a single class still
    # matches the two target names; zero_division=0 scores an absent class as 0.
    cf = metrics.classification_report(
        y_test, y_pred, labels=class_labels, zero_division=0, target_names=names
    )
    accuracy = metrics.accuracy_score(y_test, y_pred)
    precision = metrics.precision_score(
        y_test, y_pred, labels=class_labels, average='macro', zero_division=0
    )
    recall = metrics.recall_score(
        y_test, y_pred, labels=class_labels, average='macro', zero_division=0
    )  # True Positive Rate

    # Save the scores (create the output folder first so the write cannot fail on a fresh checkout)
    os.makedirs('../out', exist_ok=True)
    with open(f'../out/{title}_scores.txt', "w") as f:
        f.write(f"Classification Report: \n{cf}\n")
        f.write(f"Accuracy: {accuracy:.2f}\n")
        f.write(f"Precision (macro avg): {precision:.2f}\n")
        f.write(f"True Positive Rate (Recall - macro avg): {recall:.2f}\n")

    return cf

In [None]:
# Function to plot area under curve
def plotAuc (X_test, X_test_scaled, y_test, logr, tree, rf, lda, nb, knn, svm, xg, gbm):
    """Plot ROC curves for all fitted classifiers and report their AUC.

    Parameters
    ----------
    X_test, X_test_scaled : hold-out features, raw and standardised. The
        logistic regression, SVM and KNN models are scored on the standardised
        version, every other model on the raw one.
    y_test : true binary labels for the hold-out rows.
    logr, tree, rf, lda, nb, knn, svm, xg, gbm : fitted estimators exposing
        ``predict_proba``.

    Side effects
    ------------
    Writes ``../out/ROC.png`` and ``../out/AUC_scores.txt``, prints the AUC
    values and shows the figure.

    Returns
    -------
    The ``matplotlib.pyplot`` module (kept for backward compatibility).
    """
    # (legend label, report label, fitted model, features it expects)
    curve_specs = [
        ('Decision Tree', 'Decision Tree', tree, X_test),
        ('Logistic Regression', 'Logistic Regression', logr, X_test_scaled),
        ('Bayes', 'Bayes', nb, X_test),
        ('Linear discriminant analysis', 'Linear discriminat', lda, X_test),
        ('Random Forest', 'Random Forest', rf, X_test),
        ('SVM', 'SVM', svm, X_test_scaled),
        ('XG Boost', 'XG Boost', xg, X_test),
        ('Light GBM', 'Light GBM', gbm, X_test),
        ('k-nearest neighbor', 'k-nearest neighbor', knn, X_test_scaled),
    ]

    fig, ax = plt.subplots()
    report_lines = []
    for legend_label, report_label, estimator, features in curve_specs:
        # Probability of the positive class drives the ROC curve
        probs = estimator.predict_proba(features)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, probs)
        roc_auc = auc(fpr, tpr)
        # Each model is drawn from its own fpr/tpr arrays
        ax.plot(fpr, tpr, label=f'{legend_label} (AUC = {roc_auc:.2f})')
        report_lines.append(f'{report_label} AUC: {roc_auc:.3f}')

    # Plot ROC Curves
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve Comparison')
    ax.legend(loc='lower right')
    ax.grid(True)

    # Save before showing: once the figure has been shown it may be closed,
    # and saving afterwards would write an empty image.
    os.makedirs('../out', exist_ok=True)
    fig.savefig(f'../out/ROC.png')

    # Save the AUC scores
    with open(f'../out/AUC_scores.txt', "w") as f:
        for line in report_lines:
            f.write(f'{line}\n')

    # Print AUC values
    for line in report_lines:
        print(line)

    plt.show()

    return plt

In [None]:
# Function to plot scoring
def getFit (X_train, y_train, model, score, scoring_type, plot=False):
    """Append a model's cross-validation scores to ../out/score_summary.txt.

    Parameters
    ----------
    X_train, y_train : training features and labels; only used for the
        learning curve when ``plot`` is True.
    model : estimator being reported. Models known to this notebook get a
        short title; any other model is titled by its class name.
    score : array-like of cross-validation scores for ``model``.
    scoring_type : str
        scikit-learn scoring name. "precision", "neg_log_loss" and
        "balanced_accuracy" have display titles; other names are shown as-is.
        Scores are sign-flipped for "neg_log_loss".
    plot : bool, default False
        When True, compute a learning curve, draw the fitting graph and save
        it to ``../out/<title>_fitting_graph_<scoreTitle>.png``.

    Returns
    -------
    matplotlib.figure.Figure or None
        The fitting-graph figure when ``plot`` is True, otherwise None.
    """
    # Check which model is being scored and choose title
    model_titles = {
        tree: "DT", logr: "LR", nb_model: "NB",
        lda_model: "LDA", rf: "Forest", svm: "SVM",
        xg: "XG Boost", gbm: "LightGBM", knn: "KNN"
    }

    title = model_titles.get(model, type(model).__name__)

    # Mapping of scoring types to titles
    scoreTitle = {
        "precision": "Precision",
        "neg_log_loss": "Log Loss",
        "balanced_accuracy": "B_accuracy"
    }.get(scoring_type, scoring_type)

    # Convert negative log loss scores to positive values for interpretation
    negate = scoring_type == 'neg_log_loss'
    score = np.asarray(score)
    if negate:
        score = -score

    os.makedirs('../out', exist_ok=True)

    fig1 = None
    if plot:
        # The learning curve is only needed for the figure, so it is skipped otherwise
        train_sizes, train_scores, val_scores = learning_curve(
            model, X_train, y_train, cv=5, scoring= scoring_type,
            train_sizes=np.linspace(0.1, 1.0, 10)
        )
        if negate:
            train_scores = -train_scores
            val_scores = -val_scores

        # Create figure and plot for fitting curve
        fig1, ax1 = plt.subplots(figsize=(8, 6))
        ax1.plot(train_sizes, train_scores.mean(axis=1), 'o-', label=f"Training Accuracy {scoreTitle}", color="grey")
        ax1.plot(train_sizes, val_scores.mean(axis=1), 'o-', label="Holdout Accuracy", color="black")
        # Graph labels
        ax1.set_xlabel("Number of samples")
        ax1.set_ylabel(f"{scoreTitle} (%)")
        ax1.set_title(f"{title} fitting graph")
        ax1.legend()
        ax1.grid(True)

        # Save the figures
        fig1.savefig(f'../out/{title}_fitting_graph_{scoreTitle}.png', dpi=300)

    # Save the scores (append mode: one section per call)
    with open(f'../out/score_summary.txt', "a") as f:
        f.write(f"{title}\n")
        f.write(f"{scoreTitle}\n")
        f.write(f"-----------\n")
        f.write(f"Cross-validation scores:\n{', '.join(map(str, score))}\n")
        f.write(f"Mean {scoreTitle}: {score.mean():.4f}\n")
        f.write(f"Standard deviation: {score.std():.4f}\n\n")

    print(f"Plots and score's saved successfully in ../out/")
    return fig1

In [None]:
# Evaluate the models
# Define possible values
config = {
    "model": [
        "tree", "logr", "nb_model", "lda_model",
        "rf", "svm", "xg", "gbm", "knn"
        ],
}

models = {
    "tree": tree, "logr": logr, "nb_model": nb_model,
    "lda_model": lda_model, "rf": rf, "svm": svm,
    "xg": xg, "gbm": gbm, "knn": knn
}

scoring_type = "balanced_accuracy"

# Models that were trained on the standardised features and must be evaluated on them
scaled_models = ("logr", "knn", "svm")

# Iterate over all models
for model_name in config["model"]:
    model = models[model_name]  # Get the actual model object

    # Pick the feature representation matching how the model was trained
    if model_name in scaled_models:
        X_fit, X_eval = X_train_scaled, X_test_scaled
    else:
        X_fit, X_eval = X_train, X_test

    # NOTE(review): cv=5 is not the TimeSeriesSplit used for the grid search —
    # confirm whether these folds should respect temporal order as well.
    score = cross_val_score(model, X_fit, y_train, cv=5, scoring=scoring_type)
    fig1 = getScore (X_eval, y_test, model) # Score model
    fig2 = getFit (X_fit, y_train, model, score, scoring_type=scoring_type) # Write the score summary
    plt.show()


aucFig = plotAuc (X_test, X_test_scaled, y_test, logr, tree, rf, lda_model, nb_model, knn, svm, xg, gbm)

## Testing on new data below

In [None]:

# Load the dataset
testing_dataf = pd.read_csv('../data/Cl_F_april_data.csv', encoding='utf-8')

window = 5 # set Window to the past 5 days

# Log price ratios reused by the range-based estimators below
new_log_hl = np.log(testing_dataf['High'] / testing_dataf['Low'])
new_log_co = np.log(testing_dataf['Close'] / testing_dataf['Open'])
new_log_ho = np.log(testing_dataf['High'] / testing_dataf['Open'])
new_log_lo = np.log(testing_dataf['Low']  / testing_dataf['Open'])

# Compute volatility over window -------------------------------------------------
new_ret_window = testing_dataf['log_return'].rolling(window)
testing_dataf['realized_vol'] = new_ret_window.std() * np.sqrt(252)
testing_dataf['rolling_mean'] = new_ret_window.mean() # Rolling mean
testing_dataf['rolling_std'] = new_ret_window.std() # Rolling standard deviation

# Add different established volatility estimators -------------------------------------------------

## 1) Parkinson volatility
testing_dataf['parkinson_vol'] = np.sqrt((1 / (4 * np.log(2))) * (new_log_hl ** 2))

## 2) Garman–Klass
testing_dataf['garman_klass'] = np.sqrt(
    0.5 * (new_log_hl ** 2)
  - (2 * np.log(2) - 1) * (new_log_co ** 2)
)

## 3) Rogers–Satchell
testing_dataf['rogers_satchell'] = np.sqrt(
    (new_log_ho * (new_log_ho - new_log_co))
  + (new_log_lo * (new_log_lo - new_log_co))
)

## 4) Yang–Zhang
###   a) Overnight & open-to-close returns
testing_dataf['overnight_ret']    = np.log(testing_dataf['Open'] / testing_dataf['Close'].shift(1))
testing_dataf['open_close_ret']   = new_log_co

###   b) rolling variances
k = 0.34
ov_var = testing_dataf['overnight_ret'].rolling(window).var()       # σ_o^2
oc_var = testing_dataf['open_close_ret'].rolling(window).var()      # σ_c^2
rs_var = testing_dataf['rogers_satchell']**2                        # σ_RS^2

###   c) combine and annualize by sqrt(252)
yz_var = ov_var + k * oc_var + (1 - k) * rs_var
testing_dataf['yang_zhang'] = np.sqrt(yz_var * 252)

## 5) Volume dynamics: daily percentage change in trading volume -------------------------------------------------
testing_dataf['volume_change'] = testing_dataf['Volume'].pct_change()

# No rows are dropped at this stage, so rows with missing values are still present here.
# Where future_vol is missing the comparison is False and target is set to 0.
testing_dataf['future_vol'] = testing_dataf['realized_vol'].shift(-window)
testing_dataf['target'] = (testing_dataf['future_vol'] > testing_dataf['realized_vol']).astype(int)

## 6) Add new features to improve AUC scores -------------------------------------------------
testing_dataf['return_lag1'] = testing_dataf['log_return'].shift(1)
testing_dataf['volume_lag1'] = testing_dataf['Volume'].shift(1)

def rolling_stats(series, window):
    """Compute rolling mean and rolling standard deviation over ``window`` rows.

    Returns a ``(mean, std)`` tuple with one entry per row of ``series``.
    This re-declares the function of the same name defined earlier in the notebook.
    """
    windowed = series.rolling(window)
    mean_part = windowed.mean()
    std_part = windowed.std()
    return mean_part, std_part

# Rolling mean / std of the log returns over 5- and 10-row windows
new_log_ret = testing_dataf['log_return']
for new_span in (5, 10):
    testing_dataf[f'ma_{new_span}'], testing_dataf[f'std_{new_span}'] = rolling_stats(new_log_ret, new_span)

# Change in log return versus five rows earlier, and 5-row rolling std
testing_dataf['momentum_5'] = new_log_ret - new_log_ret.shift(5)
testing_dataf['volatility_5'] = new_log_ret.rolling(5).std()

# Remove every row that has a missing value in any column
testing_dataf.dropna(inplace=True)

In [None]:
# Define two new data records
# Features and labels of the new data set (same predictor columns as used for training)
new_feature_cols = ['rolling_std', 'yang_zhang', 'std_5', 'std_10', 'volatility_5']
testing_X = testing_dataf[new_feature_cols]
testing_y = testing_dataf['target']

# Score the chosen model on the new data
newModel = xg
testing_fig1 = getScore(testing_X, testing_y, newModel)
plt.show()

# Predict probabilities for new data
newProb = newModel.predict_proba(testing_X)

# Create a probability prediction table: one row per scored observation
prob_columns = ['Volatility down (0)', 'Volatility up (1)']
seqRank = pd.DataFrame(newProb, columns=prob_columns)
seqRank['Max Probability'] = seqRank.max(axis=1)  # taken while only the two probability columns exist
seqRank['Predicted Class'] = newModel.predict(testing_X)
seqRank['Volatility Prediction'] = seqRank['Predicted Class'].map(
    lambda cls: 'Up' if cls == 1 else 'Down'
)
seqRank['Date'] = testing_dataf['Date'].values # Add a date to the data frame

In [None]:
# Make trades

p_up = seqRank['Volatility up (1)']  # Probability of volatility going up
p_down = seqRank['Volatility down (0)']  # Probability of volatility going down

# Generate trading signals based on thresholds
tau0 = 0.8 # Buying confidence threshold
tau1 = 0.7 # Selling confidence threshold

# 1 = buy, -1 = sell, 0 = no trade; the buy condition takes precedence
signal_code = np.where(p_up >= tau0, 1,
               np.where(p_down >= tau1, -1, 0))

# Define position sizing logic: stake scales with the margin between the two class probabilities
max_betting_amount  = 100_000  # Base capital for trading
order_amount = np.where(
    signal_code == 1, max_betting_amount * (p_up - (1 - p_up)),  # Buy order size
    np.where(
        signal_code == -1, max_betting_amount * (p_down - (1 - p_down)),  # Sell order size
        np.nan  # No trade
    )
)

# Keep traded amounts as numbers; only untraded rows carry the 'No trade' label.
# (Mixing the label into np.where would turn every amount into a string.)
seqRank['Order Size'] = [
    amount if code != 0 else 'No trade'
    for amount, code in zip(order_amount, signal_code)
]
seqRank['Signal'] = ['Buy' if code == 1 else ('Sell' if code == -1 else 'No Trade') for code in signal_code]

# Reorder the columns to make Date the leftmost column
seqRank = seqRank[['Date', 'Volatility down (0)', 'Volatility up (1)', 'Max Probability', 'Volatility Prediction', 'Signal', 'Order Size']]

probRank = seqRank.sort_values(by='Max Probability', ascending=False)  # rank by confidence

# Save the rank (create the output folder first so the writes cannot fail on a fresh checkout)
os.makedirs('../out', exist_ok=True)
probRank.to_csv('../out/probabilityRankings.csv', index=True) # output rank by probability
seqRank.to_csv('../out/sequentialRankings.csv', index=False) # output sort by sequential data