# This code represents a trading strategy for 1 hour trading intervals
# The predictor is the closing price of the Litecoin cryptocurrency and the minimum threshold is 0.1

In [1]:
################# IMPORTS AND SETUP ##############################


from binance import Client
import pandas as pd
from supporting_functionsM2 import *
from API_KEYS2 import get_keys

# Setting up binance client; the credentials are loaded through get_keys() from API_KEYS2
KEY, SECRET = get_keys()
client = Client(KEY, SECRET)

# Defining setup variables for creating dataset
start = "1483225200"  # Unix seconds: 2016-12-31 23:00 UTC, i.e. 01.01.2017 00:00 in UTC+1
periods = 125  # number of consecutive 30-day download windows (125 * 30 days is a bit over 10 years from `start`)
TICKER = "LTCUSDT"
options = ["high", "low", "close", "volume"]  # BASE OPTIONS: the per-candle features used downstream
EPOCHS = 5  # number of consecutive candles that make up the features of one labelled row
# Threshold value for labelling: predictor value > THRESHOLD => label 1, otherwise label 0.
# The predictor values come from percent_difference, so this is presumably in percent - confirm against the helper.
THRESHOLD = 0.1
# Derived from TICKER so that changing the ticker cannot leave these two column names behind
PREDICTOR = f"{TICKER}:close"
TIME = f"{TICKER}:time"

In [2]:
################# DOWNLOADING RAW DATA ##########################################

LIMIT = 720  # 720h = 30 days, i.e. one download window of hourly candles
KLINE_COLUMNS = ["time", "open", "high", "low", "close", "volume"]

############################################ DOWNLOADING DATA ######################################################################
# One DataFrame is collected per 30-day window and everything is concatenated once at the end;
# concatenating inside the loop copies the growing frame on every iteration.
start1 = start
end1 = next_30_days_unix_timestamp(start1)
window_frames = []

for i in range(periods):
    if i > 0:
        # Moving the start and end of the interval to the next 30-day window
        start1 = next_30_days_unix_timestamp(start1)
        end1 = next_30_days_unix_timestamp(start1)

    # downloading the hourly candlestick lines of the current window
    klines = client.get_historical_klines(
        TICKER,
        client.KLINE_INTERVAL_1HOUR,
        limit=LIMIT,
        start_str=unix_to_datetime_string(start1, in_milliseconds=False),
        end_str=unix_to_datetime_string(end1, in_milliseconds=False),
    )

    # Converting data from list to pandas dataframe; only the first six fields of every kline are kept
    new_data = pd.DataFrame(data=[row[0:6] for row in klines], columns=KLINE_COLUMNS)

    # Windows that returned no candles are skipped so that empty frames never enter the concat
    if not new_data.empty:
        window_frames.append(new_data)

if window_frames:
    data = pd.concat(window_frames, ignore_index=True)
else:
    # Nothing was downloaded: keep an empty frame with the expected columns
    data = pd.DataFrame(columns=KLINE_COLUMNS)

# converting all time values from unix to readable string, not important, just for visual purposes and fact checking
data["time"] = data["time"].apply(unix_to_datetime_string)

raw_data = data
display(raw_data)


Unnamed: 0,time,open,high,low,close,volume
0,2017-12-13 03:00:00,272.00000000,288.00000000,272.00000000,276.00000000,130.86075000
1,2017-12-13 04:00:00,276.00000000,288.00000000,269.99000000,279.00000000,606.48130000
2,2017-12-13 05:00:00,279.00000000,330.00000000,279.00000000,302.00000000,383.76674000
3,2017-12-13 06:00:00,302.00000000,304.11000000,286.00000000,291.13000000,351.59475000
4,2017-12-13 07:00:00,291.13000000,296.00000000,285.00000000,287.00000000,165.12420000
...,...,...,...,...,...,...
64391,2025-04-22 06:00:00,78.66000000,78.97000000,78.25000000,78.64000000,10328.30600000
64392,2025-04-22 07:00:00,78.64000000,79.75000000,78.50000000,79.65000000,13289.86000000
64393,2025-04-22 08:00:00,79.65000000,80.12000000,79.40000000,79.86000000,17718.42700000
64394,2025-04-22 09:00:00,79.85000000,80.02000000,79.30000000,79.42000000,12795.00900000


In [3]:
########################################## PREPROCESSING DATA ####################################################################

# New data object for storing processed data. Every candle is expressed relative to its own open price,
# so the open itself is only used as the reference value and gets no column of its own.
processed_data = {f"{TICKER}:time": [], f"{TICKER}:high": [], f"{TICKER}:low": [], f"{TICKER}:close": [], f"{TICKER}:volume": []}

for i, o in enumerate(raw_data["open"]):  # o == open, the open price value of the candlestick
    if i == 0:  # Skipping the first hour: its volume change needs the previous candle, which does not exist
        continue

    # The raw values are converted with float() everywhere below, so the zero check has to be done on the
    # converted number as well; comparing the raw value with 0 directly does not match a string such as "0.00000000".
    open_price = float(o)
    if open_price == 0:
        continue

    processed_data[f"{TICKER}:time"].append(raw_data["time"][i])  # time is the same
    processed_data[f"{TICKER}:high"].append(percent_difference(open_price, float(raw_data["high"][i])))  # percent diff between open and high
    processed_data[f"{TICKER}:low"].append(percent_difference(open_price, float(raw_data["low"][i])))  # percent diff between open and low
    processed_data[f"{TICKER}:close"].append(percent_difference(open_price, float(raw_data["close"][i])))  # percent diff between open and close
    # percent difference between the volume of the prior candlestick and the volume of the current one
    # NOTE(review): a prior volume of 0 is passed straight to percent_difference - confirm the helper copes with a zero base
    processed_data[f"{TICKER}:volume"].append(percent_difference(float(raw_data["volume"][i-1]), float(raw_data["volume"][i])))


processed_data = pd.DataFrame(data=processed_data, columns=[f"{TICKER}:time", f"{TICKER}:high", f"{TICKER}:low", f"{TICKER}:close", f"{TICKER}:volume"])
display(processed_data)

Unnamed: 0,LTCUSDT:time,LTCUSDT:high,LTCUSDT:low,LTCUSDT:close,LTCUSDT:volume
0,2017-12-13 04:00:00,4.347826,-2.177536,1.086957,363.455467
1,2017-12-13 05:00:00,18.279570,0.000000,8.243728,-36.722412
2,2017-12-13 06:00:00,0.698675,-5.298013,-3.599338,-8.383215
3,2017-12-13 07:00:00,1.672792,-2.105589,-1.418610,-53.035647
4,2017-12-13 08:00:00,1.045296,-2.404181,-2.404181,-24.920956
...,...,...,...,...,...
64390,2025-04-22 06:00:00,0.394101,-0.521231,-0.025426,53.881726
64391,2025-04-22 07:00:00,1.411495,-0.178026,1.284334,28.674150
64392,2025-04-22 08:00:00,0.590082,-0.313873,0.263653,33.322902
64393,2025-04-22 09:00:00,0.212899,-0.688791,-0.538510,-27.786993


In [None]:
############################# STATISTICAL ANALYSIS OF PROCESSED DATA #######################################
def print_threshold_summary(name, values, thresholds=(0.5, 0.6, 0.7, 0.8, 0.9, 1), with_share=(0.5, 1)):
    """Print the maximum of `values` and how many entries exceed each threshold.

    Args:
        name: Label used in the headline, e.g. "HIGH" or "CLOSE".
        values: List of numbers to analyse.
        thresholds: Values to count strict exceedances for, in print order.
        with_share: Thresholds for which the share of the dataset is printed as well.
    """
    if not values:
        # max() and the share computation are undefined for an empty dataset
        print(f"NO {name} VALUES IN DATASET")
        return

    print(f"MAX {name} IN DATASET: {max(values)}")
    for threshold in thresholds:
        occurrences = sum(x > threshold for x in values)
        line = f"OCCURRENCES OVER {threshold}: {occurrences}"
        if threshold in with_share:
            # Multiplied by 100 so that the number matches its "PERCENT" label
            line += f" - PERCENT OF DATASET: {100 * occurrences / len(values)}"
        print(line)


high_data_processed = processed_data[f"{TICKER}:high"].to_list()
print_threshold_summary("HIGH", high_data_processed)
print("----------------------------------------------------------------------")

close_data_processed = processed_data[f"{TICKER}:close"].to_list()
print_threshold_summary("CLOSE", close_data_processed)

In [4]:
########################### LABELING THE DATA ##################################


# Column names of the labelled frame: the timestamp, then one column per (window position, feature)
# pair in window order, and finally the target column.
column_labels = ["time"]
column_labels += [f"{TICKER}:{option}{position}" for position in range(EPOCHS) for option in options]
column_labels.append("Label")

# Names of the processed columns that are copied into every window
feature_columns = [f"{TICKER}:{option}" for option in options]

# Each row holds EPOCHS consecutive candles starting at `first`; the label is taken from the candle
# directly after the window, so the last EPOCHS candles cannot start a window.
labelled_data_rows = []  # one list per row of the final dataset
window_count = len(processed_data[TIME]) - EPOCHS
for first in range(window_count):
    last_in_window = first + EPOCHS - 1
    target = first + EPOCHS

    # The row is stamped with the time of the last candle inside the window
    row = [processed_data[TIME][last_in_window]]

    # Features, oldest candle first
    row.extend(
        processed_data[column][first + offset]
        for offset in range(EPOCHS)
        for column in feature_columns
    )

    # here we use the threshold: 1 when the predictor of the following candle exceeds it, else 0
    row.append(1 if processed_data[PREDICTOR][target] > THRESHOLD else 0)

    labelled_data_rows.append(row)


labelled_data_frame = pd.DataFrame(labelled_data_rows, columns=column_labels)
display(labelled_data_frame)

Unnamed: 0,time,LTCUSDT:high0,LTCUSDT:low0,LTCUSDT:close0,LTCUSDT:volume0,LTCUSDT:high1,LTCUSDT:low1,LTCUSDT:close1,LTCUSDT:volume1,LTCUSDT:high2,...,LTCUSDT:volume2,LTCUSDT:high3,LTCUSDT:low3,LTCUSDT:close3,LTCUSDT:volume3,LTCUSDT:high4,LTCUSDT:low4,LTCUSDT:close4,LTCUSDT:volume4,Label
0,2017-12-13 08:00:00,4.347826,-2.177536,1.086957,363.455467,18.279570,0.000000,8.243728,-36.722412,0.698675,...,-8.383215,1.672792,-2.105589,-1.418610,-53.035647,1.045296,-2.404181,-2.404181,-24.920956,1
1,2017-12-13 09:00:00,18.279570,0.000000,8.243728,-36.722412,0.698675,-5.298013,-3.599338,-8.383215,1.672792,...,-53.035647,1.045296,-2.404181,-2.404181,-24.920956,3.163156,-0.749732,1.460193,-24.405900,0
2,2017-12-13 10:00:00,0.698675,-5.298013,-3.599338,-8.383215,1.672792,-2.105589,-1.418610,-53.035647,1.045296,...,-24.920956,3.163156,-0.749732,1.460193,-24.405900,1.129526,-3.163377,-3.142264,247.144972,1
3,2017-12-13 11:00:00,1.672792,-2.105589,-1.418610,-53.035647,1.045296,-2.404181,-2.404181,-24.920956,3.163156,...,-24.405900,1.129526,-3.163377,-3.142264,247.144972,4.257793,-1.816464,1.722008,-60.258303,1
4,2017-12-13 12:00:00,1.045296,-2.404181,-2.404181,-24.920956,3.163156,-0.749732,1.460193,-24.405900,1.129526,...,247.144972,4.257793,-1.816464,1.722008,-60.258303,10.000000,0.000000,8.928571,493.426375,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64385,2025-04-22 05:00:00,1.376498,-0.382360,0.624522,45.570745,0.253357,-0.658728,-0.291361,-53.678685,0.431911,...,-19.886501,0.381194,-0.127065,-0.114358,13.501104,0.139931,-0.381631,0.063605,-19.092926,0
64386,2025-04-22 06:00:00,0.253357,-0.658728,-0.291361,-53.678685,0.431911,-0.241362,-0.012703,-19.886501,0.381194,...,13.501104,0.139931,-0.381631,0.063605,-19.092926,0.394101,-0.521231,-0.025426,53.881726,1
64387,2025-04-22 07:00:00,0.431911,-0.241362,-0.012703,-19.886501,0.381194,-0.127065,-0.114358,13.501104,0.139931,...,-19.092926,0.394101,-0.521231,-0.025426,53.881726,1.411495,-0.178026,1.284334,28.674150,1
64388,2025-04-22 08:00:00,0.381194,-0.127065,-0.114358,13.501104,0.139931,-0.381631,0.063605,-19.092926,0.394101,...,53.881726,1.411495,-0.178026,1.284334,28.674150,0.590082,-0.313873,0.263653,33.322902,0


# Model Training

This code block uses all rows of the labelled dataset except the last `VALIDATION_SIZE` rows as the training set and trains a new model using the **AutoGluon Tabular** predictor.

In [5]:
################################ TRAINING NEW MODEL #######################################
from autogluon.tabular import TabularDataset, TabularPredictor

# defining validation size, label column and the columns to use for training within the labelled dataset
VALIDATION_SIZE = 10000  # the last VALIDATION_SIZE rows are excluded from training
LABEL = "Label"
# Built from the same constants as the labelled frame's feature names (same order: window position first,
# then feature), so the selection follows any change of TICKER, options or EPOCHS. The "time" column is left out.
columns_to_use = [f"{TICKER}:{option}{position}" for position in range(EPOCHS) for option in options] + [LABEL]

# defining training data
training_dataframe = labelled_data_frame.iloc[:-VALIDATION_SIZE].copy()
train_data_frame2 = training_dataframe[columns_to_use]
train_tabular_dataset = TabularDataset(train_data_frame2)

# Training model -> TabularPredictor
# Alternative configurations, kept for reference:
# predictor = TabularPredictor(label=LABEL, eval_metric="balanced_accuracy", positive_class=1).fit(train_tabular_dataset, num_bag_folds=5, num_bag_sets=5, num_stack_levels=3)
# predictor = TabularPredictor(label=LABEL, eval_metric="accuracy").fit(train_tabular_dataset, presets="high_quality")
predictor = TabularPredictor(label=LABEL).fit(train_tabular_dataset)



  from .autonotebook import tqdm as notebook_tqdm
No path specified. Models will be saved in: "AutogluonModels\ag-20250422_103237"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
Memory Avail:       8.37 GB / 15.92 GB (52.5%)
Disk Space Avail:   124.13 GB / 475.69 GB (26.1%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prot

# Model Evaluation

This code block uses the last `VALIDATION_SIZE` rows of the labelled dataset as the validation set and evaluates the model using **AutoGluon's** built-in evaluation tools. In addition, the model is backtested on the validation set to measure its performance and calculate its "expected return" over the period.

In [6]:
#################### MODEL EVALUATION ################################

# Hold-out set: the final VALIDATION_SIZE rows of the labelled data, restricted to the model columns
testing_dataframe = labelled_data_frame.tail(VALIDATION_SIZE).copy()
test_data_frame2 = testing_dataframe[columns_to_use]
test_tabular_dataset = TabularDataset(test_data_frame2)

# Predicted class for every hold-out row; the label column is removed before predicting
holdout_features = test_tabular_dataset.drop(columns=[LABEL])
y_pred = predictor.predict(holdout_features)
display(y_pred)

# Metrics computed by the predictor on the labelled hold-out set
eval_report = predictor.evaluate(test_tabular_dataset, detailed_report=True)
display(eval_report)

# Feature importance on the same hold-out set
feature_importance = predictor.feature_importance(test_tabular_dataset)
display(feature_importance)


54390    0
54391    0
54392    0
54393    0
54394    0
        ..
64385    0
64386    0
64387    0
64388    0
64389    0
Name: Label, Length: 10000, dtype: int64

{'accuracy': 0.5776,
 'balanced_accuracy': 0.5242486329321396,
 'mcc': 0.07480594169314808,
 'roc_auc': 0.5561737254641453,
 'f1': 0.22693997071742314,
 'precision': 0.5317324185248714,
 'recall': 0.14425314099581202,
 'confusion_matrix':       0    1
 0  5156  546
 1  3678  620,
 'classification_report': {'0': {'precision': 0.5836540638442381,
   'recall': 0.9042441248684672,
   'f1-score': 0.709411117226197,
   'support': 5702.0},
  '1': {'precision': 0.5317324185248714,
   'recall': 0.14425314099581202,
   'f1-score': 0.22693997071742314,
   'support': 4298.0},
  'accuracy': 0.5776,
  'macro avg': {'precision': 0.5576932411845548,
   'recall': 0.5242486329321396,
   'f1-score': 0.46817554397181005,
   'support': 10000.0},
  'weighted avg': {'precision': 0.5613381406859743,
   'recall': 0.5776,
   'f1-score': 0.5020450184567259,
   'support': 10000.0}}}

Computing feature importance via permutation shuffling for 20 features using 5000 rows with 5 shuffle sets...
	1.6s	= Expected runtime (0.32s per shuffle set)
	0.64s	= Actual runtime (Completed 5 of 5 shuffle sets)


Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
LTCUSDT:close4,0.0114,0.004338,0.002095,5,0.020332,0.002468
LTCUSDT:low1,0.00088,0.001968,0.18695,5,0.004932,-0.003172
LTCUSDT:close1,0.00044,0.001299,0.245523,5,0.003115,-0.002235
LTCUSDT:high1,0.0,0.000566,0.5,5,0.001165,-0.001165
LTCUSDT:high3,-8e-05,0.000701,0.594362,5,0.001364,-0.001524
LTCUSDT:close0,-0.00016,0.000974,0.634051,5,0.001845,-0.002165
LTCUSDT:volume3,-0.00016,0.001315,0.600517,5,0.002547,-0.002867
LTCUSDT:volume4,-0.0002,0.0004,0.836918,5,0.000624,-0.001024
LTCUSDT:high2,-0.0002,0.000424,0.824349,5,0.000674,-0.001074
LTCUSDT:close3,-0.0002,0.003228,0.551747,5,0.006447,-0.006847


In [7]:
# ANALYSIS OF PREDICTIONS AND PROBABILITIES

import statistics as st

# Class probabilities for the evaluation dataset (label column removed before predicting)
y_prob = predictor.predict_proba(test_tabular_dataset.drop(columns=[LABEL]))

# All three frames get a fresh 0-based index so that they can be addressed by the same row number
validation_probabilities = pd.DataFrame(y_prob).reset_index(drop=True)  # probability for each prediction
validation_predictions = pd.DataFrame(y_pred).reset_index(drop=True)
validation_dataset = pd.DataFrame(test_data_frame2).reset_index(drop=True)
for frame in (validation_probabilities, validation_predictions, validation_dataset):
    display(frame)

# Probabilities assigned to class 1, extracted once and reused for every statistic below
class_1_probabilities = validation_probabilities[1].to_list()

print(f"MEAN PROB 1 CLASSIFICATION: {st.mean(class_1_probabilities)}")
print(f"MAX PROB 1 CLASSIFICATION: {max(class_1_probabilities)}")
print(f"MIN PROB 1 CLASSIFICATION: {min(class_1_probabilities)}")

# How many predictions are more confident than each cut-off
count_above_07 = len([p for p in class_1_probabilities if p > 0.7])
print("COUNT OF NUMBERS > 0.7:", count_above_07)

count_above_08 = len([p for p in class_1_probabilities if p > 0.8])
print("COUNT OF NUMBERS > 0.8:", count_above_08)

count_above_09 = len([p for p in class_1_probabilities if p > 0.9])
print("COUNT OF NUMBERS > 0.9:", count_above_09)








Unnamed: 0,0,1
0,0.549480,0.450520
1,0.526916,0.473084
2,0.547630,0.452370
3,0.547364,0.452636
4,0.551267,0.448733
...,...,...
9995,0.548946,0.451054
9996,0.544088,0.455912
9997,0.545483,0.454517
9998,0.549473,0.450527


Unnamed: 0,Label
0,0
1,0
2,0
3,0
4,0
...,...
9995,0
9996,0
9997,0
9998,0


Unnamed: 0,LTCUSDT:high0,LTCUSDT:low0,LTCUSDT:close0,LTCUSDT:volume0,LTCUSDT:high1,LTCUSDT:low1,LTCUSDT:close1,LTCUSDT:volume1,LTCUSDT:high2,LTCUSDT:low2,...,LTCUSDT:volume2,LTCUSDT:high3,LTCUSDT:low3,LTCUSDT:close3,LTCUSDT:volume3,LTCUSDT:high4,LTCUSDT:low4,LTCUSDT:close4,LTCUSDT:volume4,Label
0,0.380862,-1.249702,-0.309450,90.322957,0.226839,-1.384909,-0.967049,-20.496046,0.361664,-0.530440,...,-30.640738,1.446306,0.000000,0.964204,9.822863,0.835621,-0.322311,0.453623,-15.581410,0
1,0.226839,-1.384909,-0.967049,-20.496046,0.361664,-0.530440,0.024111,-30.640738,1.446306,0.000000,...,9.822863,0.835621,-0.322311,0.453623,-15.581410,0.047523,-0.665320,-0.237614,-35.918018,1
2,0.361664,-0.530440,0.024111,-30.640738,1.446306,0.000000,0.964204,9.822863,0.835621,-0.322311,...,-15.581410,0.047523,-0.665320,-0.237614,-35.918018,0.976423,-0.059538,0.595380,87.172915,0
3,1.446306,0.000000,0.964204,9.822863,0.835621,-0.322311,0.453623,-15.581410,0.047523,-0.665320,...,-35.918018,0.976423,-0.059538,0.595380,87.172915,0.485437,-0.449917,0.035520,-23.132022,1
4,0.835621,-0.322311,0.453623,-15.581410,0.047523,-0.665320,-0.237614,-35.918018,0.976423,-0.059538,...,87.172915,0.485437,-0.449917,0.035520,-23.132022,0.556279,-0.071014,0.426086,-23.292236,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1.376498,-0.382360,0.624522,45.570745,0.253357,-0.658728,-0.291361,-53.678685,0.431911,-0.241362,...,-19.886501,0.381194,-0.127065,-0.114358,13.501104,0.139931,-0.381631,0.063605,-19.092926,0
9996,0.253357,-0.658728,-0.291361,-53.678685,0.431911,-0.241362,-0.012703,-19.886501,0.381194,-0.127065,...,13.501104,0.139931,-0.381631,0.063605,-19.092926,0.394101,-0.521231,-0.025426,53.881726,1
9997,0.431911,-0.241362,-0.012703,-19.886501,0.381194,-0.127065,-0.114358,13.501104,0.139931,-0.381631,...,-19.092926,0.394101,-0.521231,-0.025426,53.881726,1.411495,-0.178026,1.284334,28.674150,1
9998,0.381194,-0.127065,-0.114358,13.501104,0.139931,-0.381631,0.063605,-19.092926,0.394101,-0.521231,...,53.881726,1.411495,-0.178026,1.284334,28.674150,0.590082,-0.313873,0.263653,33.322902,0


MEAN PROB 1 CLASSIFICATION: 0.4718542718112469
MAX PROB 1 CLASSIFICATION: 0.5388067960739136
MIN PROB 1 CLASSIFICATION: 0.44011539220809937
COUNT OF NUMBERS > 0.7: 0
COUNT OF NUMBERS > 0.8: 0
COUNT OF NUMBERS > 0.9: 0


In [None]:
import matplotlib.pyplot as plt

# --- Constants ---
MINIMUM_PROBABILITY = 0.7  # minimum class-1 probability required to open a trade
LEVERAGE = 1
GAIN = 0.5  # profit booked for a correct trade, in percent (divided by 100 when applied)

# --- Tracking variables ---
correct_trades = 0
total_trades = 0
bad_trades = []
all_trades = []

initial_investment = 100
current_investment = initial_investment
investment_history = []

# The realised close of a trade opened on row idx is read from row idx + 1 (its close4 column is the
# close of the candle that row idx's label refers to). The last row has no successor, so it is never traded.
row_count = len(validation_dataset)

# Go through probabilities alongside their index
for idx, prob in enumerate(validation_probabilities[1].to_list()):
    has_next_row = idx + 1 < row_count

    # Check if the predicted probability meets the threshold
    if prob >= MINIMUM_PROBABILITY and has_next_row:
        total_trades += 1

        # Check if the prediction was correct
        if validation_predictions["Label"][idx] == validation_dataset["Label"][idx]:
            correct_trades += 1

            # Record the fixed gain in the trade list
            all_trades.append(GAIN * LEVERAGE)

            # Update current_investment by a factor of (1 + gain%)
            current_investment *= 1 + (GAIN / 100 * LEVERAGE)

        else:
            # A "bad" (wrong) trade is closed at the realised close of the traded candle
            true_close = validation_dataset[f"{TICKER}:close4"][idx + 1]

            # Record the trade details
            bad_trades.append(round(true_close * LEVERAGE, 3))
            all_trades.append(true_close * LEVERAGE)

            # true_close is a percentage, hence the division by 100
            current_investment *= 1 + (true_close / 100 * LEVERAGE)

    else:
        all_trades.append(0)

    # In all cases, record the current investment amount
    investment_history.append(current_investment)

# --- After the loop, calculate stats ---
wrong_trades = total_trades - correct_trades
win_rate = (correct_trades / total_trades * 100) if total_trades else 0
total_return = current_investment - initial_investment
# statistics.mean raises on an empty list, which happens whenever no trade went wrong (or none was made)
mean_bad_trade = st.mean(bad_trades) if bad_trades else "n/a"

# --- Print results ---
print(f"CORRECT: {correct_trades}")
print(f"WRONG: {wrong_trades}")
print(f"NUMBER OF TRADES: {total_trades}")
print(f"WIN RATE: {round(win_rate, 2)}%")
print(f"RETURN: {round(total_return, 2)}")
print(f"INVESTMENT VALUE: {round(current_investment, 2)}")
# NOTE(review): all_trades is all zeros when no trade was made - confirm calculate_sharpe_ratio handles zero variance
print(f"SHARP RATIO: {calculate_sharpe_ratio(all_trades)}")
sharpe_ratio9999 = calculate_sharpe_ratio(validation_dataset[f"{TICKER}:close4"].to_list())
print(f"SHARP RATIO ONLY HOLDING ASSET: {sharpe_ratio9999}")
print(f"MEAN RETURN BAD TRADES: {mean_bad_trade}")
print("-" * 34)

# Print information about bad trades
for trade in bad_trades:
    print(trade)

# --- Plot the investment history (one point per validation row) ---
plt.plot(investment_history)
plt.xlabel("Trade Index")
plt.ylabel("Investment Value")
plt.title("Investment Value Over Trades")
plt.show()

In [None]:
# Analysis of returns made by trading strategy
from matplotlib import pyplot as plt

MINIMUM_PROBABILITY = 0.7  # minimum class-1 probability required to open a trade
LEVERAGE = 5
GAIN = 0.5  # profit booked for a correct trade, in percent (divided by 100 when applied)

CORRECT = 0
NUM_TRADES = 0
BAD_TRADES = []  # (leveraged realised close, probability) of every wrong trade
ALL_TRADES = []
invested = 100
invested_list = []  # investment value after every validation row

# The realised close of a trade opened on row `index` is read from row index + 1; the last row has
# no successor, so it is never traded.
row_count = len(validation_dataset)

for index, probbb in enumerate(validation_probabilities[1].to_list()):
    has_next_row = index + 1 < row_count

    # IF THE PREDICTION HAS A SUFFICIENT PROBABILITY
    if probbb >= MINIMUM_PROBABILITY and has_next_row:
        NUM_TRADES += 1

        # IF THE PREDICTION IS CORRECT
        if validation_predictions["Label"][index] == validation_dataset["Label"][index]:
            CORRECT += 1
            ALL_TRADES.append(GAIN * LEVERAGE)
            invested *= 1 + (GAIN / 100 * LEVERAGE)

        # IF THE PREDICTION IS INCORRECT
        else:
            true_close = validation_dataset[f"{TICKER}:close4"][index + 1]
            BAD_TRADES.append((round(true_close * LEVERAGE, 3), round(probbb, 3)))
            ALL_TRADES.append(true_close * LEVERAGE)
            # true_close is a percentage, hence the division by 100
            invested *= 1 + (true_close / 100 * LEVERAGE)

    # The investment value is recorded for every row, whether or not a trade happened
    invested_list.append(invested)


# Without any trade the win rate is undefined; report it as such instead of dividing by zero
winrate_text = f"{round(CORRECT / NUM_TRADES * 100, 1)}%" if NUM_TRADES else "n/a"

print(f"CORRECT: {CORRECT}")
print(f"WRONG: {NUM_TRADES - CORRECT}")
print(f"NUMBER OF TRADES: {NUM_TRADES}")
print(f"WINRATE: {winrate_text}")
print(f"RETURN: {invested - 100}")
print(f"IVESTMENT VALUE: {invested}")
print("----------------------------------")
for btbb in BAD_TRADES:
    print(btbb)


plt.plot(invested_list)
plt.show()




In [None]:
##################### PROBABILITY ANALYSIS AND EXPECTED RETURNS
################# Probability analysis ###########################
import statistics as st

# Minimum required confidence for executing a trade
PROB_THRESH = 0.55
LEVERAGE = 5
TAKE_PROFIT = 0.5  # percent; a trade is closed with this gain when the candle's high reaches it
HOURS_PER_MONTH = 730  # number of hourly rows after which the monthly gain is recorded and reset


def mean_or_na(values):
    """Return the mean of `values`, or "n/a" for an empty list (statistics.mean would raise)."""
    return st.mean(values) if values else "n/a"


y_prob = predictor.predict_proba(test_tabular_dataset.drop(columns=[LABEL]))

counter = 0  # number of executed trades
correct = 0  # executed trades where prediction and label are both 1

predicted_high_list = []
predicted_low_list = []
predicted_close_list = []

balance = 100

hour_count = 0
month_gain = []
month = 100

# Row label of the first validation row inside labelled_data_frame; only used for the printed trade numbers
first_index_dataset = len(labelled_data_frame) - VALIDATION_SIZE

# Positional copies of everything the loop reads, so no lookup depends on index labels
predictions = list(y_pred)
probabilities = y_prob[1].to_list()
actual_labels = test_data_frame2["Label"].to_list()
next_highs = test_data_frame2[f"{TICKER}:high4"].to_list()
next_lows = test_data_frame2[f"{TICKER}:low4"].to_list()
next_closes = test_data_frame2[f"{TICKER}:close4"].to_list()

print("------------------- TRADES: ------------------------------")
# The outcome of row `position` is read from row position + 1, so the last row is not processed.
# This replaces a blanket try/except that used to swallow the out-of-range lookup together with any other error.
for position in range(len(predictions) - 1):
    index = position + first_index_dataset
    pred = predictions[position]
    prob = probabilities[position]
    actual = actual_labels[position]

    true_high = next_highs[position + 1]
    true_low = next_lows[position + 1]
    true_close = next_closes[position + 1]

    if prob > PROB_THRESH:
        counter += 1

        if pred == 1 and actual == 1:
            correct += 1
            predicted_high_list.append(true_high)
            predicted_low_list.append(true_low)
            predicted_close_list.append(true_close)

        ### LOGIC FOR CALCULATING GAIN ###
        if true_high >= TAKE_PROFIT:
            # Take-profit reached: book the fixed gain
            balance *= 1 + (TAKE_PROFIT / 100 * LEVERAGE)
            month *= 1 + (TAKE_PROFIT / 100 * LEVERAGE)
            # The message reports the gain that is actually applied above
            print(f"{index}. Gain +{TAKE_PROFIT * LEVERAGE}%")
        else:
            # Take-profit not reached: the trade is closed at the candle's close
            balance *= 1 + ((true_close / 100) * LEVERAGE)
            month *= 1 + ((true_close / 100) * LEVERAGE)
            print(f"{index}. Close +{true_close * LEVERAGE}")

    hour_count += 1
    if hour_count == HOURS_PER_MONTH:
        hour_count = 0
        month_gain.append(round(month - 100, 3))
        month = 100


# Gain of the last, incomplete month
month_gain.append(month - 100)


print("------------------------------------------")
print(f"Correct: {correct}")
print(f"Counter: {counter}")
print(f"Winrate: {correct / counter if counter else 'n/a'}")

print(f"AVG High: {mean_or_na(predicted_high_list)}")
print(f"AVG Low: {mean_or_na(predicted_low_list)}")
print(f"AVG Close: {mean_or_na(predicted_close_list)}")
print()
print(f"Balance: {balance}")
print(f"Return: {balance - 100}%")
print()
print(f"Month List: {month_gain}")
print(f"Mean month gain: {st.mean(month_gain)}")

In [None]:
############### ANALYSIS OF CLOSE VALUE PREDICTIONS ################################
from matplotlib import pyplot as plt

# --- Constants ---
MINIMUM_PROBABILITY = 0.53  # minimum class-1 probability required to open a trade
TRADING_FEE = 0.25  # subtracted from the realised close of every trade, in the same unit (percent)
LEVERAGE = 1

# --- Tracking variables ---
correct_trades = 0
total_trades = 0
wrong_trades = 0

all_trades = []
good_trades = []
bad_trades = []
investment1 = 100
investment_history2 = []

# The realised close of a trade opened on row idx is read from row idx + 1, so the last row is skipped.
# The bound is taken from the dataset instead of a fixed row number, so it follows VALIDATION_SIZE.
row_count = len(validation_dataset)

# Go through probabilities alongside their index
for idx, prob in enumerate(validation_probabilities[1].to_list()):
    if idx + 1 >= row_count:
        continue

    # Check if the predicted probability meets the threshold
    if prob >= MINIMUM_PROBABILITY:
        true_close = (validation_dataset[f"{TICKER}:close4"][idx + 1] - TRADING_FEE) * LEVERAGE

        # A trade counts as correct when it is not negative after the fee
        if true_close >= 0:
            correct_trades += 1
            good_trades.append(true_close)
        else:
            bad_trades.append(true_close)

        total_trades += 1
        all_trades.append(true_close)
        # true_close is a percentage, hence the division by 100
        investment1 *= 1 + (true_close / 100)
    investment_history2.append(investment1)


# --- After the loop, calculate stats ---
wrong_trades = total_trades - correct_trades
win_rate = (correct_trades / total_trades * 100) if total_trades else 0
# statistics.mean raises on an empty list, so empty groups are reported as "n/a"
mean_good_trade = st.mean(good_trades) if good_trades else "n/a"
mean_bad_trade = st.mean(bad_trades) if bad_trades else "n/a"

# --- Print results ---
print(f"CORRECT: {correct_trades}")
print(f"WRONG: {wrong_trades}")
print(f"NUMBER OF TRADES: {total_trades}")
print(f"WIN RATE: {round(win_rate, 2)}%")
print("-" * 34)

print(f"MEAN GOOD TRADES: {mean_good_trade}")
print(f"MEAN BAD TRADES: {mean_bad_trade}")


for trade in all_trades:
    print(trade)

print(f"RETURN: {round(investment1 - 100, 3)} %")

# --- Plot the investment history ---
plt.plot(investment_history2)
plt.xlabel("Trade Index")
plt.ylabel("Investment Value")
plt.title("Investment Value Over Trades")
plt.show()

NameError: name 'validation_probabilities' is not defined