In [None]:
import warnings
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Ignore warnings
warnings.filterwarnings("ignore")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
train_val_df = pd.read_csv("/content/drive/Shareddrives/Hackathon/btc_18_22_4h.csv")
test_df = pd.read_csv("/content/drive/Shareddrives/Hackathon/BTC_23_4h.csv")

In [None]:
test_df.drop(columns = ['Unnamed: 0'])

Unnamed: 0,datetime,open,high,low,close,volume
0,2023-01-01 05:30:00,16541.77,16559.77,16508.39,16533.04,15515.82327
1,2023-01-01 09:30:00,16533.04,16550.00,16499.01,16526.19,16532.24115
2,2023-01-01 13:30:00,16525.70,16557.00,16505.20,16556.66,15915.96701
3,2023-01-01 17:30:00,16556.66,16572.94,16533.68,16558.73,15046.09096
4,2023-01-01 21:30:00,16558.73,16623.65,16558.00,16603.08,18532.64857
...,...,...,...,...,...,...
2186,2023-12-31 13:30:00,42537.10,42899.00,42369.15,42518.36,4626.12098
2187,2023-12-31 17:30:00,42518.36,42646.27,42352.76,42457.17,3403.65176
2188,2023-12-31 21:30:00,42457.17,42719.80,42436.47,42619.04,3036.19166
2189,2024-01-01 01:30:00,42619.04,42680.36,42056.00,42283.58,5460.30980


In [None]:
train_val_df

Unnamed: 0,datetime,open,high,low,close,volume
0,2018-01-01 05:30:00,13715.65,13715.65,13155.38,13410.03,1676.204807
1,2018-01-01 09:30:00,13434.98,13818.55,13322.15,13570.35,1302.214836
2,2018-01-01 13:30:00,13569.98,13735.24,13001.13,13220.56,1319.755931
3,2018-01-01 17:30:00,13220.56,13330.00,12750.00,13247.00,1831.933153
4,2018-01-01 21:30:00,13247.00,13290.65,12940.00,13240.37,1092.337234
...,...,...,...,...,...,...
10930,2022-12-30 13:30:00,16475.10,16531.10,16458.25,16496.27,20505.906850
10931,2022-12-30 17:30:00,16495.96,16580.70,16333.00,16555.26,44689.949880
10932,2022-12-30 21:30:00,16555.26,16577.00,16515.45,16525.37,25853.486930
10933,2022-12-31 01:30:00,16525.91,16677.35,16523.91,16607.48,24063.452570


## **Moving Average**

In [None]:
train_val_df2 = train_val_df.copy()

In [None]:
train_val_df2['10_MA_Open'] = train_val_df2['open'].rolling(window=10).mean()

In [None]:
train_val_df2['10_MA_Open'] = train_val_df2['10_MA_Open'].fillna(train_val_df2['open'])

In [None]:
train_val_df2.head(15)

Unnamed: 0,datetime,open,high,low,close,volume,10_MA_Open
0,2018-01-01 05:30:00,13715.65,13715.65,13155.38,13410.03,1676.204807,13715.65
1,2018-01-01 09:30:00,13434.98,13818.55,13322.15,13570.35,1302.214836,13434.98
2,2018-01-01 13:30:00,13569.98,13735.24,13001.13,13220.56,1319.755931,13569.98
3,2018-01-01 17:30:00,13220.56,13330.0,12750.0,13247.0,1831.933153,13220.56
4,2018-01-01 21:30:00,13247.0,13290.65,12940.0,13240.37,1092.337234,13247.0
5,2018-01-02 01:30:00,13222.03,13599.7,13222.03,13380.0,1387.469883,13222.03
6,2018-01-02 05:30:00,13382.16,13850.0,13231.96,13353.78,2365.532926,13382.16
7,2018-01-02 09:30:00,13353.78,13480.84,12890.02,13343.0,2980.316053,13353.78
8,2018-01-02 13:30:00,13343.01,13617.28,13302.59,13490.0,2596.182674,13343.01
9,2018-01-02 17:30:00,13490.0,13894.86,13450.46,13690.03,3365.879508,13397.915


In [None]:
X = train_val_df.iloc[:, 1].values
def create_regression_sequences(data, lookback, forecast_size):
    sequences, targets = [], []
    for i in range(len(data) - lookback):
        seq = data[i:i + lookback]
        label = data[i + lookback:i + lookback + forecast_size]
        sequences.append(seq)
        targets.append(label)

    return np.array(sequences), np.array(targets)

# Define the lookback window and forecast size
lookback_window = 10
forecast_size = 1

# Create regression sequences
X_reg, y_reg = create_regression_sequences(X, lookback_window, forecast_size)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42, shuffle=False)

In [None]:
train_val_df.tail(10)

Unnamed: 0,datetime,open,high,low,close,volume
10925,2022-12-29 17:30:00,16598.7,16664.41,16590.64,16616.6,34295.37736
10926,2022-12-29 21:30:00,16616.6,16654.54,16590.82,16612.4,26230.77683
10927,2022-12-30 01:30:00,16612.4,16654.51,16555.57,16633.47,27491.95801
10928,2022-12-30 05:30:00,16633.47,16648.77,16579.75,16599.08,20857.62793
10929,2022-12-30 09:30:00,16598.79,16615.35,16429.0,16474.93,28945.88758
10930,2022-12-30 13:30:00,16475.1,16531.1,16458.25,16496.27,20505.90685
10931,2022-12-30 17:30:00,16495.96,16580.7,16333.0,16555.26,44689.94988
10932,2022-12-30 21:30:00,16555.26,16577.0,16515.45,16525.37,25853.48693
10933,2022-12-31 01:30:00,16525.91,16677.35,16523.91,16607.48,24063.45257
10934,2022-12-31 05:30:00,16607.48,16616.37,16550.01,16552.46,15656.04768


In [None]:
print(X_test_reg[-10:])

[[16690.75 16706.06 16650.65 16647.16 16678.52 16578.02 16595.43 16547.32
  16558.33 16556.37]
 [16706.06 16650.65 16647.16 16678.52 16578.02 16595.43 16547.32 16558.33
  16556.37 16598.7 ]
 [16650.65 16647.16 16678.52 16578.02 16595.43 16547.32 16558.33 16556.37
  16598.7  16616.6 ]
 [16647.16 16678.52 16578.02 16595.43 16547.32 16558.33 16556.37 16598.7
  16616.6  16612.4 ]
 [16678.52 16578.02 16595.43 16547.32 16558.33 16556.37 16598.7  16616.6
  16612.4  16633.47]
 [16578.02 16595.43 16547.32 16558.33 16556.37 16598.7  16616.6  16612.4
  16633.47 16598.79]
 [16595.43 16547.32 16558.33 16556.37 16598.7  16616.6  16612.4  16633.47
  16598.79 16475.1 ]
 [16547.32 16558.33 16556.37 16598.7  16616.6  16612.4  16633.47 16598.79
  16475.1  16495.96]
 [16558.33 16556.37 16598.7  16616.6  16612.4  16633.47 16598.79 16475.1
  16495.96 16555.26]
 [16556.37 16598.7  16616.6  16612.4  16633.47 16598.79 16475.1  16495.96
  16555.26 16525.91]]


In [None]:
np.shape(X_train_reg) ,np.shape(X_test_reg)

((8740, 10), (2185, 10))

In [None]:
def metrics(text, y, y_pred, reg=False):
    print(f"{text} Set Metrics:")
    r2_score_ = r2_score(y, y_pred)
    print(f"R2 Score: {r2_score_}")

    if reg:
        # Regression metrics
        mse = mean_squared_error(y, y_pred)
        mae = mean_absolute_error(y, y_pred)
        print(f"Mean Squared Error: {mse}")
        print(f"Mean Absolute Error: {mae}")
    else:
        # Classification metrics
        accuracy = accuracy_score(y, y_pred)
        precision = precision_score(y, y_pred, average='micro')
        recall = recall_score(y, y_pred, average='micro')
        f1_score_ = f1_score(y, y_pred, average='micro')

        # Print the classification metrics
        print(f"Accuracy: {accuracy}")
        print(f"Precision: {precision}")
        print(f"Recall: {recall}")
        print(f"F1 Score: {f1_score_}")
    print()

In [None]:
# Initialize the RandomForestRegressor with the best parameters
regressor = RandomForestRegressor(n_estimators=100, max_depth=None, min_samples_split=2, criterion="squared_error", min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto",
        max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None,
        random_state=12)

# Train the regressor on the training set
regressor.fit(X_train_reg, y_train_reg)

# Make predictions on the test set
y_pred_train_rf = regressor.predict(X_train_reg)
y_pred_test_rf = regressor.predict(X_test_reg)

# Evaluate the model on the test set
metrics('Train', y_pred_train_rf, y_train_reg, True)
metrics('Test', y_pred_test_rf, y_test_reg, True)

Train Set Metrics:
R2 Score: 0.9999008099988282
Mean Squared Error: 31249.718007414307
Mean Absolute Error: 86.29419756292934

Test Set Metrics:
R2 Score: 0.9938996002441327
Mean Squared Error: 608500.568550981
Mean Absolute Error: 569.955623066361



In [None]:
import numpy as np
import plotly.graph_objects as go

# Reshape arrays for plotting
y_test_reg_array = np.array(y_test_reg).reshape(-1)
y_train_reg_array = np.array(y_train_reg).reshape(-1)
y_pred_test_rf_array = np.array(y_pred_test_rf).reshape(-1)
y_pred_train_rf_array = np.array(y_pred_train_rf).reshape(-1)

# Plotting the results using Plotly
fig = go.Figure()

# Add true values and predicted values traces for the test set
fig.add_trace(go.Scatter(x=list(range(len(y_train_reg), len(y_test_reg) + len(y_train_reg))),
                         y=y_test_reg_array,
                         mode='lines',
                         name='True Values (Test)'))

fig.add_trace(go.Scatter(x=list(range(len(y_train_reg), len(y_test_reg) + len(y_train_reg))),
                         y=y_pred_test_rf_array,
                         mode='lines',
                         name='Predicted Values (Test)'))

# Add true values and predicted values traces for the train set
fig.add_trace(go.Scatter(x=list(range(len(y_train_reg))),
                         y=y_train_reg_array,
                         mode='lines',
                         name='True Values (Train)'))

fig.add_trace(go.Scatter(x=list(range(len(y_train_reg))),
                         y=y_pred_train_rf_array,
                         mode='lines',
                         name='Predicted Values (Train)'))

# Update layout
fig.update_layout(title='Random Forest Regressor Prediction',
                  xaxis_title='Time',
                  yaxis_title='Values')

# Show the plot
fig.show()


In [None]:
import pandas as pd
import numpy as np

# Function to implement a simple gap trading strategy
def gap_trading_strategy(prices, gap_threshold_percent=1.15):
    # Create a DataFrame with 'Close' prices
    data = pd.DataFrame(prices, columns=['Close'])

    # Calculate price change and identify gaps
    data['PriceChange'] = data['Close'].diff()
    data['Gap'] = np.where((data['PriceChange'].shift(-1) / data['Close']) * 100 > gap_threshold_percent, 1.0, 0.0)

    # Determine position based on gap
    data['Position'] = data['Gap'].diff()

    return data


In [None]:
def gap_trading_strategy(prices, gap_threshold_percent=2.5, short_window=10, long_window=200):
    # Create a DataFrame with 'Close' prices
    # data = pd.DataFrame(prices, columns=['datetime', 'Close'])
    # data['datetime'] = pd.to_datetime(data['datetime'])
    # data.set_index('datetime', inplace=True)

    data = pd.DataFrame(prices, columns=['Close'])
    # data['datetime'] = pd.to_datetime(data['datetime'])
    # data.set_index('datetime', inplace=True)

    # Calculate price change and identify gaps
    data['PriceChange'] = data['Close'].diff()
    data['Gap'] = np.where((data['PriceChange'].shift(-1) / data['Close']) * 100 > gap_threshold_percent, 1.0, 0.0)

    # Calculate moving averages
    data['Short_MA'] = data['Close'].rolling(window=short_window, min_periods=1).mean()
    data['Long_MA'] = data['Close'].rolling(window=long_window, min_periods=1).mean()

    # Determine position based on gap and moving averages
    data['Position'] = np.where((data['Gap'].diff() == 1) & (data['Short_MA'] > data['Long_MA']), 1.0, 0.0)
    data['Position'] = np.where((data['Gap'].diff() == -1) & (data['Short_MA'] < data['Long_MA']), -1.0, data['Position'])

    return data

# # Apply the strategy
# result = gap_trading_strategy(data)
# print(result)


In [None]:
# import pandas as pd
# import numpy as np

# def calculate_rsi(data, window=10):
#     delta = data['Close'].diff()
#     gain = (delta.where(delta > 0, 0)).rolling(window=window, min_periods=1).mean()
#     loss = (-delta.where(delta < 0, 0)).rolling(window=window, min_periods=1).mean()
#     rs = gain / loss
#     rsi = 100 - (100 / (1 + rs))
#     return rsi

# def calculate_macd(data, short_window=10, long_window=50, signal_window=10):
#     data['Short_EMA'] = data['Close'].ewm(span=short_window, min_periods=1).mean()
#     data['Long_EMA'] = data['Close'].ewm(span=long_window, min_periods=1).mean()
#     data['MACD'] = data['Short_EMA'] - data['Long_EMA']
#     data['Signal_Line'] = data['MACD'].ewm(span=signal_window, min_periods=1).mean()
#     return data['MACD'], data['Signal_Line']

# def gap_trading_strategy(prices, gap_threshold_percent=3.5, short_window=10, long_window=200, rsi_window=10, rsi_threshold=50, macd_short_window=10, macd_long_window=50, macd_signal_window=10, take_profit=0.04):
#     # Create a DataFrame with 'Close' prices
#     data = pd.DataFrame(prices, columns=['Close'])

#     # Calculate price change and identify gaps
#     data['PriceChange'] = data['Close'].diff()
#     data['Gap'] = np.where((data['PriceChange'].shift(-1) / data['Close']) * 100 > gap_threshold_percent, 1.0, 0.0)

#     # Calculate moving averages
#     data['Short_MA'] = data['Close'].rolling(window=short_window, min_periods=1).mean()
#     data['Long_MA'] = data['Close'].rolling(window=long_window, min_periods=1).mean()

#     # Calculate RSI
#     data['RSI'] = calculate_rsi(data, window=rsi_window)

#     # Calculate MACD and Signal Line
#     data['MACD'], data['Signal_Line'] = calculate_macd(data, short_window=macd_short_window, long_window=macd_long_window, signal_window=macd_signal_window)

#     # Determine position based on gap, moving averages, RSI, and MACD
#     data['Position'] = np.where(
#         (data['Gap'].diff() == 1) &
#         (data['Short_MA'] > data['Long_MA']) &
#         (data['RSI'] < rsi_threshold) &
#         (data['MACD'] > data['Signal_Line']),
#         1.0, 0.0)

#     data['Position'] = np.where(
#         (data['Gap'].diff() == -1) &
#         (data['Short_MA'] < data['Long_MA']) &
#         (data['RSI'] > 100 - rsi_threshold) &
#         (data['MACD'] < data['Signal_Line']),
#         -1.0, data['Position'])

#     # Calculate daily returns
#     data['Daily_Return'] = data['Close'].pct_change()

#     # Initialize strategy return and cumulative return
#     data['Strategy_Return'] = data['Daily_Return'] * data['Position'].shift(1)
#     data['Cumulative_Return'] = (1 + data['Strategy_Return']).cumprod()

#     # Implement take-profit level
#     take_profit_reached = (data['Cumulative_Return'] - data['Cumulative_Return'].cummax().shift(1)) / data['Cumulative_Return'].cummax().shift(1)
#     data.loc[take_profit_reached > take_profit, 'Position'] = 0

#     # Recalculate strategy returns with take-profit logic
#     data['Strategy_Return'] = data['Daily_Return'] * data['Position'].shift(1)
#     data['Cumulative_Return'] = (1 + data['Strategy_Return']).cumprod()

#     return data


In [None]:
# def gap_trading_strategy(prices, gap_threshold_percent=3.5, short_window=10, long_window=200, take_profit=0.04):
#     # Create a DataFrame with 'Close' prices
#     data = pd.DataFrame(prices, columns=['Close'])

#     # Calculate price change and identify gaps
#     data['PriceChange'] = data['Close'].diff()
#     data['Gap'] = np.where((data['PriceChange'].shift(-1) / data['Close']) * 100 > gap_threshold_percent, 1.0, 0.0)

#     # Calculate moving averages
#     data['Short_MA'] = data['Close'].rolling(window=short_window, min_periods=1).mean()
#     data['Long_MA'] = data['Close'].rolling(window=long_window, min_periods=1).mean()

#     # Determine initial position based on gap and moving averages
#     data['Position'] = np.where((data['Gap'].diff() == 1) & (data['Short_MA'] > data['Long_MA']), 1.0, 0.0)
#     data['Position'] = np.where((data['Gap'].diff() == -1) & (data['Short_MA'] < data['Long_MA']), -1.0, data['Position'])

#     # Calculate daily returns
#     data['Daily_Return'] = data['Close'].pct_change()

#     # Initialize strategy return and cumulative return
#     data['Strategy_Return'] = data['Daily_Return'] * data['Position'].shift(1)
#     data['Cumulative_Return'] = (1 + data['Strategy_Return']).cumprod()

#     # Implement take-profit level
#     take_profit_reached = (data['Cumulative_Return'] - data['Cumulative_Return'].cummax().shift(1)) / data['Cumulative_Return'].cummax().shift(1)
#     data.loc[take_profit_reached > take_profit, 'Position'] = 0

#     # Recalculate strategy returns with take-profit logic
#     data['Strategy_Return'] = data['Daily_Return'] * data['Position'].shift(1)
#     data['Cumulative_Return'] = (1 + data['Strategy_Return']).cumprod()

#     return data

In [None]:
prices = [100, 102, 105, 107, 115, 120, 119, 121]
result = gap_trading_strategy(prices)
print(result)


   Close  PriceChange  Gap    Short_MA     Long_MA  Position
0    100          NaN  1.0  100.000000  100.000000       0.0
1    102          2.0  1.0  101.000000  101.000000       0.0
2    105          3.0  1.0  102.333333  102.333333       0.0
3    107          2.0  1.0  103.500000  103.500000       0.0
4    115          8.0  1.0  105.800000  105.800000       0.0
5    120          5.0  0.0  108.166667  108.166667       0.0
6    119         -1.0  1.0  109.714286  109.714286       0.0
7    121          2.0  0.0  111.125000  111.125000       0.0


In [None]:
import pandas as pd
def merge_and_calculate_profit(X_test_reg, regressor, initial_balance=100000,gap_threshold_percent=3.5,stop_loss_percent=0.005):
    # Initialize variables and parameters
    history = []  # List to store historical prices and forecasts
    history.extend(X_test_reg[0])  # Initialize history with the first observation
    history.pop()  # Remove the last element to maintain the desired lookback window
    actions = []  # List to store trading actions
    position = 0  # Current trading position (0: neutral, 1: long, -1: short)
    total_profit = 0  # Total profit accumulated during trading
    balance = initial_balance  # Initial balance for trading
    stop_loss = 700  # Initial stop-loss value
    shares_bought = 0 # Initial number of shares bought
    shares_sold_short = 0
    buy_price =0
    price = 0
    # Create an empty DataFrame to store the log
    log_df = pd.DataFrame(columns=['open', 'signal'])

    # Loop through each observation in the test set
    for idx, i in enumerate(X_test_reg):
        price = i[-1]
        history.append(price)
        forecast = regressor.predict([i])[0]
        history.append(forecast)

        # Apply the gap trading strategy to get the current trading action
        gap_strategy_data = gap_trading_strategy(history, gap_threshold_percent)
        history.pop()

        current_action = gap_strategy_data.iloc[-2, gap_strategy_data.columns.get_loc('Position')]

        # Execute trading actions based on the current position and trading strategy
        if position == 1:
            if buy_price - price > stop_loss:
                # Sell
                position = 0
                profit_loss = shares_bought * (price - buy_price)
                total_profit += profit_loss
                balance = shares_bought * price
                shares_bought = 0
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [-1],'Stop_loss':[stop_loss],  'Model_Action':['Stop Loss'], 'Forecast': [forecast], 'Action': ['Sell'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
                                       ignore_index=True)
            elif current_action == 1:
                # Hold
                log_df = pd.concat([log_df,
                    pd.DataFrame({'open': [price],'signal': [0],'Stop_loss':[stop_loss],  'Model_Action':['Buy'],  'Forecast': [forecast] , 'Action': ['Hold'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                    ignore_index=True)
            elif current_action == -1:
                # Sell
                position = 0
                profit_loss = shares_bought * (price - buy_price)
                total_profit += profit_loss
                balance = shares_bought * price
                shares_bought = 0
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [-1],'Stop_loss':[stop_loss],  'Model_Action':['Sell'], 'Forecast': [forecast], 'Action': ['Sell'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
                                       ignore_index=True)
                # Short Sell
                position = -1
                share_price_short = price
                shares_sold_short = balance / price
                short_sell_price = price * shares_sold_short
                shares_bought = -shares_sold_short
                balance = 0
                stop_loss = share_price_short*stop_loss_percent
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [-1],'Stop_loss':[stop_loss],  'Model_Action':'Sell Short Sell', 'Forecast': [forecast], 'Action': ['Sell'], 'Current Position':[shares_bought] , 'Balance': [balance], 'Profit/Loss': [0]})],
                                       ignore_index=True)
            else:
                # Hold
                log_df = pd.concat([log_df,
                    pd.DataFrame({'open': [price],'signal': [0],'Stop_loss':[stop_loss],  'Model_Action':['Hold'], 'Forecast': [forecast], 'Action': ['Hold'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                    ignore_index=True)

        elif position == 0:
            if current_action == 1:
                # Buy
                position = 1
                buy_price = price
                shares_bought = balance / buy_price
                balance = 0
                stop_loss = buy_price * stop_loss_percent
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [1],'Stop_loss':[stop_loss],  'Model_Action':['Buy'], 'Forecast': [forecast], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                                       ignore_index=True)
            elif current_action == -1:

                # Hold
                log_df = pd.concat([log_df,
                    pd.DataFrame({'open': [price],'signal': [0],'Stop_loss':[stop_loss],  'Model_Action':['Sell'], 'Forecast': [forecast], 'Action': ['Hold'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                    ignore_index=True)
            else:
                # Hold
                log_df = pd.concat([log_df,
                    pd.DataFrame({'open': [price],'signal': [0],'Stop_loss':[stop_loss],  'Model_Action':['Hold'], 'Forecast': [forecast], 'Action': ['Hold'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                    ignore_index=True)
        else:
            # print(idx,price - share_price_short , stop_loss)
            if price - share_price_short > stop_loss:
                # Close short sell
                position = 0
                profit_loss = shares_sold_short * (share_price_short - price)
                total_profit += profit_loss
                balance = short_sell_price + profit_loss
                shares_bought=0
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price], 'signal': [1],'Stop_loss':[stop_loss],  'Model_Action':['Buy Close short sell'], 'Forecast': [forecast], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
                                       ignore_index=True)
                # Buy
                position = 1
                buy_price = price
                shares_bought = balance / buy_price
                balance = 0
                stop_loss = buy_price * stop_loss_percent
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [1],'Stop_loss':[stop_loss],  'Model_Action':['Buy'], 'Forecast': [forecast], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                                       ignore_index=True)

            elif current_action == 1:
                # Close short sell
                position = 0
                profit_loss = shares_sold_short * (share_price_short - price)
                total_profit += profit_loss
                balance = short_sell_price + profit_loss
                shares_bought = 0
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price], 'signal': [1],'Stop_loss':[stop_loss],  'Model_Action':['Close short sell stop loss'], 'Forecast': [forecast], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
                                       ignore_index=True)
                # Buy
                position = 1
                buy_price = price
                shares_bought = balance / buy_price
                balance = 0
                stop_loss = buy_price * stop_loss_percent
                log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [1],'Stop_loss':[stop_loss],  'Model_Action':['Buy'], 'Forecast': [forecast], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                                       ignore_index=True)

            elif current_action == -1:
                # Hold
                log_df = pd.concat([log_df,
                    pd.DataFrame({'open': [price],'signal': [0],'Stop_loss':[stop_loss],  'Model_Action':['Sell'], 'Forecast': [forecast], 'Action': ['Hold'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                    ignore_index=True)
            else:
                # Hold
                log_df = pd.concat([log_df,
                    pd.DataFrame({'open': [price],'signal': [0],'Stop_loss':[stop_loss],  'Model_Action':['Hold'], 'Forecast': [forecast], 'Action': ['Hold'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [0]})],
                    ignore_index=True)
    # If there's an open position at the end, close it at the last price
    if position == 1:
        # Sell
        profit_loss = shares_bought * (price - buy_price)
        total_profit += profit_loss
        log_df.drop(log_df.tail(1).index, inplace=True)
        log_df = pd.concat([log_df,
            pd.DataFrame({'open': [price],'signal': [-1],'Model_Action': ['Sell'], 'Action': ['Sell'], 'Current Position': [0],
             'Balance': [shares_bought * price], 'Profit/Loss': [profit_loss]})], ignore_index=True)

    elif position == -1:
        # Close short sell
        position = 0
        profit_loss = shares_sold_short * (share_price_short - price)
        total_profit += profit_loss
        balance = short_sell_price + profit_loss
        shares_bought=0+6
        log_df.drop(log_df.tail(1).index, inplace=True)
        log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [1],'Stop_loss': [stop_loss],  'Model_Action':['Close short sell'], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
                                       ignore_index=True)
    return log_df, total_profit, history,gap_strategy_data, position


In [None]:
# Use the trained regression model to merge and calculate profits
log_df, total_profit, history,gap_strategy_data, position = merge_and_calculate_profit(X_test_reg, regressor,gap_threshold_percent=2.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit)

Total Profit: -15968.34996999398


In [None]:
log_df

Unnamed: 0,open,signal,Stop_loss,Model_Action,Forecast,Action,Current Position,Balance,Profit/Loss
0,45728.28,0,700.0000,Hold,45629.4491,Hold,0.0,100000.00000,0.0
1,46216.93,0,700.0000,Hold,45819.6190,Hold,0.0,100000.00000,0.0
2,46813.21,0,700.0000,Hold,46920.9891,Hold,0.0,100000.00000,0.0
3,47194.73,0,700.0000,Hold,47266.4951,Hold,0.0,100000.00000,0.0
4,46758.87,0,700.0000,Hold,47018.2255,Hold,0.0,100000.00000,0.0
...,...,...,...,...,...,...,...,...,...
2180,16598.79,0,91.2762,Hold,16698.3849,Hold,0.0,84031.65003,0.0
2181,16475.10,0,91.2762,Hold,16660.8788,Hold,0.0,84031.65003,0.0
2182,16495.96,0,91.2762,Hold,16694.5578,Hold,0.0,84031.65003,0.0
2183,16555.26,0,91.2762,Hold,16766.6860,Hold,0.0,84031.65003,0.0


In [None]:
new_df = train_val_df.iloc[len(X_test_reg)*(-1)-2:]
df2= log_df.iloc[:,:2]
merged_df = pd.merge(new_df, df2, on='open', how='left')
merged_df['signal']=merged_df['signal'].fillna(-1)

In [None]:
# If there's an open position at the end, close it at the last price
if position == 1:
        merged_df.loc[-1, "signal"] = -1
        # log_df.drop(log_df.tail(1).index, inplace=True)
        # log_df = pd.concat([log_df,
        #     pd.DataFrame({'open': [price],'signal': [-1],'Model_Action': ['Sell'], 'Action': ['Sell'], 'Current Position': [0],
        #      'Balance': [shares_bought * price], 'Profit/Loss': [profit_loss]})], ignore_index=True)

elif position == -1:
        # Close short sell
        position = 0
        merged_df.loc[-1, "signal"] = 1
        # shares_bought=0+6
        # log_df.drop(log_df.tail(1).index, inplace=True)
        # log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [1],'Stop_loss': [stop_loss],  'Model_Action':['Close short sell'], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
        #                                ignore_index=True)

In [None]:
new_df

Unnamed: 0,datetime,open,high,low,close,volume
8748,2021-12-31 21:30:00,48005.37,48118.45,45678.96,45728.28,9090.79287
8749,2022-01-01 01:30:00,45728.28,46569.97,45678.00,46216.93,5543.95691
8750,2022-01-01 05:30:00,46216.93,46949.99,46208.37,46813.20,3495.20465
8751,2022-01-01 09:30:00,46813.21,47555.55,46591.23,47194.73,3522.79888
8752,2022-01-01 13:30:00,47194.73,47344.69,46715.39,46758.87,2729.67811
...,...,...,...,...,...,...
10930,2022-12-30 13:30:00,16475.10,16531.10,16458.25,16496.27,20505.90685
10931,2022-12-30 17:30:00,16495.96,16580.70,16333.00,16555.26,44689.94988
10932,2022-12-30 21:30:00,16555.26,16577.00,16515.45,16525.37,25853.48693
10933,2022-12-31 01:30:00,16525.91,16677.35,16523.91,16607.48,24063.45257


In [None]:
merged_df[merged_df.isna().any(axis=1)]

Unnamed: 0,datetime,open,high,low,close,volume,signal


In [None]:
merged_df

Unnamed: 0,datetime,open,high,low,close,volume,signal
0,2021-12-31 21:30:00,48005.37,48118.45,45678.96,45728.28,9090.79287,-1
1,2022-01-01 01:30:00,45728.28,46569.97,45678.00,46216.93,5543.95691,0
2,2022-01-01 05:30:00,46216.93,46949.99,46208.37,46813.20,3495.20465,0
3,2022-01-01 09:30:00,46813.21,47555.55,46591.23,47194.73,3522.79888,0
4,2022-01-01 13:30:00,47194.73,47344.69,46715.39,46758.87,2729.67811,0
...,...,...,...,...,...,...,...
2188,2022-12-30 13:30:00,16475.10,16531.10,16458.25,16496.27,20505.90685,0
2189,2022-12-30 17:30:00,16495.96,16580.70,16333.00,16555.26,44689.94988,0
2190,2022-12-30 21:30:00,16555.26,16577.00,16515.45,16525.37,25853.48693,0
2191,2022-12-31 01:30:00,16525.91,16677.35,16523.91,16607.48,24063.45257,0


In [None]:
merged_df.to_csv("log_year_22", index=False)

In [None]:
merged_df['signal'].unique()

array([-1,  0,  1])

In [None]:
def map_actions_to_colors(log_df):
    # Map actions to colors
    log_df_copy = log_df.copy()
    # log_df_copy.loc[(log_df_copy['Action'] == 'Sell') & (log_df_copy['Profit/Loss'] < 0), 'Action'] = 'Lost_sell'
    action_colors = {'Buy': 'green', 'Sell': 'red', 'Hold': 'Blue'}
    log_df_copy['Color'] = log_df_copy['Action'].map(action_colors)
    return log_df_copy

def plot_open_vs_action(log_df):
    # Create line plot for opens
    fig = go.Figure()

    # Plot opens as lines
    fig.add_trace(go.Scatter(x=log_df.index, y=log_df['open'], mode='lines', name='open'))

    # Plot Buy and Sell actions as markers
    action_colors = {'Buy': 'green', 'Sell': 'red'}
    for action, color in action_colors.items():
        if action in log_df['Action'].unique():
            subset = log_df[log_df['Action'] == action]
            fig.add_trace(go.Scatter(x=subset.index, y=subset['open'],
                                     mode='markers', marker=dict(color=color),
                                     name=action))

    # Customize the layout
    fig.update_layout(title='open vs. Action (Buy and Sell)',
                      xaxis_title='Index',
                      yaxis_title='open_price',
                      showlegend=True)

    # Show the plot
    fig.show()
log_df_mapped = map_actions_to_colors(log_df)
plot_open_vs_action(log_df_mapped)


In [None]:
test_df

Unnamed: 0.1,Unnamed: 0,datetime,open,high,low,close,volume
0,0,2023-01-01 05:30:00,16541.77,16559.77,16508.39,16533.04,15515.82327
1,1,2023-01-01 09:30:00,16533.04,16550.00,16499.01,16526.19,16532.24115
2,2,2023-01-01 13:30:00,16525.70,16557.00,16505.20,16556.66,15915.96701
3,3,2023-01-01 17:30:00,16556.66,16572.94,16533.68,16558.73,15046.09096
4,4,2023-01-01 21:30:00,16558.73,16623.65,16558.00,16603.08,18532.64857
...,...,...,...,...,...,...,...
2186,2186,2023-12-31 13:30:00,42537.10,42899.00,42369.15,42518.36,4626.12098
2187,2187,2023-12-31 17:30:00,42518.36,42646.27,42352.76,42457.17,3403.65176
2188,2188,2023-12-31 21:30:00,42457.17,42719.80,42436.47,42619.04,3036.19166
2189,2189,2024-01-01 01:30:00,42619.04,42680.36,42056.00,42283.58,5460.30980


In [None]:
X_23 = test_df.loc[:, 'open'].values

In [None]:
print(X_23)

[16541.77 16533.04 16525.7  ... 42457.17 42619.04 42283.58]


In [None]:
print(len(X_23))

2191


In [None]:
X_test_23_reg, y_test_23_reg = create_regression_sequences(X_23, lookback_window, forecast_size)

In [None]:
print(X_test_23_reg)

[[16541.77 16533.04 16525.7  ... 16662.38 16721.27 16735.51]
 [16533.04 16525.7  16556.66 ... 16721.27 16735.51 16734.66]
 [16525.7  16556.66 16558.73 ... 16735.51 16734.66 16737.56]
 ...
 [42066.94 41972.81 41947.16 ... 42175.99 42537.1  42518.36]
 [41972.81 41947.16 41893.96 ... 42537.1  42518.36 42457.17]
 [41947.16 41893.96 42374.97 ... 42518.36 42457.17 42619.04]]


In [None]:
# Use the trained regression model to merge and calculate profits
log_df_23, total_profit_23, history_23,gap_strategy_data_23, position = merge_and_calculate_profit(X_test_23_reg, regressor,gap_threshold_percent=2.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit_23)

Total Profit: 91774.49102235842


In [None]:
new_df = test_df.copy()
df2= log_df_23.iloc[:,:2]
merged_df = pd.merge(new_df, df2, on='open', how='left')
merged_df['signal']=merged_df['signal'].fillna(-1)
# position = -1

In [None]:
# If there's an open position at the end, close it at the last price
if position == 1:
        merged_df['signal'][-1] = -1
        # log_df.drop(log_df.tail(1).index, inplace=True)
        # log_df = pd.concat([log_df,
        #     pd.DataFrame({'open': [price],'signal': [-1],'Model_Action': ['Sell'], 'Action': ['Sell'], 'Current Position': [0],
        #      'Balance': [shares_bought * price], 'Profit/Loss': [profit_loss]})], ignore_index=True)

elif position == -1:
        # Close short sell
        position = 0
        merged_df['signal'][-1] = 1
        # shares_bought=0+6
        # log_df.drop(log_df.tail(1).index, inplace=True)
        # log_df = pd.concat([log_df, pd.DataFrame({'open': [price],'signal': [1],'Stop_loss': [stop_loss],  'Model_Action':['Close short sell'], 'Action': ['Buy'], 'Current Position': [shares_bought], 'Balance': [balance], 'Profit/Loss': [profit_loss]})],
        #                                ignore_index=True)

In [None]:
merged_df

Unnamed: 0.1,Unnamed: 0,datetime,open,high,low,close,volume,signal
0,0,2023-01-01 05:30:00,16541.77,16559.77,16508.39,16533.04,15515.82327,-1
1,1,2023-01-01 09:30:00,16533.04,16550.00,16499.01,16526.19,16532.24115,-1
2,2,2023-01-01 13:30:00,16525.70,16557.00,16505.20,16556.66,15915.96701,-1
3,3,2023-01-01 17:30:00,16556.66,16572.94,16533.68,16558.73,15046.09096,-1
4,4,2023-01-01 21:30:00,16558.73,16623.65,16558.00,16603.08,18532.64857,-1
...,...,...,...,...,...,...,...,...
2200,2186,2023-12-31 13:30:00,42537.10,42899.00,42369.15,42518.36,4626.12098,0
2201,2187,2023-12-31 17:30:00,42518.36,42646.27,42352.76,42457.17,3403.65176,0
2202,2188,2023-12-31 21:30:00,42457.17,42719.80,42436.47,42619.04,3036.19166,0
2203,2189,2024-01-01 01:30:00,42619.04,42680.36,42056.00,42283.58,5460.30980,-1


In [None]:
merged_df[merged_df.isna().any(axis=1)]

Unnamed: 0.1,Unnamed: 0,datetime,open,high,low,close,volume,signal


In [None]:
merged_df.to_csv("log_year_23", index=False)

In [None]:
log_df_mapped_23 = map_actions_to_colors(log_df_23)
plot_open_vs_action(log_df_mapped_23)


In [None]:
test_df_18_19 = pd.read_csv("/content/drive/Shareddrives/Hackathon/4h_data/btc_18_19_4h.csv")

In [None]:
test_df_18_19

Unnamed: 0.1,Unnamed: 0,datetime,open,high,low,close,volume
0,0,2018-01-01 05:30:00,13715.65,13715.65,13155.38,13410.03,1676.204807
1,1,2018-01-01 09:30:00,13434.98,13818.55,13322.15,13570.35,1302.214836
2,2,2018-01-01 13:30:00,13569.98,13735.24,13001.13,13220.56,1319.755931
3,3,2018-01-01 17:30:00,13220.56,13330.00,12750.00,13247.00,1831.933153
4,4,2018-01-01 21:30:00,13247.00,13290.65,12940.00,13240.37,1092.337234
...,...,...,...,...,...,...,...
4359,4359,2019-12-31 05:30:00,7246.00,7269.00,7200.00,7236.39,4057.299249
4360,4360,2019-12-31 09:30:00,7236.60,7268.48,7228.40,7250.37,2664.670591
4361,4361,2019-12-31 13:30:00,7250.30,7256.94,7217.50,7243.39,4498.864156
4362,4362,2019-12-31 17:30:00,7243.64,7320.00,7188.88,7195.96,7983.926818


In [None]:
X_18 = test_df_18_19.iloc[:, 2].values

In [None]:
print(X_18)

[13715.65 13434.98 13569.98 ...  7250.3   7243.64  7195.  ]


In [None]:
X_test_18_reg, y_test_18_reg = create_regression_sequences(X_18, lookback_window, forecast_size)

In [None]:
# Use the trained regression model to merge and calculate profits
log_df_18, total_profit_18, history_18,gap_strategy_data_18, position = merge_and_calculate_profit(X_test_18_reg, regressor,gap_threshold_percent=2.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit_18)

ValueError: too many values to unpack (expected 4)

In [None]:
log_df_mapped_23 = map_actions_to_colors(log_df_23)
plot_open_vs_action(log_df_mapped_23)


In [None]:
test_df_18_20 = pd.read_csv("/content/drive/Shareddrives/Hackathon/4h_data/btc_18_20_4h.csv")

In [None]:
test_df_18_20

Unnamed: 0.1,Unnamed: 0,datetime,open,high,low,close,volume
0,0,2018-01-01 05:30:00,13715.65,13715.65,13155.38,13410.03,1676.204807
1,1,2018-01-01 09:30:00,13434.98,13818.55,13322.15,13570.35,1302.214836
2,2,2018-01-01 13:30:00,13569.98,13735.24,13001.13,13220.56,1319.755931
3,3,2018-01-01 17:30:00,13220.56,13330.00,12750.00,13247.00,1831.933153
4,4,2018-01-01 21:30:00,13247.00,13290.65,12940.00,13240.37,1092.337234
...,...,...,...,...,...,...,...
6554,6554,2020-12-31 05:30:00,28875.55,29300.00,28120.67,29024.00,17005.770282
6555,6555,2020-12-31 09:30:00,29024.01,29285.00,28738.10,29155.25,10242.046375
6556,6556,2020-12-31 13:30:00,29155.24,29210.83,28580.00,28910.30,10508.482930
6557,6557,2020-12-31 17:30:00,28910.29,28989.03,27850.00,28770.00,19597.147389


In [None]:
X_18_20= test_df_18_20.iloc[:, 2].values

In [None]:
print(X_18_20)

[13715.65 13434.98 13569.98 ... 29155.24 28910.29 28782.01]


In [None]:
X_test_18_20_reg, y_test_18_20_reg = create_regression_sequences(X_18_20, lookback_window, forecast_size)

In [None]:
# Use the trained regression model to merge and calculate profits
log_df_18_20, total_profit_18_20, history_18_20,gap_strategy_data_18_20 = merge_and_calculate_profit(X_test_18_20_reg, regressor,gap_threshold_percent=1.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit_18_20)

Total Profit: 666596.0966412202


In [None]:
log_df_mapped_23 = map_actions_to_colors(log_df_18_20)
plot_open_vs_action(log_df_mapped_18_20)


NameError: name 'log_df_mapped_18_20' is not defined

In [None]:
test_df_18_21 = pd.read_csv("/content/drive/Shareddrives/Hackathon/4h_data/btc_18_21_4h.csv")
X_18_21= test_df_18_21.iloc[:, 2].values
print(X_18_21)
X_test_18_21_reg, y_test_18_21_reg = create_regression_sequences(X_18_21, lookback_window, forecast_size)

[13715.65 13434.98 13569.98 ... 47191.08 47975.76 48005.37]


In [None]:
# Use the trained regression model to merge and calculate profits
log_df_18_21, total_profit_18_21, history_18_21,gap_strategy_data_18_21 = merge_and_calculate_profit(X_test_18_21_reg, regressor,gap_threshold_percent=3.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit_18_21)

Total Profit: 267700.4485144951


In [None]:
test_df_18_22 = pd.read_csv("/content/drive/Shareddrives/Hackathon/4h_data/btc_18_22_4h.csv")
X_18_22= test_df_18_22.iloc[:, 2].values
print(X_18_22)
X_test_18_22_reg, y_test_18_22_reg = create_regression_sequences(X_18_22, lookback_window, forecast_size)

[13715.65 13818.55 13735.24 ... 16577.   16677.35 16616.37]


In [None]:
# Use the trained regression model to merge and calculate profits
log_df_18_22, total_profit_18_22, history_18_22,gap_strategy_data_18_22 = merge_and_calculate_profit(X_test_18_22_reg, regressor,gap_threshold_percent=1.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit_18_22)

Total Profit: 257630.38202322667


In [None]:
test_df_18_23 = pd.read_csv("/content/drive/Shareddrives/Hackathon/btc_18_23_4h.csv")
X_18_23= test_df_18_23.iloc[:, 2].values
print(X_18_23)
X_test_18_23_reg, y_test_18_23_reg = create_regression_sequences(X_18_23, lookback_window, forecast_size)

[13715.65 13434.98 13569.98 ... 42457.17 42619.04 42283.58]


In [None]:
# Use the trained regression model to merge and calculate profits
log_df_18_23, total_profit_18_23, history_18_23,gap_strategy_data_18_23 = merge_and_calculate_profit(X_test_18_23_reg, regressor,gap_threshold_percent=3.5,stop_loss_percent=0.005)

# Print the total profit obtained during the trading period
print("Total Profit:", total_profit_18_23)

Total Profit: 238279.3078662512
