In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from util.utils import tradex_features, convert_df
import os
import pickle

symbol= 'BTC/USDT'
procent= 50
shift = -7   
buy_amount = 100  # Amount in Euro to buy BTC



# Sample indicator data in a Pandas DataFrame (you should replace this with your actual data)
def _get_data():
    # self.config['Path']['2020_30m_data']
    pickle_file_name = 'data/pickle/all/30m_data_all.pkl'

    if not os.path.exists(pickle_file_name):
        print('No data has been written')
        return pd.DataFrame()  # Return an empty DataFrame instead of None for consistency

    with open(pickle_file_name, 'rb') as f:
        data_ = pickle.load(f)

    if data_.empty:
        print("Loaded data is empty.")
        return pd.DataFrame()

    data = convert_df(data_)

    if data.empty or data.isnull().values.any():
        print("Converted data is empty or contains NaN values.")
        return pd.DataFrame()

    # float(self.config['Data']['percentage'])
    percentage_to_keep = procent / 100.0
    rows_to_keep = int(len(data) * percentage_to_keep)
    data = data.head(rows_to_keep)

    print(f'Dataframe shape: {data.shape}')
    return data



In [6]:
import logging 

class RealisticTradingEnvironment:
    def __init__(self, df, initial_balance=10000):
        self.df = df
        self.balance = initial_balance
        self.shares_owned = 0
        self.net_worth = self.balance
        self.history = []
        self.current_step = 0
        self.max_steps = len(self.df) - 1
        self.done = False
        self.logger = logging.getLogger(__name__)
        self.setup_logger()

    def setup_logger(self):
        file_handler = logging.FileHandler('realistic_trading_output.log')
        file_handler.setLevel(logging.INFO)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)

    def reset(self):
        self.balance = 10000
        self.shares_owned = 0
        self.net_worth = self.balance
        self.history = []
        self.current_step = 0
        self.done = False
        return self.get_observation()

    def step(self, action):
        current_price = self.df.iloc[self.current_step]['close']
        if action == 0:  # Buy
            buy_amount = self.balance * 0.1
            if self.balance >= buy_amount:
                num_shares_to_buy = buy_amount / (current_price * (1 + self.market_impact()))
                self.shares_owned += num_shares_to_buy
                self.balance -= buy_amount
                self.balance -= self.calculate_transaction_cost(buy_amount)
            else:
                self.logger.warning("Not enough balance to buy!")

        elif action == 1:  # Sell
            if self.shares_owned > 0:
                sell_amount = current_price * self.shares_owned * (1 - self.market_impact())
                self.balance += sell_amount
                self.balance -= self.calculate_transaction_cost(sell_amount)
                self.shares_owned = 0
            else:
                self.logger.warning("No shares to sell!")

        self.net_worth = self.balance + self.shares_owned * current_price

        if self.history:
            reward = self.net_worth - self.history[-1][0]
        else:
            reward = 0
        self.history.append((self.net_worth, action))

        self.current_step += 1
        if self.current_step == self.max_steps:
            self.done = True

        return self.get_observation(), reward, self.done, {}

    def get_observation(self):
        current_price = self.df.iloc[self.current_step]['close']
        return np.array([self.balance, self.shares_owned, current_price])

    def render(self):
        self.logger.info(f'Step: {self.current_step}')
        self.logger.info(f'Balance: {self.balance}')
        self.logger.info(f'Shares owned: {self.shares_owned}')
        self.logger.info(f'Net worth: {self.net_worth}')

    def calculate_transaction_cost(self, amount):
        return amount * 0.001  # 0.1% transaction cost

    def market_impact(self):
        # Simulate market impact
        return 0.001  # 0.1% market impact


In [7]:
import tensorflow as tf
from tensorflow.keras import layers
import logging

class DQNAgent:
    def __init__(self, state_shape, num_actions):
        self.state_shape = state_shape
        self.num_actions = num_actions
        self.epsilon = 1
        self.model = self.build_model()
        self.target_model = self.build_model()
        self.target_model.set_weights(self.model.get_weights())

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    def build_model(self):
        model = tf.keras.Sequential([
            layers.Dense(32, activation='relu', input_shape=self.state_shape),
            layers.Dense(32, activation='relu'),
            layers.Dense(self.num_actions)
        ])
        return model

    def select_action(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.randint(self.num_actions)
        else:
            # Reshape state before prediction
            state = np.reshape(state, (1,) + self.state_shape)
            q_values = self.model.predict(state)
            return np.argmax(q_values[0])

    def train(self, states, actions, next_states, rewards, dones, gamma=0.99):
        # Reshape states before predicting with the model
        states = np.reshape(states, (-1,) + self.state_shape)
        next_states = np.reshape(next_states, (-1,) + self.state_shape)

        next_q_values = self.target_model.predict(next_states)
        max_next_q_values = np.max(next_q_values, axis=1)
        target_q_values = rewards + (1 - dones) * gamma * max_next_q_values

        with tf.GradientTape() as tape:
            q_values = self.model(states, training=True)
            actions_one_hot = tf.one_hot(actions, self.num_actions)
            selected_q_values = tf.reduce_sum(actions_one_hot * q_values, axis=1)
            loss = tf.reduce_mean(tf.square(selected_q_values - target_q_values))

        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))

    def update_target_network(self):
        self.target_model.set_weights(self.model.get_weights())


In [8]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Add file handler to log to a file
file_handler = logging.FileHandler('training.log')
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# Create environment
df = tradex_features('symbol', _get_data())
env = RealisticTradingEnvironment(df)
state_shape = env.get_observation().shape[1:]
num_actions = 3  # hold, Buy or sell
num_episodes = 100
target_update_frequency = 10

# Create agent
agent = DQNAgent(state_shape, num_actions)

# Train agent
for episode in range(num_episodes):
    logger.info("="*20)
    logger.info("Episode: %d", episode)
    obs = env.reset()
    done = False
    while not done:
        action = agent.select_action(obs)
        next_obs, reward, done, _ = env.step(action)
        agent.train(obs, action, next_obs, reward, done)
        obs = next_obs

    # Update target network periodically
    if episode % target_update_frequency == 0:
        agent.update_target_network()
    
    env.render()

2024-06-23 15:43:39,756 - INFO - init trade-x trend
2024-06-23 15:43:39,759 - INFO - init trade-x screener
2024-06-23 15:43:39,759 - INFO - - setting up High|Low channel
2024-06-23 15:43:39,762 - INFO - - make the waves


2024-06-23 15:43:39,768 - INFO - init trade-x Real time
2024-06-23 15:43:39,772 - INFO - - setting up the waves
2024-06-23 15:43:39,773 - INFO - init trade-x scanner
2024-06-23 15:43:39,782 - INFO - - setting up the rsi's
2024-06-23 15:43:39,782 - INFO - - setting up EMA
2024-06-23 15:43:39,791 - INFO - - setting up wave spaces
2024-06-23 15:43:39,793 - INFO - - setting up LSMA
2024-06-23 15:43:39,795 - INFO - - setting up the waves


                               open      high       low     close  \
date                                                                
2020-01-01 00:00:00+00:00   7182.43   7188.10   7170.69   7172.36   
2020-01-01 00:30:00+00:00   7172.79   7179.45   7170.15   7171.55   
2020-01-01 01:00:00+00:00   7171.43   7210.00   7171.10   7205.90   
2020-01-01 01:30:00+00:00   7205.89   7225.00   7200.00   7210.24   
2020-01-01 02:00:00+00:00   7210.38   7232.99   7206.46   7221.50   
...                             ...       ...       ...       ...   
2023-09-26 21:30:00+00:00  26151.43  26192.71  26145.44  26165.40   
2023-09-26 22:00:00+00:00  26165.39  26207.89  26161.90  26197.79   
2023-09-26 22:30:00+00:00  26197.78  26201.72  26152.65  26157.65   
2023-09-26 23:00:00+00:00  26157.65  26206.81  26150.08  26178.46   
2023-09-26 23:30:00+00:00  26178.46  26240.00  26176.35  26221.67   

                                 volume  
date                                     
2020-01-01 00:00:0

2024-06-23 15:43:40,288 - INFO - - getting the dots
2024-06-23 15:43:40,383 - INFO - - setting up the waves
2024-06-23 15:43:40,383 - INFO - - setting up VWAP
2024-06-23 15:43:41,543 - INFO - - getting the moneyflow
 5.27674666e+07 5.39399487e+07]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  mfi = ta.mfi(
 5.12234068e+07 2.05616998e+08]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  mfi = ta.mfi(
2024-06-23 15:43:41,562 - INFO - - getting the dots
2024-06-23 15:43:41,563 - INFO - - make the waves
2024-06-23 15:43:42,048 - INFO - Elapsed time: 2.29 seconds
2024-06-23 15:43:42,049 - INFO - ____________________
2024-06-23 15:43:42,050 - INFO - init trade-x trend
2024-06-23 15:43:42,051 - INFO - - setting up High|Low channel
2024-06-23 15:43:42,054 - INFO - - setting up EMA
2024-06-23 15:43:42,057 - INFO - - setting up LSMA
2024-06-23 15:43:42,561 - INFO - - setting up VWAP
2024-06-23 15:43:43,084 - INF

ValueError: Input 0 of layer "dense_3" is incompatible with the layer: expected min_ndim=2, found ndim=1. Full shape received: (None,)