In [None]:
!pip3 install pycoingecko pandas numpy matplotlib noise scikit-learn tensorflow gym setuptools

In [None]:
!pip3 install --upgrade pip tensorflow setuptools

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pycoingecko import CoinGeckoAPI
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque

In [None]:
# Download the bitcoin/usd market chart for the requested number of days from CoinGecko.
cg = CoinGeckoAPI()
bitcoin_data = cg.get_coin_market_chart_by_id(
    id='bitcoin',
    vs_currency='usd',
    days='365',
)
prices = bitcoin_data['prices']

# Build a frame of (timestamp, price) rows indexed by timestamp.
# The raw timestamps are interpreted as milliseconds since the epoch.
df = pd.DataFrame(prices, columns=['timestamp', 'price'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df = df.set_index('timestamp')

# Rescale the price column into [0, 1]; `scaler` is kept so values can be mapped back later.
# NOTE(review): the scaler is fit on the whole series, including the rows that are later
# held out as test data, so the test split is not fully unseen -- confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
df['price'] = scaler.fit_transform(df[['price']])


In [None]:
# Turn the price series into supervised samples: every sample is a run of
# `window_size` consecutive prices, and its target is the price that follows the run.
window_size = 30
n_windows = len(df) - window_size

features = np.array(
    [df['price'].values[start:start + window_size] for start in range(n_windows)]
)
targets = np.array(
    [df['price'].values[start + window_size] for start in range(n_windows)]
)

# Chronological 80/20 split: the first 80% of the windows train, the remainder tests.
train_size = int(len(features) * 0.8)
train_features = features[:train_size]
test_features = features[train_size:]
train_targets = targets[:train_size]
test_targets = targets[train_size:]

# Define Actor and Critic Networks
def create_actor(state_shape, action_shape):
    """Build the actor (policy) network.

    The network maps a state to an action through two 64-unit ReLU layers and a
    tanh output layer, so every action component lies in [-1, 1].

    Args:
        state_shape: shape tuple of a single state (without the batch axis).
        action_shape: shape tuple of a single action; only ``action_shape[0]``
            is used, as the number of output units.

    Returns:
        A ``tf.keras.Model`` taking a batch of states and returning a batch of actions.
    """
    observation = layers.Input(shape=state_shape)

    hidden = observation
    for _ in range(2):
        hidden = layers.Dense(64, activation="relu")(hidden)

    action = layers.Dense(action_shape[0], activation="tanh")(hidden)
    return tf.keras.Model(observation, action)

def create_critic(state_shape, action_shape):
    """Build the critic (state-action value) network.

    State and action are concatenated and passed through two 64-unit ReLU
    layers followed by a single linear output unit.

    Args:
        state_shape: shape tuple of a single state (without the batch axis).
        action_shape: shape tuple of a single action (without the batch axis).

    Returns:
        A ``tf.keras.Model`` taking ``[states, actions]`` and returning one
        value per sample.
    """
    observation = layers.Input(shape=state_shape)
    action = layers.Input(shape=action_shape)

    hidden = layers.Concatenate()([observation, action])
    for _ in range(2):
        hidden = layers.Dense(64, activation="relu")(hidden)

    q_value = layers.Dense(1)(hidden)
    return tf.keras.Model([observation, action], q_value)

# A state is one price window; an action is a single scalar.
state_shape, action_shape = (window_size,), (1,)

# Online networks (the target copies are created separately).
actor = create_actor(state_shape, action_shape)
critic = create_critic(state_shape, action_shape)

# Implement the DDPG Algorithm
class ReplayBuffer:
    """Bounded FIFO store of experiences with uniform random sampling.

    Each experience is expected to be a 5-item sequence
    ``(state, action, reward, next_state, done)``; ``sample`` unpacks exactly
    five fields.
    """

    def __init__(self, max_size=100000):
        # A deque with maxlen discards the oldest entry once the capacity is reached.
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        """Append one experience, evicting the oldest one if the buffer is full."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct experiences uniformly at random.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)`` where each
            item is the ``np.stack`` of that field over the sampled experiences.

        Raises:
            ValueError: if ``batch_size`` exceeds the number of stored
                experiences (sampling is without replacement).
        """
        picked = np.random.choice(len(self.buffer), batch_size, replace=False)
        fields = zip(*(self.buffer[position] for position in picked))
        states, actions, rewards, next_states, dones = (np.stack(field) for field in fields)
        return states, actions, rewards, next_states, dones

class OUActionNoise:
    """Ornstein-Uhlenbeck process used as temporally correlated exploration noise.

    Each call advances the process by one step of size ``dt`` and returns the
    new value:

        x <- x + theta * (mean - x) * dt + std_dev * sqrt(dt) * N(0, 1)

    Args:
        mean: value the process reverts to. May be a NumPy array, a sequence or
            a scalar; it is converted with ``np.asarray`` and its shape defines
            the shape of the generated noise.
        std_deviation: scale of the random term (scalar or broadcastable to ``mean``).
        theta: mean-reversion rate.
        dt: step size.
        x_initial: optional starting value; zeros shaped like ``mean`` when ``None``.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.theta = theta
        # Coerce to ndarray so `.shape` is available for scalars and lists too;
        # an ndarray argument is stored unchanged (np.asarray does not copy it).
        self.mean = np.asarray(mean)
        self.std_dev = std_deviation
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def reset(self):
        """Restart the process from ``x_initial`` (or from zeros when it is ``None``)."""
        if self.x_initial is not None:
            self.x_prev = self.x_initial
        else:
            self.x_prev = np.zeros_like(self.mean)

    def __call__(self):
        """Advance the process one step and return the new noise value."""
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal(size=self.mean.shape)
        # A new array is built each step, so `x_initial` is never modified in place.
        x = self.x_prev + drift + diffusion
        self.x_prev = x
        return x

In [None]:
# Exploration noise (one-dimensional, zero mean, scale 0.2) and the experience store.
noise = OUActionNoise(
    mean=np.zeros(1),
    std_deviation=0.2 * np.ones(1),
)
replay_buffer = ReplayBuffer()

# Training hyperparameters:
#   batch_size - number of experiences sampled per update
#   gamma      - discount applied to the target critic's value in train_step
#   tau        - blend rate used when soft-updating the target networks
batch_size, gamma, tau = 64, 0.99, 0.005

# Separate Adam optimizers; the critic uses the larger learning rate.
actor_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
critic_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-3)

# Target networks: same architectures, started as exact copies of the online networks.
target_actor = create_actor(state_shape, action_shape)
target_critic = create_critic(state_shape, action_shape)
target_actor.set_weights(actor.get_weights())
target_critic.set_weights(critic.get_weights())

In [None]:
def update_target(target_weights, weights, tau):
    """Blend ``weights`` into ``target_weights`` in place (soft update).

    Each target variable becomes ``tau * source + (1 - tau) * target``.

    Args:
        target_weights: iterable of variables to update; each must provide ``assign``.
        weights: iterable of source variables, paired with the targets by position.
        tau: blend factor; 0 leaves the targets unchanged, 1 copies the sources.
    """
    for target_var, source_var in zip(target_weights, weights):
        blended = source_var * tau + target_var * (1 - tau)
        target_var.assign(blended)

@tf.function
def train_step(states, actions, rewards, next_states, dones):
    """Run one actor-critic update on a sampled minibatch.

    Steps:
      1. Fit the critic to the one-step target
         ``rewards + gamma * target_critic(next_states, target_actor(next_states)) * (1 - dones)``.
      2. Update the actor to maximise the critic's value of the actor's own actions.
      3. Soft-update both target networks with rate ``tau``.

    Uses the module-level ``actor``, ``critic``, ``target_actor``,
    ``target_critic``, their optimizers, ``gamma`` and ``tau``.

    Args:
        states: batch of states.
        actions: batch of actions taken in ``states``.
        rewards: batch of rewards, shape ``(batch,)`` or ``(batch, 1)``.
        next_states: batch of successor states.
        dones: batch of termination flags, same shape options as ``rewards``.
    """
    # NumPy batches may arrive as float64/bool; cast once so all tensors are float32.
    states = tf.cast(states, tf.float32)
    actions = tf.cast(actions, tf.float32)
    next_states = tf.cast(next_states, tf.float32)

    # The critic outputs shape (batch, 1). Flat (batch,) rewards/dones would
    # broadcast against it to (batch, batch) and mix unrelated samples in the
    # loss, so force them into column vectors.
    rewards = tf.reshape(tf.cast(rewards, tf.float32), (-1, 1))
    dones = tf.reshape(tf.cast(dones, tf.float32), (-1, 1))

    # --- Critic update ---
    with tf.GradientTape() as tape:
        target_actions = target_actor(next_states, training=True)
        next_value = target_critic([next_states, target_actions], training=True)
        # (1 - dones) removes the bootstrap term on terminal transitions.
        y = rewards + gamma * next_value * (1 - dones)
        critic_value = critic([states, actions], training=True)
        critic_loss = tf.math.reduce_mean(tf.math.square(y - critic_value))

    critic_grad = tape.gradient(critic_loss, critic.trainable_variables)
    critic_optimizer.apply_gradients(zip(critic_grad, critic.trainable_variables))

    # --- Actor update ---
    with tf.GradientTape() as tape:
        policy_actions = actor(states, training=True)
        policy_value = critic([states, policy_actions], training=True)
        # Minimising the negated value performs gradient ascent on the critic's estimate.
        actor_loss = -tf.math.reduce_mean(policy_value)

    actor_grad = tape.gradient(actor_loss, actor.trainable_variables)
    actor_optimizer.apply_gradients(zip(actor_grad, actor.trainable_variables))

    # --- Target networks slowly track the online networks ---
    update_target(target_actor.variables, actor.variables, tau)
    update_target(target_critic.variables, critic.variables, tau)

# Define clear_output function
def clear_output(wait=True):
    """Clear the current cell's output via ``IPython.display.clear_output``.

    Args:
        wait: forwarded to IPython; when true, clearing is delayed until new
            output is available to replace the old one.
    """
    # Imported lazily so defining this helper does not require IPython.
    from IPython.display import clear_output as clear
    # Forward the caller's choice; previously `wait` was ignored and True was always used.
    clear(wait=wait)

In [17]:
# Training loop with live plot update
num_episodes = 1000
plot_every = 10  # redraw the diagnostic figure every N episodes
all_rewards = []

for episode in range(num_episodes):
    episode_reward = 0

    # One episode is a single chronological pass over the training windows.
    for t in range(len(train_features) - 1):
        state = train_features[t]

        # Exploration: policy output plus OU noise, clipped back into the tanh range.
        # Stored as a NumPy array so the replay buffer stacks plain arrays.
        policy_action = actor(tf.convert_to_tensor([state], dtype=tf.float32))[0]
        action = tf.clip_by_value(policy_action + noise(), -1.0, 1.0).numpy()

        next_state = train_features[t + 1]
        # NOTE(review): the reward is the next scaled price and does not depend on
        # `action`, so the critic gets no signal about which action is better.
        # If the actor is meant to predict prices (as the plot below suggests),
        # the reward should measure how close `action` is to `train_targets[t]`.
        reward = train_targets[t]
        done = t == len(train_features) - 2

        replay_buffer.add((state, action, reward, next_state, done))

        # Start learning once the buffer holds more than one batch of experiences.
        if len(replay_buffer.buffer) > batch_size:
            states, actions, rewards, next_states, dones = replay_buffer.sample(batch_size)
            train_step(states, actions, rewards, next_states, dones)

        episode_reward += reward

    all_rewards.append(episode_reward)
    print(f"Episode {episode}, Reward: {episode_reward}")

    # Plotting live update
    if episode % plot_every == 0:
        clear_output(wait=True)

        # Number of points drawn for the actual/predicted curves (never negative).
        n_plot = max(train_size - window_size, 0)
        plot_index = df.index[window_size:train_size]

        # One batched forward pass instead of one model call per window.
        predicted_prices = actor(
            tf.convert_to_tensor(train_features[:n_plot], dtype=tf.float32)
        ).numpy()

        fig = plt.figure(figsize=(14, 7))
        plt.plot(df.index[:train_size], df['price'][:train_size], label='Training Data')
        plt.plot(df.index[train_size:], df['price'][train_size:], label='Test Data')
        plt.plot(plot_index, train_targets[:n_plot], label='Actual Prices')
        plt.plot(plot_index, predicted_prices, label='Predicted Prices')
        plt.xlabel('Date')
        plt.ylabel('Normalized Price')
        plt.legend()
        plt.title('Training and Test Data Split')
        plt.show()
        # Release the figure so repeated redraws do not accumulate open figures.
        plt.close(fig)

    # Reset noise
    noise.reset()