# Market Making with Reinforcement Learning

In [None]:
# Data Handling
import pandas as pd
import numpy as np

# Generic
from collections import defaultdict
from tqdm.auto import tqdm

# Data Visualization
import matplotlib.pyplot as plt 

# Reinforcement Learning
import gym

# Custom Modules
from Environment.market_making import MarketMakerEnv, PhiTransform
from data.data_gen import DataGenerator
from algorithms import TileCodingQLearningAgent

# suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Set the seed for reproducibility.
# NOTE(review): this seeds only NumPy's global (legacy) RNG. Python's built-in
# `random` module and any generator objects created inside the custom modules
# are not seeded here — confirm this is sufficient for reproducible runs.
SEED = 42
np.random.seed(SEED)

## Set up the training environment

In [None]:
# Read the order-book CSV through the project's DataGenerator and keep only
# the first 1200 rows, converted to a raw array via `.values`.
# NOTE(review): `_generator` is underscore-prefixed (non-public by
# convention) — confirm there is no public loader that should be used instead.
lob_data = (
    DataGenerator
    ._generator('data/lob.csv', levels=1)
    .head(1200)
    .values
)

In [None]:
# Build the market-making environment on top of the loaded order-book array.
# NOTE(review): the keyword is spelled `phi_transorm` (sic); presumably this
# matches the MarketMakerEnv signature — confirm before renaming.
phi = PhiTransform.PnL_asymm_dampened(-1)
env = MarketMakerEnv(lob_data=lob_data, horizon=1000, phi_transorm=phi)

# Reset once and show the first observation returned by the environment.
initial_state = env.reset()
display(initial_state)

In [None]:
# Hyperparameters for the tile-coding Q-learning agent, collected in one place
# so they are easy to tweak between runs.
hyperparams = {
    "alpha": 1e-3,
    "gamma": 0.99,
    "epsilon": 0.1,
    "epsilon_decay": 0.995,
    "epsilon_min": 1e-2,
}

# Instantiate the agent against the environment created above
agent = TileCodingQLearningAgent(env=env, **hyperparams)

# Run training for 200 episodes
agent.train(n_episodes=200)

In [None]:
# Test the agent
# NOTE(review): only one test episode is requested, yet the plotting cell
# applies a 10-point rolling mean and labels the x-axis 'Episode' — confirm
# whether `rewards` holds per-step or per-episode values.
rewards = agent.test(n_episodes=1)

In [None]:
# Smooth the rewards with a simple moving average (SMA).
# With rolling's default `min_periods` (equal to the window size), the first
# `sma_window - 1` entries of the result are NaN, so the plotted line only
# starts once a full window of rewards is available.
sma_window = 10
rewards_sma = pd.Series(rewards).rolling(window=sma_window).mean()

# Plot the rewards
# plt.plot(rewards, label='Reward')  # uncomment to overlay the raw rewards
# The legend label is derived from `sma_window` so it cannot drift out of sync
# with the actual smoothing window when the value above is changed.
plt.plot(rewards_sma, label=f'SMA {sma_window} Reward')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.legend()
plt.show()