# In [None]:
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Q-Learning Agent Class
class QLearningAgent:
    """Q-value function approximator backed by a small Keras network.

    The network maps a state vector of length ``state_size`` to one
    estimated Q-value per action (``action_size`` linear outputs) through
    two hidden ReLU layers of 24 units each. It is trained with the Adam
    optimizer on a mean-squared-error loss.

    Attributes:
        model: The compiled ``tf.keras.Sequential`` network.
        gamma: Discount factor stored for callers that build Q-targets.
    """

    def __init__(self, state_size: int, action_size: int,
                 learning_rate: float = 0.001, gamma: float = 0.95):
        """Build and compile the network and remember the discount factor.

        Args:
            state_size: Number of features in a single state vector.
            action_size: Number of discrete actions (network outputs).
            learning_rate: Step size handed to the Adam optimizer.
            gamma: Discount factor applied to future rewards by callers.
        """
        self.model = self._build_model(state_size, action_size, learning_rate)
        self.gamma = gamma

    def _build_model(self, state_size: int, action_size: int,
                     learning_rate: float):
        """Return a compiled Sequential model: state -> per-action Q-values."""
        model = tf.keras.Sequential([
            # Explicit Input layer instead of the legacy `input_dim=` kwarg
            # on the first Dense layer.
            layers.Input(shape=(state_size,)),
            layers.Dense(24, activation='relu'),
            layers.Dense(24, activation='relu'),
            # Linear output: Q-values are unbounded regression targets.
            layers.Dense(action_size, activation='linear'),
        ])
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by current Keras optimizers.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss='mse',
        )
        return model

    def train(self, X, y):
        """Fit the network on inputs ``X`` and targets ``y`` silently.

        Returns:
            Whatever ``self.model.fit`` returns.
        """
        return self.model.fit(X, y, verbose=0)

    def predict(self, state):
        """Return the network's Q-value predictions for ``state``.

        ``verbose=0`` suppresses the per-call progress output, matching
        ``train``; this method is called several times per environment step.
        """
        return self.model.predict(state, verbose=0)

# Deep Q-Learning Function
def deep_q_learning(env_name: str, episodes: int = 1000,
                    learning_rate: float = 0.001,
                    max_steps: int = 500) -> "QLearningAgent":
    """Train a ``QLearningAgent`` online on a Gym environment.

    For every environment step the agent picks the action with the highest
    predicted Q-value, observes the transition, and is fitted on a single
    sample whose target for the chosen action is
    ``reward + gamma * max_a Q(next_state, a)`` (or just ``reward`` when
    the episode terminated).

    NOTE: action selection is purely greedy; no exploration is performed.

    Args:
        env_name: Identifier passed to ``gym.make``.
        episodes: Number of episodes to run.
        learning_rate: Learning rate forwarded to the agent's optimizer.
        max_steps: Upper bound on steps per episode.

    Returns:
        The trained agent.
    """
    env = gym.make(env_name)
    try:
        state_size = env.observation_space.shape[0]
        action_size = env.action_space.n

        agent = QLearningAgent(state_size, action_size, learning_rate)

        for _episode in range(episodes):
            # Newer Gym/Gymnasium return (observation, info) from reset();
            # older versions return the observation alone.
            reset_result = env.reset()
            if isinstance(reset_result, tuple):
                observation = reset_result[0]
            else:
                observation = reset_result
            state = np.reshape(observation, [1, state_size])

            for _step in range(max_steps):
                # One forward pass serves both action selection and the
                # training target; the network is unchanged in between.
                q_values = agent.predict(state)
                action = int(np.argmax(q_values))

                # Newer API: (obs, reward, terminated, truncated, info);
                # older API: (obs, reward, done, info).
                step_result = env.step(action)
                if len(step_result) == 5:
                    next_state, reward, terminated, truncated, _ = step_result
                else:
                    next_state, reward, terminated, _ = step_result
                    truncated = False
                next_state = np.reshape(next_state, [1, state_size])

                if terminated:
                    # No future return exists past a terminal state, so do
                    # not bootstrap from next_state.
                    target = reward
                else:
                    target = reward + agent.gamma * np.max(
                        agent.predict(next_state))

                # Only the taken action's Q-value is moved toward the target;
                # the other outputs keep their current predictions.
                target_f = np.array(q_values)
                target_f[0][action] = target

                agent.train(state, target_f)
                state = next_state

                if terminated or truncated:
                    break
    finally:
        # Release environment resources even if training raises.
        env.close()

    return agent

# Sample usage with environment
# deep_q_learning(env_name='CartPole-v1', episodes=1000)
