<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Reinforcement_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random

class DQN(nn.Module):
    """Small fully connected network producing one output per action.

    The network is ``Linear(state_dim, 128) -> ReLU -> Linear(128, action_dim)``
    and is stored as a single ``nn.Sequential`` under the ``fc`` attribute.

    Args:
        state_dim: Size of the last dimension of the input tensor.
        action_dim: Number of outputs (one per action).
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_width = 128
        layers = [
            nn.Linear(state_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, action_dim),
        ]
        # Positional Sequential keeps parameter names as fc.0.* and fc.2.*.
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for input tensor ``x``."""
        action_values = self.fc(x)
        return action_values

def _reset_env(env):
    """Reset ``env`` and return only the initial observation.

    Accepts both reset conventions: the legacy one that returns the bare
    observation and the newer one that returns ``(observation, info)``.
    """
    result = env.reset()
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result[0]
    return result


def _step_env(env, action):
    """Step ``env`` and return ``(next_state, reward, terminated, truncated)``.

    Accepts both the legacy 4-tuple ``(obs, reward, done, info)`` and the
    newer 5-tuple ``(obs, reward, terminated, truncated, info)``. The legacy
    form cannot distinguish the two endings, so ``done`` is treated as
    ``terminated``.
    """
    result = env.step(action)
    if len(result) == 5:
        next_state, reward, terminated, truncated, _ = result
    else:
        next_state, reward, terminated, _ = result
        truncated = False
    return next_state, reward, bool(terminated), bool(truncated)


def train_dqn(env, model, episodes, gamma, epsilon, epsilon_decay, learning_rate):
    """Train ``model`` online with one-step TD updates and epsilon-greedy actions.

    After every environment step the model is updated with an MSE loss
    between ``model(state)[action]`` and the target
    ``reward + gamma * max(model(next_state))``. No replay buffer or separate
    target network is used. One summary line is printed per episode.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``action_space.sample()``; both the 4-tuple and 5-tuple ``step``
            conventions are supported.
        model: Module mapping a float32 state tensor to one value per action.
        episodes: Number of episodes to run.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Initial probability of taking a random action.
        epsilon_decay: Factor ``epsilon`` is multiplied by after every
            environment step (not once per episode).
        learning_rate: Learning rate for the Adam optimizer.
    """
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    for episode in range(episodes):
        state = torch.tensor(_reset_env(env), dtype=torch.float32)
        done = False
        total_reward = 0

        while not done:
            if random.random() < epsilon:
                action = env.action_space.sample()
            else:
                with torch.no_grad():
                    action = torch.argmax(model(state)).item()

            next_obs, reward, terminated, truncated = _step_env(env, action)
            next_state = torch.tensor(next_obs, dtype=torch.float32)
            done = terminated or truncated
            total_reward += reward

            # The target is a constant for this update, so build it without a graph.
            with torch.no_grad():
                q_target = torch.tensor(float(reward), dtype=torch.float32)
                # A terminal state has no future return: bootstrap only when
                # the episode did not genuinely terminate on this step.
                if not terminated:
                    q_target = q_target + gamma * torch.max(model(next_state))
            q_value = model(state)[action]

            # Both operands are 0-d tensors, so MSELoss does not need to broadcast.
            loss = criterion(q_value, q_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            state = next_state
            epsilon *= epsilon_decay

        print(f'Episode {episode + 1}/{episodes}, Total Reward: {total_reward}')

# Example usage
# Guarded so that importing this file does not start a training run. In a
# notebook cell or when executed as a script, __name__ is "__main__", so the
# example still runs and `env` / `model` remain module-level names.
if __name__ == "__main__":
    import gym

    env = gym.make('CartPole-v1')
    model = DQN(state_dim=env.observation_space.shape[0], action_dim=env.action_space.n)
    train_dqn(env, model, episodes=100, gamma=0.99, epsilon=1.0, epsilon_decay=0.995, learning_rate=0.001)