<a href="https://colab.research.google.com/github/Charish53/RL_lab/blob/main/LAB03/QLearningFrozenLake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libraries

In [45]:
import numpy as np
import gymnasium as gym
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import time

### Epsilon-Greedy Action Selection

In [46]:
def epsilon_greedy(Q, state, epsilon, n_actions):
    """Select an action for ``state`` using an epsilon-greedy rule.

    A single uniform sample from ``np.random.rand()`` decides the branch:
    if it is below ``epsilon`` a uniformly random action index in
    ``[0, n_actions)`` is returned, otherwise the index of the largest
    entry of ``Q[state]`` is returned (``np.argmax`` picks the first
    index on ties).

    Args:
        Q: Table indexed as ``Q[state]`` giving one value per action.
        state: Index of the current state (row of ``Q``).
        epsilon: Exploration threshold compared against the uniform sample.
        n_actions: Number of actions to sample from when exploring.

    Returns:
        The chosen action index.
    """
    explore = np.random.rand() < epsilon
    if not explore:
        # Exploit: act greedily with respect to the current estimates.
        return np.argmax(Q[state])
    # Explore: ignore the estimates and draw an action uniformly.
    return np.random.randint(n_actions)

### SARSA

In [47]:
def sarsa(env, alpha=0.1, gamma=0.99, epsilon=0.1, episodes=100000):
    """Learn a tabular action-value function with SARSA (on-policy TD control).

    Actions are chosen with ``epsilon_greedy`` both for acting and for the
    bootstrap term, so the update follows the same policy that is executed.

    Args:
        env: Environment exposing ``observation_space.n``, ``action_space.n``,
            ``reset()`` returning ``(state, info)`` and ``step(action)``
            returning ``(next_state, reward, terminated, truncated, info)``.
        alpha: Step size of the TD update.
        gamma: Discount factor applied to the bootstrap value.
        epsilon: Exploration rate passed to ``epsilon_greedy``.
        episodes: Number of episodes to run.

    Returns:
        ``numpy.ndarray`` of shape ``(observation_space.n, action_space.n)``
        holding the learned Q-values.
    """
    n_actions = env.action_space.n
    Q = np.zeros((env.observation_space.n, n_actions))

    for _ in range(episodes):
        state, _info = env.reset()
        action = epsilon_greedy(Q, state, epsilon, n_actions)
        done = False

        while not done:
            next_state, reward, terminated, truncated, _info = env.step(action)
            next_action = epsilon_greedy(Q, next_state, epsilon, n_actions)

            # A genuinely terminal transition has no future return, so the
            # bootstrap term is dropped. A truncated (time-limit) transition
            # is not terminal, so it still bootstraps from the next pair.
            bootstrap = 0.0 if terminated else Q[next_state, next_action]

            # SARSA update rule
            td_target = reward + gamma * bootstrap
            Q[state, action] += alpha * (td_target - Q[state, action])

            state, action = next_state, next_action

            # The episode ends on termination OR truncation; stepping an
            # environment after either signal requires a reset first.
            done = terminated or truncated

    return Q

### Q-Learning

In [48]:
def q_learning(env, alpha=0.1, gamma=0.99, epsilon=0.1, episodes=100000):
    """Learn a tabular action-value function with Q-learning (off-policy TD control).

    Behaviour actions come from ``epsilon_greedy``; the update bootstraps
    from the maximum Q-value of the next state regardless of which action
    is taken next.

    Args:
        env: Environment exposing ``observation_space.n``, ``action_space.n``,
            ``reset()`` returning ``(state, info)`` and ``step(action)``
            returning ``(next_state, reward, terminated, truncated, info)``.
        alpha: Step size of the TD update.
        gamma: Discount factor applied to the bootstrap value.
        epsilon: Exploration rate passed to ``epsilon_greedy``.
        episodes: Number of episodes to run.

    Returns:
        ``numpy.ndarray`` of shape ``(observation_space.n, action_space.n)``
        holding the learned Q-values.
    """
    n_actions = env.action_space.n
    Q = np.zeros((env.observation_space.n, n_actions))

    for _ in range(episodes):
        state, _info = env.reset()
        done = False

        while not done:
            action = epsilon_greedy(Q, state, epsilon, n_actions)
            next_state, reward, terminated, truncated, _info = env.step(action)

            # A genuinely terminal transition has no future return, so the
            # bootstrap term is dropped. A truncated (time-limit) transition
            # is not terminal, so it still bootstraps from the next state.
            best_next = 0.0 if terminated else np.max(Q[next_state])

            # Q-Learning update rule
            td_target = reward + gamma * best_next
            Q[state, action] += alpha * (td_target - Q[state, action])

            state = next_state

            # The episode ends on termination OR truncation; stepping an
            # environment after either signal requires a reset first.
            done = terminated or truncated

    return Q

### SARSA Q-Table on FrozenLake

In [49]:
    # Build the FrozenLake-v1 environment with slipping disabled (is_slippery=False).
    env = gym.make("FrozenLake-v1", is_slippery=False)
    print("Training SARSA on FrozenLake...")
    # Train with the notebook's sarsa() routine; hyperparameters are spelled out
    # explicitly even though they equal the function's defaults.
    Q_sarsa = sarsa(env, alpha=0.1, gamma=0.99, epsilon=0.1, episodes=100000)
    # Print the whole learned table: one row per state, one column per action.
    print("SARSA Q-Table:\n", Q_sarsa)

Training SARSA on FrozenLake...
SARSA Q-Table:
 [[0.68067759 0.61383406 0.80467923 0.66031243]
 [0.74076227 0.         0.8350028  0.73363401]
 [0.71744086 0.81351031 0.67084335 0.72870209]
 [0.81028585 0.         0.67828806 0.61581475]
 [0.63895772 0.66555252 0.         0.69033359]
 [0.         0.         0.         0.        ]
 [0.         0.91115338 0.         0.83589108]
 [0.         0.         0.         0.        ]
 [0.79353517 0.         0.85815952 0.75947717]
 [0.85751145 0.8949638  0.85050265 0.        ]
 [0.84864402 0.97719653 0.         0.88041691]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.82980971 0.98982779 0.92017701]
 [0.96701505 0.98547337 1.         0.89157942]
 [0.         0.         0.         0.        ]]


### Q-Learning Q-Table on FrozenLake

In [53]:
    # Build a fresh FrozenLake-v1 environment with slipping disabled (is_slippery=False).
    env = gym.make("FrozenLake-v1", is_slippery=False)
    print("\nTraining Q-Learning on FrozenLake...")
    # Train with the notebook's q_learning() routine.
    # NOTE(review): epsilon is 0.3 here but 0.1 in the SARSA FrozenLake cell, so the
    # two tables are not trained under identical exploration settings - confirm intent.
    Q_qlearning = q_learning(env, alpha=0.1, gamma=0.99, epsilon=0.3, episodes=100000)
    # Print the whole learned table: one row per state, one column per action.
    print("Q-Learning Q-Table:\n", Q_qlearning)


Training Q-Learning on FrozenLake...
Q-Learning Q-Table:
 [[0.94148015 0.95099005 0.95099005 0.94148015]
 [0.94148015 0.         0.96059601 0.95099005]
 [0.95099005 0.970299   0.95099005 0.96059601]
 [0.96059601 0.         0.93953707 0.94030666]
 [0.95099005 0.96059601 0.         0.94148015]
 [0.         0.         0.         0.        ]
 [0.         0.9801     0.         0.96059601]
 [0.         0.         0.         0.        ]
 [0.96059601 0.         0.970299   0.95099005]
 [0.96059601 0.9801     0.9801     0.        ]
 [0.970299   0.99       0.         0.970299  ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.9801     0.99       0.970299  ]
 [0.9801     0.99       1.         0.9801    ]
 [0.         0.         0.         0.        ]]


### SARSA Q-Table on CliffWalking

In [51]:
  # Build the CliffWalking-v0 environment (rebinds the notebook-level `env`).
  env = gym.make("CliffWalking-v0")
  print("Training SARSA on CliffWalking...")
  # Train with the notebook's sarsa() routine for 1200 episodes.
  # Note: this rebinds Q_sarsa, replacing the FrozenLake table stored under the same name.
  Q_sarsa = sarsa(env, alpha=0.1, gamma=0.99, epsilon=0.1, episodes=1200)
  # Print the whole learned table: one row per state, one column per action.
  print("Final SARSA Q-Table:\n", Q_sarsa)

Training SARSA on CliffWalking...
Final SARSA Q-Table:
 [[ -13.31782447  -13.1788714   -13.30294512  -13.2934783 ]
 [ -12.56207396  -12.40693623  -12.54525129  -12.48636977]
 [ -11.75001949  -11.56987784  -11.63542331  -11.92843725]
 [ -10.85916322  -10.7194378   -10.84936916  -10.91224139]
 [  -9.92375732   -9.8571983    -9.89338419   -9.94187222]
 [  -9.12228297   -8.98449611   -9.05489729   -9.41061688]
 [  -8.15006945   -8.08862185   -8.12662478   -8.21286576]
 [  -7.3465297    -7.16730235   -7.17454671   -7.98385428]
 [  -6.49554954   -6.2543914    -6.25771418   -6.77133554]
 [  -5.52347933   -5.32325896   -5.35044016   -5.59828789]
 [  -4.71241957   -4.52998359   -4.30515947   -4.66808369]
 [  -3.93336949   -4.04978469   -3.42421023   -4.16322278]
 [ -13.90002942  -13.46817356  -15.37511431  -14.19405442]
 [ -13.03713597  -12.57118596  -14.00919657  -14.24113407]
 [ -12.22134963  -11.66662239  -13.02846722  -13.24349862]
 [ -11.44847566  -10.57798333  -11.19411461  -11.93461323]


### Q-Learning Q-Table on CliffWalking

In [52]:
    # Build the CliffWalking-v0 environment (rebinds the notebook-level `env`).
    env = gym.make("CliffWalking-v0")
    print("\nTraining Q-Learning on CliffWalking...")
    # Train with the notebook's q_learning() routine for 1200 episodes.
    # Note: this rebinds Q_qlearning, the same name the FrozenLake Q-learning cell uses.
    Q_qlearning = q_learning(env, alpha=0.1, gamma=0.99, epsilon=0.1, episodes=1200)
    # Print the whole learned table: one row per state, one column per action.
    print("Final Q-Learning Q-Table:\n", Q_qlearning)


Training Q-Learning on CliffWalking...
Final Q-Learning Q-Table:
 [[ -10.9279817   -10.93223937  -11.00422494  -10.92226927]
 [ -10.54851019  -10.49973929  -10.6158014   -10.52331431]
 [  -9.9085869    -9.92246271   -9.94984883   -9.98787649]
 [  -9.30883364   -9.2894555    -9.29427151   -9.308698  ]
 [  -8.59887478   -8.55873624   -8.57153238   -8.64297735]
 [  -7.84666307   -7.79525965   -7.85956363   -7.84445286]
 [  -7.02052052   -7.02952831   -7.03638488   -7.1267622 ]
 [  -6.35135734   -6.21392152   -6.28509841   -6.34430427]
 [  -5.41392846   -5.42117453   -5.46145843   -5.42300292]
 [  -4.75958889   -4.62736331   -4.64932327   -4.92698933]
 [  -3.81531482   -3.80172623   -3.80972012   -3.89822671]
 [  -2.95690327   -2.9471953    -2.94722563   -3.02865099]
 [ -11.34561939  -11.31619226  -11.39377273  -11.31111799]
 [ -10.84184621  -10.8219095   -10.85724848  -10.82073006]
 [ -10.1997763   -10.17019438  -10.18082256  -10.318707  ]
 [  -9.57207156   -9.38700899   -9.3931301    -9