In [1]:
import numpy as np
import imageio
import matplotlib.pyplot as plt
import gymnasium as gym
import re
from PIL import Image, ImageDraw, ImageFont
import os
import openai

openai.api_base = "http://localhost:4201/v1" 
openai.api_key = "" 



In [2]:
goal_state = (3,3)
system_prompt = f'''
There exists a 3x3 grid. You need to go from your current posision to tile {goal_state}. What path on the grid should you take to get to the goal. Think step by step. Here is an example:

Current position: (1,1)
 To reach tile (3,3), we can follow this path:

1. Move from tile (1,1) to tile (2,1): This can be done by moving one unit to the right, which is allowed in a 3x3 grid.
2. Move from tile (2,1) to tile (2,2): This can be done by moving one unit down, which is also allowed in a 3x3 grid.
3. Move from tile (2,2) to tile (3,2): This can be done by moving one unit to the right, which is allowed in a 3x3 grid.
4. Move from tile (3,2) to tile (3,3): This can be done by moving one unit up, which is also allowed in a 3x3 grid.

Therefore, the path to reach tile (3,3) is: (1,1) -> (2,1) -> (2,2) -> (3,2) -> (3,3).

The response should follow the above template.

'''

In [3]:

history = [{"role":"system", "content":system_prompt}]
eng = [(1,1),(1,2),(1,3),(1,4),(2,1),(2,2),(2,3),(2,4),(3,1),(3,2),(3,3),(3,4),(4,1),(4,2),(4,3),(4,4)]


# Initialize the FrozenLake environment
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode='rgb_array')

In [4]:
def clean_str(input_string):
    pattern = r',\s(\d)'
    result_string = re.sub(pattern, r',\1', input_string)
    return result_string

def state_to_action(state,env_state,with_eng=False):
    x1,y1 = env_state
    state1 = state.strip()
    x2,y2 = int(state1[1]),int(state1[3].strip())
    if with_eng==False:
        if x1-x2 < 0: #RIGHT
            return 2
        elif x1-x2 > 0: #LEFT
            return 0
        elif y1-y2 < 0: #UP
            return 3
        elif y1-y2 > 0: #DOWN
            return 1
    elif with_eng==True:
        if x1-x2 < 0: #RIGHT
            return 2,'RIGHT'
        elif x1-x2 > 0: #LEFT
            return 0,'LEFT'
        elif y1-y2 < 0: #UP
            return 3,'UP'
        elif y1-y2 > 0: #DOWN
            return 1,'DOWN'

def extract_nposition(response,position):
    x = response.split('\n')
    y = clean_str(x[-1])
    y = y.split('->')
    print(y)
    #try:
    #    result = y.index(position.replace(" ",""))+2 
    #except:
    #result = y.index(position.replace(" ",""))+2 
    return y[2]

def env_to_eng(x):
    return eng[x]


def render_to_image(env):
    """Convert the text-based rendering to an image."""
    img = env.render()
    return img

In [6]:
def action_generator(hist,env_state,flag=False):
    x = env_to_eng(env_state)
    print(f'Input:{x}')    
    user_message = {"role":"user","content":f"Current position: {x}"}
    hist.append(user_message)
    completion = openai.ChatCompletion.create(
    model="local-model", 
    messages=hist,
    temperature = 0.8
    )
    response = completion.choices[0].message['content']
    next_state = extract_nposition(response,str(x)) 
    action, pr_action = state_to_action(next_state,x,with_eng=flag) # turn flag to False in order to remove  output line
    print(f'Output:{pr_action}')
    #print(f'\nReasoning:{response}')
    return action

action_generator(history,1)

Input:(1, 2)
['Therefore, the path to reach tile (3,3) is: (1,2) ', ' (2,2) ', ' (3,2) ', ' (3,3).']
Output:RIGHT

Reasoning: To reach tile (3,3), we can follow this path:

1. Move from tile (1,2) to tile (2,2): This can be done by moving one unit down, which is also allowed in a 3x3 grid.
2. Move from tile (2,2) to tile (3,2): This can be done by moving one unit to the right, which is allowed in a 3x3 grid.
3. Move from tile (3,2) to tile (3,3): This can be done by moving one unit up, which is also allowed in a 3x3 grid.

Therefore, the path to reach tile (3,3) is: (1,2) -> (2,2) -> (3,2) -> (3,3).


2

In [7]:
def evaluate_and_save_gif(num_episodes=100, filename="frozenlake.gif"):
    """
    Evaluate a random policy, save the rendered outputs to a GIF, and return aggregated rewards.
    """
    frames = []
    rewards = []

    for episode in range(num_episodes):
        state = env.reset()[0]
        episode_reward = 0
        done = False
        

        # Add initial state to frames
        frames.append(render_to_image(env))

        while not done:
            # Choose a random action
            print(state)
            action = action_generator(history,state,True)

            # Take a step in the environment
            state, reward, done, _, info = env.step(action)
            episode_reward += reward

            # Add the new state to frames
            frames.append(render_to_image(env))

        rewards.append(episode_reward)

    # Save frames as a gif
    imageio.mimsave(filename, [np.array(f) for f in frames], duration=0.5)
    print(f"GIF saved as {filename}")

    return rewards

# Evaluate the random policy, save the GIF, and retrieve rewards
num_episodes = 1
rewards = evaluate_and_save_gif(num_episodes=num_episodes, filename="frozenlake.gif")

# Plotting the aggregated rewards
plt.figure(figsize=(10, 5))
plt.plot(np.arange(num_episodes), rewards, alpha=0.6, color='blue')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Performance of Random Policy on FrozenLake-v1 over {} episodes'.format(num_episodes))
plt.show()


0
Input:(1, 1)
['Therefore, the path to reach tile (3,3) is: (1,1) ', ' (2,1) ', ' (2,2) ', ' (3,2) ', ' (3,3).']
Output:RIGHT

Reasoning: To reach tile (3,3), we can follow this path:

1. Move from tile (1,1) to tile (2,1): This can be done by moving one unit to the right, which is allowed in a 3x3 grid.
2. Move from tile (2,1) to tile (2,2): This can be done by moving one unit down, which is also allowed in a 3x3 grid.
3. Move from tile (2,2) to tile (3,2): This can be done by moving one unit to the right, which is allowed in a 3x3 grid.
4. Move from tile (3,2) to tile (3,3): This can be done by moving one unit up, which is also allowed in a 3x3 grid.

Therefore, the path to reach tile (3,3) is: (1,1) -> (2,1) -> (2,2) -> (3,2) -> (3,3).
1
Input:(1, 2)
['Therefore, the path to reach tile (3,3) is: (1,2) ', ' (1,3) ', ' (2,3) ', ' (3,3).']
Output:RIGHT

Reasoning: To reach tile (3,3), we can follow this path from current position (1,2):

1. Move from current position (1,2) to tile (1,

IndexError: list index out of range