# Install Dependencies

In [1]:
!pip install stable-baselines3
!pip install gym
!pip install pandas

You should consider upgrading via the '/home/face9/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
You should consider upgrading via the '/home/face9/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
You should consider upgrading via the '/home/face9/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

# Import dependencies

In [1]:
from gym import Env, Space, spaces
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.envs import IdentityEnvMultiDiscrete
from stable_baselines3.common.evaluation import evaluate_policy

import numpy as np
import random
from IPython.display import clear_output
import random
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

from time import sleep
from itertools import repeat

# Import Words

In [2]:
wordleAnswers = pd.read_csv('../wordle/data/wordleAnswers.csv', header=0).values
wordleAnswers = [word[0] for word in wordleAnswers]
playList = pd.read_csv('../wordle/data/wordleWithValues.csv', header=None).sort_values(by=1, ascending=False).values

# Normalise usage to 0-1000
playList = [[word[0], min(1000, word[1] / 100000)] for word in playList]
print('Most used:')
print(playList[0:10])
print('\n# Import dependenciesLeast used:')
print(playList[-10:-1])

Most used:
[['which', 1000], ['their', 1000], ['would', 1000], ['other', 1000], ['about', 1000], ['there', 1000], ['these', 1000], ['could', 1000], ['first', 1000], ['where', 1000]]

# Import dependenciesLeast used:
[['ysame', 6e-05], ['ulnad', 6e-05], ['exeem', 6e-05], ['zooea', 6e-05], ['moyls', 5e-05], ['sluse', 5e-05], ['sowms', 4e-05], ['arefy', 4e-05], ['gyeld', 4e-05]]


# Helper utilities

In [3]:
def numToChar(num):
    return chr(ord('a') + int(num) - 1)

def numsToWord(numArr):
    word = ''
    for num in numArr:
        word += numToChar(num)
        
    return word
    
def charToNum(char):
    return ord(char) - 97 + 1

def wordToNums(word):
    numArr = np.zeros(5, dtype=np.int64)
    for i in range(5):
        numArr[i] = charToNum(word[i])
        
    return numArr
    

In [4]:
def getStartWord(threshold=7000):
    limit = min(len(playList), threshold) - 1
    randomIndex = random.randint(0, limit)
    return playList[randomIndex][0]
    

In [5]:
def populateWordChoices(choiceState, words):
    limit = len(choiceState)
    
    ratingState = np.zeros((200), dtype=np.float64)
    
    wordChoices = []
    
    for i in range(limit):
        if i < len(words):
            choiceState[i] = [charToNum(char) for char in words[i][0]]
            ratingState[i] = words[i][1]
        else:
            choiceState[i] = np.zeros([5], dtype=np.uint64)
            ratingState[i] = 0
    
    return choiceState, ratingState

In [6]:
def flatten(arr):
    return [item for sublist in arr for item in sublist]

def getSuggestedWords(board):
    # No board or first play
    if board[0][0][1] == 0:
        return playList[:200]
    
    badChars = []
    greenChars = [''] * 5
    yellowChars = [[], [], [], [], []]
    
    suggestList = []
    
    for word in board:
        for x, charVal in enumerate(word):
            char = numToChar(charVal[0])
            status = charVal[1]
            
            if status == 0:
                break    
            elif status == 1 and char not in badChars:
                badChars += char
            elif status == 2:
                yellowChars[x] += char
            elif status == 3:
                greenChars[x] = char
            
    for entry in playList:
        playWord = entry[0]
        isWordValid = True
        for x, playChar in enumerate(playWord):
            hasGreenMismatch = greenChars[x] and playChar != greenChars[x]
            hasBadChars = playChar in badChars and playWord.count(playChar) > max(flatten(yellowChars).count(playChar), greenChars.count(playChar))
            hasYellowCharInPosition = playChar in yellowChars[x]
            
            if hasGreenMismatch or hasBadChars or hasYellowCharInPosition:
                isWordValid = False
                break
        
        for yellowChar in [item for sublist in yellowChars for item in sublist]:
            if yellowChar not in playWord:
#                 print(f'grand: {yellowChar} not in {entry[0]}')
                isWordValid = False
                break
        
        if isWordValid:
#             print(f'{entry[0]} is valid!')
            suggestList.append(entry)
        
        if len(suggestList) == 200:
            break
    
    return suggestList

# Environment (Wordle simulator)

In [7]:
class WordleTestEnv(Env):
    def __init__(self, targetWords=None, noSpoiler=False, showActionList=False):
        self.targetWords = targetWords
        self.noSpoiler = noSpoiler
        self.showActionList = showActionList
 
        actionNum = 200
        
        self.action_space = spaces.Discrete(actionNum)
        
        board = np.full((6, 5, 2), [27, 4], dtype=np.uint8)
        boardFlat = board.flatten()
        
        wordChoices = np.full((actionNum, 5), [27], dtype=np.uint8)
        wordChoicesFlat = wordChoices.flatten()

        ratings = spaces.Box(low=np.zeros((actionNum)), high=np.full((actionNum), [1000]), dtype=np.float64)
        
        self.observation_space = spaces.Dict({
            'board': spaces.MultiDiscrete(boardFlat, dtype=np.uint8),
            'choices': spaces.MultiDiscrete(wordChoicesFlat, dtype=np.uint8),
            'ratings': ratings
        })
        
        self.state = {
            'board': board,
            'choices': wordChoices,
            'ratings': ratings
        }
        
        self.scores = [0] * 6
        self.scorePlayed = [0] * 5
        self.gamesWon = 0
        self.gamesPlayed = 0
        self.rewardHistory = []

    def step(self, action):
        self.action = action
        actionVal = self.state['choices'][action]
        actionWord = numsToWord(actionVal) if actionVal[0] != 0 else None
        self.attempts += 1
        
        if actionWord:
            reward, winner = self.playWord(actionWord)
            self.row += 1
            self.attempts = 0
        else:
            reward = 0
            winner = False
            
        if winner:
            reward = 7 - self.row
        else:
            reward = 0
        
        self.rewardHistory.append(reward)
        
        suggestList = getSuggestedWords(self.state['board'])
        self.state['choices'], self.state['ratings'] = populateWordChoices(self.state['choices'], suggestList)
        
        self.showStats(reward)
        
        info = {}
        
        shouldBail = self.attempts > 1
        
        done = True if winner or self.row == 6 or self.state['choices'][0][1] == 0 or shouldBail else False
        
        if done:
            self.gamesPlayed += 1
            self.showStats(reward)
            #sleep(0.5)
            
        
        return self._getObs(), reward, done, info
    
    def render(self, mode='live', **kwargs):
        # Render the environment to the screen
        if mode == 'file':
            pass
    
    def reset(self):
        state = self.state
        state['board'] = np.zeros((6, 5, 2), dtype=np.uint64)
        
#         state['choices'] = np.zeros((100, 5), dtype=np.uint8)
        
        suggestList = getSuggestedWords(state['board'])
        state['choices'], state['ratings'] = populateWordChoices(state['choices'], suggestList)
        
#         for row in state['choices']:
#             row = np.zeros(5, dtype=np.uint8)
    
        
    
        if self.targetWords:
            targetWordLen = len(self.targetWords)
            self.startWord = self.targetWords[self.gamesPlayed % targetWordLen]
        else:
            self.startWord = getStartWord()
        
        self.row = 0
        self.state = state
        self.attempts = 0
        return self._getObs()
    
    def playWord(self, actionWord):
        reward = 0
        winner = False
        
        if actionWord == self.startWord:
            winner = True
            self.gamesWon += 1
            self.scores[self.row] += 1
        
        #Apply action
        matchedChars = []
        potentialReward = 0
        greenCount = 0
        
        # Calculate greens first 
        for i, char in enumerate(actionWord):
            if char == self.startWord[i]:
                matchedChars += char
        
        for i, char in enumerate(actionWord):
            charVal = charToNum(char)
            startWordCharMatches = self.startWord.count(char)
            charMatchesSoFar = matchedChars.count(char)
              
            if actionWord[i] == self.startWord[i]:
                letterState = 3
                greenCount += 1
                potentialReward += 5
                
                self.scorePlayed[i] = 2
                matchedChars += char
                
            elif startWordCharMatches > charMatchesSoFar:
                letterState = 2
                
                for j, targetChar in enumerate(self.startWord):
                    #if char == targetChar and self.scorePlayed[j] == 0:
                    reward += 1
                    self.scorePlayed[j] = 1
                        
                matchedChars += char
            else:
                letterState = 1
                
            # Apply to state
            self.state['board'][self.row][i] = [charVal, letterState]
        
        # Apply reward for multi-greens
        reward += (potentialReward * greenCount)
        
        return reward, winner
    
    def showStats(self, reward=None):
        yellowText = '\x1b[6;30;43m'
        greenText = '\x1b[5;30;42m'
        greyText = '\x1b[0;37;1m'
        redText = '\x1b[0;30;41m'
        endColText = '\x1b[0m'
        
        clear_output(wait=True)
        if not self.noSpoiler:
            print('Target word:', self.startWord, '\n')

        print(f'Last action value: {self.action}')
        
        maxWinScore = max(self.scores)
        winScoreModifier = 1 if maxWinScore < 20 else 20 / maxWinScore
        
        for i, wordData in enumerate(self.state['board']):
            # Result column
            rowText = ''
            for letterData in wordData:
                letter = numToChar(int(letterData[0])) if not self.noSpoiler else ' '
                code = int(letterData[1])
                if code == 0 and letterData[0] > 0:
                    rowText += greyText + letter + endColText
                elif code == 0:
                    rowText += ' '
                if code == 1:
                    rowText += letter
                if code == 2:
                    rowText += yellowText + letter + endColText
                if code == 3:
                    rowText += greenText + letter + endColText
            
            #Score Column
            rowScore = self.scores[i]
            
            scoreCharLen = len(str(rowScore))
            bufferText = ' ' * (5)
            scoreText = str(rowScore) if rowScore > 0 else ''
            
            rowText += f'{bufferText}{i + 1}|{greenText}{scoreText}{" " * int(((rowScore * winScoreModifier) - scoreCharLen))}{endColText}'
            print(rowText)

        # Losses row
        losses = self.gamesPlayed - self.gamesWon
        lossCharLen = len(str(rowScore))
        if losses:
            print(f'{bufferText * 2}X|{redText}{losses}{" " * (int(losses * winScoreModifier) - lossCharLen)}{endColText}')
        winRatioText = '0%'
        if self.gamesPlayed > 0:
            winRatioModifier = 100 / self.gamesPlayed
            winPercent = round(self.gamesWon * winRatioModifier)
            winRatioText = f'({winPercent}%)'
        
        print(f'{bufferText * 2}Wins: {self.gamesWon}/{self.gamesPlayed} {winRatioText}')
        
        if reward is not None:
            print(f'{bufferText * 2}Reward: {reward} Average: {self.getRewardAverage()} Score: {self.getScore()}')
        
        # Word choices
        print()
        
        if self.showActionList:
            for i, choiceRow in enumerate(self.state['choices']):
                if choiceRow[0] == 0:
                    break

                choiceWord = numsToWord(choiceRow)
                print(f'{i}: {choiceWord} - {self.state["ratings"][i]}')

    def getRewardAverage(self):
        total = sum(self.rewardHistory)
        count = len(self.rewardHistory)
        
        return round(total / max(1, count), 2)
    
    def getScore(self):
        score = sum([(index + 1) * score for [index, score] in enumerate(self.scores)]) + ((self.gamesPlayed - self.gamesWon) * 10)
        return score
    
    def _getObs(self):
        return {'board': self.state['board'].flatten(), 'choices': self.state['choices'].flatten(), 'ratings': self.state['ratings']}

# Check environment with random data

In [8]:
env = WordleTestEnv()
# Import dependencies
env.observation_space.sample()
# print(test['board'].flatten().shape)
# print(test['choices'].flatten().shape)

OrderedDict([('board',
              array([ 3,  2, 16,  2,  5,  0, 26,  3, 17,  1, 23,  3, 12,  2,  5,  1, 11,
                      2, 20,  3, 12,  2,  2,  2,  6,  0, 16,  2, 24,  1, 22,  0, 26,  0,
                      5,  2, 13,  3, 26,  0,  0,  0, 12,  3, 16,  0,  2,  1,  6,  3,  3,
                      2, 24,  2, 24,  0,  4,  3,  4,  0], dtype=uint8)),
             ('choices',
              array([19, 26, 15,  5, 18, 17,  1, 19,  7, 13, 15, 21, 13, 25, 14, 26, 25,
                     24, 20, 13, 26, 21,  5, 17, 26,  0,  7, 23, 25,  9,  5, 24,  1,  9,
                     25,  9, 10,  6, 23, 10, 11, 18,  7, 12, 12, 12,  8,  8, 22, 12, 22,
                     21,  0, 17, 25,  5, 18, 15, 15, 17, 12, 14,  5,  4, 15,  0,  0, 22,
                      8, 19, 26,  4,  5,  8, 17, 19, 13,  7,  3, 21,  0, 21, 23,  2, 16,
                     21, 11, 12, 20,  0, 25,  0, 14, 19,  2, 25, 16, 21, 19, 19, 11,  8,
                     14, 22, 13,  8,  8, 20, 17,  5, 17, 20, 18,  9, 18, 17,  

In [10]:
from stable_baselines3.common.env_checker import check_env
check_env(env)

Target word: wimps 

Last action value: 140
[6;30;43mi[0mdea[5;30;42ms[0m     1|[5;30;42m[0m
          2|[5;30;42m[0m
          3|[5;30;42m[0m
          4|[5;30;42m[0m
          5|[5;30;42m[0m
          6|[5;30;42m[0m
          Wins: 0/0 0%
          Reward: 0 Average: 0.0 Score: 0



In [11]:
env = WordleTestEnv()

episodes = 10
for episode in range(1, episodes+1):
    env.reset()
    done = False
    score = 0 
    
    while not done:
#         env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward

    print('Episode:{} Score:{}'.format(episode, score))
    

Target word: swami 

Last action value: 173
do[6;30;43mi[0mng     1|[5;30;42m[0m
b[6;30;43mi[0mll[6;30;43ms[0m     2|[5;30;42m[0m
[5;30;42ms[0mht[6;30;43mi[0mk     3|[5;30;42m[0m
          4|[5;30;42m[0m
          5|[5;30;42m[0m
          6|[5;30;42m[0m
          X|[0;30;41m10         [0m
          Wins: 0/10 (0%)
          Reward: 0 Average: 0.0 Score: 100

Episode:10 Score:0


# Create baselines Model for agent

In [12]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.env_util import make_vec_env

In [13]:
def make_env(rank, seed=0):
    """
    Utility function for multiprocessed env.
    
    :param seed: (int) the inital seed for RNG
    :param rank: (int) index of the subprocess
    """
    def _init():
        env = WordleTestEnv()
        # Important: use a different seed for each environment
        env.seed(seed + rank)
        return env

    print('Making env: ', rank)
    return _init

In [14]:
netArch=[256, 256, 128]

In [15]:
tbLogPath = '/home/face9/logs'
env = WordleTestEnv()
env = DummyVecEnv([lambda: env])
model = PPO('MultiInputPolicy', env, learning_rate=1e-4, verbose = 1, tensorboard_log=tbLogPath, policy_kwargs={'net_arch':netArch})
# model = PPO.load('/home/face9/dev/wordle/models/PPO_RLW_782')

Using cpu device


In [16]:
env = WordleTestEnv(['abode', 'hello'], noSpoiler=False)
evaluate_policy(model, env,  n_eval_episodes=8)
print(env.getScore())

Target word: hello 

Last action value: 72
f[6;30;43mo[0mrc[6;30;43me[0m     1|[5;30;42m[0m
aw[6;30;43me[0mt[5;30;42mo[0m     2|[5;30;42m[0m
          3|[5;30;42m[0m
          4|[5;30;42m[0m
          5|[5;30;42m[0m
          6|[5;30;42m[0m
          X|[0;30;41m8       [0m
          Wins: 0/8 (0%)
          Reward: 0 Average: 0.0 Score: 80

80


In [None]:
lowestScore = 10000
realWords = 300

while(True):
    env = WordleTestEnv()
    env = DummyVecEnv([lambda: env])
    model.env = env
    model.learn(total_timesteps=32768)
    testEnv = WordleTestEnv(wordleAnswers[0:realWords], noSpoiler=True)
    evaluate_policy(model, testEnv, n_eval_episodes = realWords)
    score = testEnv.getScore()
    
    if score < lowestScore:
        model.save(f'/home/face9/dev/wordle/models/DevClub_{score}')
        lowestScore = score
        print(f'Model saved. Score: {score} Lowest Score: {lowestScore}')
        sleep(3)
    else:
        print(f'Model NOT saved. Score: {score} Lowest Score: {lowestScore}')
        sleep(3)

Target word: iller 

Last action value: 135
f[6;30;43mi[0mna[6;30;43ml[0m     1|[5;30;42m2 [0m
          2|[5;30;42m3  [0m
          3|[5;30;42m7      [0m
          4|[5;30;42m5    [0m
          5|[5;30;42m[0m
          6|[5;30;42m[0m
          X|[0;30;41m1340                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

states = env.observation_space.shape
actions = env.action_space.n

In [75]:
tbLogPath = '/home/face9/logs'
!tensorboard --logdir={tbLogPath}

2022-02-22 23:29:42.032344: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.8.0 at http://localhost:6006/ (Press CTRL+C to quit)
^C


In [243]:
def build_model(states, actions):
    model = Sequential()
#     model.add(Reshape((6, 10), input_shape=(1, 6, 5, 2)))
#     model.add(LSTM(64, activation='tanh', recurrent_activation='sigmoid', recurrent_dropout=0, unroll=False, use_bias='True'))
#     model.add(LSTM(128, activation='tanh', return_sequences=False))
    model.add(Dense(2048, activation='relu', input_shape=(1, 6, 5, 2)))
    model.add(Dense(2048, activation='relu'))
    model.add(Dense(2048, activation='relu'))
    model.add(Flatten())
    model.add(Dense(actions, activation='relu'))
    model.add(Dense(actions, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [244]:
del model 

In [245]:
model = build_model(states, actions)

In [246]:
model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_85 (Dense)             (None, 1, 6, 5, 2048)     6144      
_________________________________________________________________
dense_86 (Dense)             (None, 1, 6, 5, 2048)     4196352   
_________________________________________________________________
dense_87 (Dense)             (None, 1, 6, 5, 2048)     4196352   
_________________________________________________________________
flatten_15 (Flatten)         (None, 61440)             0         
_________________________________________________________________
dense_88 (Dense)             (None, 26)                1597466   
_________________________________________________________________
dense_89 (Dense)             (None, 26)                702       
_________________________________________________________________
dense_90 (Dense)             (None, 26)              

In [None]:
model = keras.models.load_model('/home/face9/dev/OpenAI-Reinforcement-Learning-with-Custom-Environment/wordle')

2022-02-18 11:45:14.314492: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 202.30MiB (rounded to 212123648)requested by op dense_11/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:45:14.314551: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:45:14.314562: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:45:14.314568: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 18, Chunks in use: 18. 11.2KiB allocated for chunks. 11.2KiB in use in bin. 9.9KiB client-requested

2022-02-18 11:45:24.315825: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 202.30MiB (rounded to 212123648)requested by op dense_19/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:45:24.315874: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:45:24.315883: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:45:24.315887: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 18, Chunks in use: 18. 11.2KiB allocated for chunks. 11.2KiB in use in bin. 9.9KiB client-requested

2022-02-18 11:45:34.317026: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 202.30MiB (rounded to 212123648)requested by op dense_23/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:45:34.317077: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:45:34.317086: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:45:34.317091: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 18, Chunks in use: 18. 11.2KiB allocated for chunks. 11.2KiB in use in bin. 9.9KiB client-requested

2022-02-18 11:45:44.318289: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 202.30MiB (rounded to 212123648)requested by op dense_27/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:45:44.318350: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:45:44.318361: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:45:44.318366: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 18, Chunks in use: 18. 11.2KiB allocated for chunks. 11.2KiB in use in bin. 9.9KiB client-requested

2022-02-18 11:45:54.319909: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 15.00MiB (rounded to 15728640)requested by op dense_22/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:45:54.319956: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:45:54.319966: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:45:54.319972: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 19, Chunks in use: 19. 11.8KiB allocated for chunks. 11.8KiB in use in bin. 10.4KiB client-requested 

2022-02-18 11:46:04.321196: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 202.30MiB (rounded to 212123648)requested by op dense_15/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:46:04.321286: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:46:04.321297: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:46:04.321302: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 19, Chunks in use: 19. 11.8KiB allocated for chunks. 11.8KiB in use in bin. 10.4KiB client-requeste

2022-02-18 11:46:14.322812: W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory trying to allocate 15.00MiB (rounded to 15728640)requested by op dense_10/kernel/Initializer/random_uniform/RandomUniform
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-02-18 11:46:14.322863: I tensorflow/core/common_runtime/bfc_allocator.cc:991] BFCAllocator dump for GPU_0_bfc
2022-02-18 11:46:14.322873: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (256): 	Total Chunks: 98, Chunks in use: 98. 24.5KiB allocated for chunks. 24.5KiB in use in bin. 1.9KiB client-requested in use in bin.
2022-02-18 11:46:14.322878: I tensorflow/core/common_runtime/bfc_allocator.cc:998] Bin (512): 	Total Chunks: 19, Chunks in use: 19. 11.8KiB allocated for chunks. 11.8KiB in use in bin. 10.4KiB client-requested 

# 3. Build Agent with Keras-RL

In [247]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=80000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=1000, target_model_update=1e-2)
    return dqn

In [249]:
env = WordleEnv(newWordEvery=1, showStatsEvery=8, showStatsEveryMove=False)
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=3e-4), metrics=['accuracy'])
dqn.fit(env, nb_steps=500000, visualize=False, verbose=1)

Target word: acock 

slmui     1|[5;30;42m[0m
tdlmq     2|[5;30;42m[0m
behhh     3|[5;30;42m[0m
ht[6;30;43ma[0m[6;30;43mk[0mu     4|[5;30;42m[0m
d[6;30;43ma[0muml     5|[5;30;42m[0m
ds[6;30;43mk[0mvy     6|[5;30;42m[0m
          Wins: 0/1041 
 1363/10000 [===>..........................] - ETA: 4:57 - reward: -10.7131done, took 1056.830 seconds


<tensorflow.python.keras.callbacks.History at 0x7f51bd00c970>

In [125]:
model.save('/home/face9/dev/OpenAI-Reinforcement-Learning-with-Custom-Environment/wordle')

INFO:tensorflow:Assets written to: /home/face9/dev/OpenAI-Reinforcement-Learning-with-Custom-Environment/wordle/assets
