In [1]:
import numpy as np

In [6]:
class QLearningLinearFA:
    def __init__(self, num_features, num_actions, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.num_features = num_features
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.weights = np.zeros((num_actions, num_features))
    def select_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.num_actions)  
        else:
            return np.argmax(np.dot(self.weights, state)) 
    def update_weights(self, state, action, reward, next_state):
        target = reward + self.discount_factor * np.max(np.dot(self.weights, next_state))
        predicted = np.dot(self.weights[action], state)
        error = target - predicted
        self.weights[action] += self.learning_rate * error * state
num_features = 5 
num_actions = 5  
ql = QLearningLinearFA(num_features, num_actions)
num_episodes = 1500
for episode in range(num_episodes):
    state = np.random.rand(num_features)
    done = False
    total_reward = 0
    while not done:
        action = ql.select_action(state)
        next_state = np.random.rand(num_features)
        reward = np.random.randn()  
        done = np.random.rand() < 0.1 
        ql.update_weights(state, action, reward, next_state)
        state = next_state
        total_reward += reward
    print("Episode:", episode, "Total Reward:", total_reward)


Episode: 0 Total Reward: -0.8923389416988925
Episode: 1 Total Reward: 0.4026676475898673
Episode: 2 Total Reward: 0.514361246690293
Episode: 3 Total Reward: -3.9199008656817997
Episode: 4 Total Reward: -6.0082088922743555
Episode: 5 Total Reward: -0.7396837825082481
Episode: 6 Total Reward: -1.6332907142906747
Episode: 7 Total Reward: 1.8274577212287675
Episode: 8 Total Reward: -0.4182538935116325
Episode: 9 Total Reward: 2.117856458090359
Episode: 10 Total Reward: 1.0086694068354185
Episode: 11 Total Reward: -2.1446076115573756
Episode: 12 Total Reward: -1.7533379263223035
Episode: 13 Total Reward: -3.277999413541319
Episode: 14 Total Reward: -3.2510243978018423
Episode: 15 Total Reward: 2.9252482816803385
Episode: 16 Total Reward: 1.6228593114047893
Episode: 17 Total Reward: -0.04594007253964183
Episode: 18 Total Reward: -3.4458667841437944
Episode: 19 Total Reward: 0.017118089726632313
Episode: 20 Total Reward: -2.7361413298308386
Episode: 21 Total Reward: 1.4143305631467613
Episode

Episode: 310 Total Reward: -2.868059878494399
Episode: 311 Total Reward: -0.45084837481979234
Episode: 312 Total Reward: -10.047459443073528
Episode: 313 Total Reward: 0.9510165694728172
Episode: 314 Total Reward: 6.975676351842244
Episode: 315 Total Reward: 4.104877785652514
Episode: 316 Total Reward: 1.0114338843103863
Episode: 317 Total Reward: -0.9419937795878057
Episode: 318 Total Reward: 2.4804326613303638
Episode: 319 Total Reward: 1.6922750325836056
Episode: 320 Total Reward: 0.9379510819641492
Episode: 321 Total Reward: -0.00758604272453578
Episode: 322 Total Reward: -4.811448757607691
Episode: 323 Total Reward: 1.8607562874028496
Episode: 324 Total Reward: 3.119150175116324
Episode: 325 Total Reward: 0.9423818797097616
Episode: 326 Total Reward: -3.930264686891836
Episode: 327 Total Reward: 0.31545391864776584
Episode: 328 Total Reward: 8.436915955832314
Episode: 329 Total Reward: 0.5252359180669589
Episode: 330 Total Reward: -1.3222809253995387
Episode: 331 Total Reward: -2.

Episode: 781 Total Reward: -5.10025194534685
Episode: 782 Total Reward: 3.520185077135805
Episode: 783 Total Reward: 2.041629441999829
Episode: 784 Total Reward: -0.9317455225448487
Episode: 785 Total Reward: 2.3931685329079517
Episode: 786 Total Reward: 1.1808833273517345
Episode: 787 Total Reward: -2.086911081117717
Episode: 788 Total Reward: 0.08764761235765256
Episode: 789 Total Reward: -1.796136105476291
Episode: 790 Total Reward: 0.0735116091428696
Episode: 791 Total Reward: -1.3034463192535188
Episode: 792 Total Reward: 0.40744918942121444
Episode: 793 Total Reward: -3.2622936168943184
Episode: 794 Total Reward: -2.2250062817771004
Episode: 795 Total Reward: -0.7839393485741937
Episode: 796 Total Reward: 1.3137878657125595
Episode: 797 Total Reward: -0.5230622104146505
Episode: 798 Total Reward: 0.1064554050705659
Episode: 799 Total Reward: 0.5590976061188959
Episode: 800 Total Reward: -1.555353273688144
Episode: 801 Total Reward: 7.908678394454158
Episode: 802 Total Reward: 1.7

Episode: 1262 Total Reward: 7.71938072247381
Episode: 1263 Total Reward: 0.6373926699355414
Episode: 1264 Total Reward: -3.639792614749971
Episode: 1265 Total Reward: 0.1045913727165763
Episode: 1266 Total Reward: -3.585944762129612
Episode: 1267 Total Reward: -0.950921431923541
Episode: 1268 Total Reward: -2.2791926008522525
Episode: 1269 Total Reward: -1.6542490299070685
Episode: 1270 Total Reward: 11.250349571947085
Episode: 1271 Total Reward: -7.065602871079612
Episode: 1272 Total Reward: 3.325919321545314
Episode: 1273 Total Reward: 2.5991605583901944
Episode: 1274 Total Reward: -1.458165368814174
Episode: 1275 Total Reward: -0.023022553142411584
Episode: 1276 Total Reward: 3.740515894198139
Episode: 1277 Total Reward: -0.328243976540019
Episode: 1278 Total Reward: -3.3486556720428715
Episode: 1279 Total Reward: 2.7764480603210866
Episode: 1280 Total Reward: 2.780897523986683
Episode: 1281 Total Reward: -3.056182662236635
Episode: 1282 Total Reward: 1.3774683008944066
Episode: 128