In [2]:
import numpy as np

class QLearningLinearFA:
    """Q-learning agent using one linear value function per action.

    The action-value estimate is ``Q(s, a) = weights[a] . s``, where
    ``weights`` has shape ``(num_actions, num_features)`` and ``s`` is a
    feature vector of length ``num_features``.
    """

    def __init__(self, num_features, num_actions, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        """Store the hyper-parameters and zero-initialise the weights.

        Args:
            num_features: Length of the state feature vector.
            num_actions: Number of discrete actions.
            learning_rate: Step size applied to each TD update.
            discount_factor: Multiplier on the bootstrapped next-state value.
            epsilon: Probability of picking a uniformly random action.
        """
        self.num_features = num_features
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.weights = np.zeros((num_actions, num_features))

    def _q_values(self, state):
        """Return the vector of Q-value estimates for every action in *state*."""
        return np.dot(self.weights, state)

    def select_action(self, state):
        """Pick an action epsilon-greedily with respect to the current weights.

        Args:
            state: Feature vector of length ``num_features``.

        Returns:
            Index of the chosen action in ``[0, num_actions)``.
        """
        if np.random.rand() < self.epsilon:
            # Explore: uniform over all actions.
            return np.random.choice(self.num_actions)
        # Exploit: np.argmax breaks ties in favour of the lowest index.
        return np.argmax(self._q_values(state))

    def update_weights(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update to the weights of *action*.

        Args:
            state: Feature vector the action was taken from.
            action: Index of the action that was taken.
            reward: Reward observed for the transition.
            next_state: Feature vector reached after the action. Ignored
                when *done* is true.
            done: Whether the transition ended the episode. On a terminal
                transition there is no future return, so the target is the
                bare reward instead of ``reward + gamma * max_a Q(s', a)``.
                Defaults to ``False``, which always bootstraps.
        """
        state = np.asarray(state, dtype=float)
        if done:
            # Terminal step: do not bootstrap from a state that is never
            # acted from; doing so biases the value estimates.
            target = reward
        else:
            target = reward + self.discount_factor * np.max(self._q_values(next_state))
        predicted = np.dot(self.weights[action], state)
        error = target - predicted
        # For a linear approximator the gradient of Q(s, a) w.r.t.
        # weights[a] is the feature vector itself.
        self.weights[action] += self.learning_rate * error * state

# Demo run: train the agent against a synthetic environment in which states,
# rewards and episode termination are all drawn at random, independent of the
# chosen action.
num_features, num_actions = 5, 10
ql = QLearningLinearFA(num_features=num_features, num_actions=num_actions)

num_episodes = 5000
for episode in range(num_episodes):
    total_reward = 0
    done = False
    state = np.random.rand(num_features)

    # Every episode runs at least one step; each step ends the episode with
    # probability 0.1.
    while True:
        action = ql.select_action(state)

        # Simulated transition: uniform next state, standard-normal reward.
        next_state = np.random.rand(num_features)
        reward = np.random.randn()
        done = np.random.rand() < 0.1

        # NOTE(review): no terminal flag is passed here, so the update
        # bootstraps from next_state even on the episode's final step.
        ql.update_weights(state, action, reward, next_state)

        total_reward += reward
        state = next_state
        if done:
            break

    print("Episode:", episode, "Total Reward:", total_reward)


Episode: 0 Total Reward: -2.852493736045364
Episode: 1 Total Reward: -0.5764803207749665
Episode: 2 Total Reward: -6.532935837223629
Episode: 3 Total Reward: -1.4747344945666923
Episode: 4 Total Reward: 4.558710696595237
Episode: 5 Total Reward: -2.0816227038915316
Episode: 6 Total Reward: 2.1004258011367063
Episode: 7 Total Reward: 1.5032842543229237
Episode: 8 Total Reward: -1.2502293375703428
Episode: 9 Total Reward: -4.098570641634233
Episode: 10 Total Reward: 2.457182648149634
Episode: 11 Total Reward: 0.3492840854885098
Episode: 12 Total Reward: 1.214094255085151
Episode: 13 Total Reward: 1.2670990585251325
Episode: 14 Total Reward: -0.6960843771895917
Episode: 15 Total Reward: -3.316742437122926
Episode: 16 Total Reward: -5.927718335481949
Episode: 17 Total Reward: -0.4408407968395611
Episode: 18 Total Reward: 2.6563667495323404
Episode: 19 Total Reward: -2.614577899462354
Episode: 20 Total Reward: 2.7179033007698274
Episode: 21 Total Reward: 0.08843125820782155
Episode: 22 Tota