In [1]:
import numpy as np
import random

In [8]:
# Step 1: Define the action set and the state space.
# Actions are the two air-conditioner commands; their list position is the
# column index used in the Q-table.
actions = ['ON', 'OFF']
# States are the integer room temperatures 16..30 inclusive (15 states).
states = np.arange(16, 31)
states

array([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])

In [9]:
# Display the action list to confirm its contents and ordering.
actions

['ON', 'OFF']

In [12]:
# Step 2: Create the Q-table, then set alpha, gamma, epsilon and episodes.
# The Q-table is indexed as Q[state_index, action_index]: one row per
# temperature in `states`, one column per entry in `actions`, all zeros
# before training.
Q = np.zeros(shape=(len(states), len(actions)))

In [25]:
# Q-learning hyperparameters
# alpha:    learning rate, in (0, 1]
# gamma:    discount factor applied to the best next-state value, in [0, 1]
# epsilon:  probability of taking a random (exploratory) action
# episodes: number of independent training runs
episodes = 300
epsilon = 0.2
gamma = 0.9
alpha = 0.1

In [26]:
# Step 3: Write Reward Function
def get_reward(temp, action):
    """Return the reward for being at `temp` after issuing `action`.

    A temperature in the inclusive band 20..24 is comfortable and earns +10;
    anything else earns -5. Running the unit (action == 'ON') subtracts an
    additional 2 as the energy cost.
    """
    in_comfort_band = 20 <= temp <= 24
    comfort_score = 10 if in_comfort_band else -5
    energy_cost = 2 if action == 'ON' else 0
    return comfort_score - energy_cost

In [27]:
# Step 4 Environment Dynamic Change (e.g. Temperature)
def next_temp(temp, action):
    """Simulate one environment step and return the new temperature.

    With action 'ON' the temperature drops by a random 1 or 2 degrees;
    with any other action it rises by a random 0, 1 or 2 degrees. The
    result is clipped to the range 16..30 and returned as a Python int.
    """
    if action == 'ON':
        delta = -random.choice([1, 2])
    else:
        delta = random.choice([0, 1, 2])
    clipped = np.clip(temp + delta, 16, 30)
    return int(clipped)

In [28]:
# Step 5 Training Loop
# Tabular Q-learning. Each episode starts at a random temperature and runs a
# fixed number of steps; every step applies the update
#   Q[s, a] += alpha * (reward + gamma * max(Q[s']) - Q[s, a])
# where the reward is evaluated on the temperature reached after the action.
min_temp = int(states[0])  # lowest temperature maps to row 0 of the Q-table
for ep in range(episodes):
    temp = random.choice(states)
    for _ in range(20):  # Limit steps per episode
        s = temp - min_temp  # row of the current state in Q

        # choose action (epsilon-greedy): explore with probability epsilon,
        # otherwise take the action with the highest current Q-value
        if random.uniform(0, 1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[s])]

        next_state = next_temp(temp, action)
        reward = get_reward(next_state, action)

        a = actions.index(action)
        best_next = np.max(Q[next_state - min_temp])
        Q[s, a] += alpha * (reward + gamma * best_next - Q[s, a])

        temp = next_state

print('Training Done')

Training Done


In [29]:
# Roll out the learned greedy policy for 10 steps starting from 28 C,
# printing the chosen action at each temperature.
temp = 28
for step in range(10):
    q_row = Q[temp - 16]  # Q-values of both actions at the current temperature
    action = actions[q_row.argmax()]
    print(f'Step {step+1}: Temp={temp}C -> Action={action}')
    temp = next_temp(temp, action)

Step 1: Temp=28C -> Action=ON
Step 2: Temp=26C -> Action=ON
Step 3: Temp=24C -> Action=ON
Step 4: Temp=23C -> Action=ON
Step 5: Temp=22C -> Action=OFF
Step 6: Temp=22C -> Action=OFF
Step 7: Temp=22C -> Action=OFF
Step 8: Temp=24C -> Action=ON
Step 9: Temp=23C -> Action=ON
Step 10: Temp=22C -> Action=OFF


In [32]:
# Interactive demo: read a starting temperature from the user, fall back to
# 25 C on invalid input, then roll out the learned greedy policy for 10 steps.
try:
    temp = int(input('Enter Starting Room Temp (16-30)'))
    if temp < 16 or temp > 30:
        raise ValueError('Temp out of range')
except ValueError as ve:
    # Reached for non-numeric input (raised by int()) as well as for values
    # outside 16..30 (raised explicitly in the try body).
    print(ve)
    temp = 25
    print('Set to Default 25 C')

print(f'\n Starting temp: {temp} C')
for step in range(10):
    action = actions[np.argmax(Q[temp - 16])]
    print(f'Step {step+1}: Temp={temp}C -> Action={action}')
    temp = next_temp(temp, action)

# Report completion once, after the whole rollout, not after every step.
print('\nDone')

Enter Starting Room Temp (16-30) 20



 Starting temp: 20 C
Step 1: Temp=20C -> Action=OFF

Done
Step 2: Temp=20C -> Action=OFF

Done
Step 3: Temp=20C -> Action=OFF

Done
Step 4: Temp=20C -> Action=OFF

Done
Step 5: Temp=21C -> Action=OFF

Done
Step 6: Temp=22C -> Action=OFF

Done
Step 7: Temp=22C -> Action=OFF

Done
Step 8: Temp=23C -> Action=ON

Done
Step 9: Temp=22C -> Action=OFF

Done
Step 10: Temp=23C -> Action=ON

Done
