In [11]:
import numpy as np
import pandas as pd

# Functions for creating a dog

In [46]:
def transform_to_dec(value):
    return np.floor(100*value)/100

def apply_decreasing_rate(value: float, rate: float) -> float:
    """
    Apply a decreasing rate to a value
    :param value: current value
    :param rate: per second
    :return: updated value
    """
    return value - (60 * rate)

def converge(value: float, target: float, ratio: float) -> float:
    diff: float = (target - value) * ratio
    return value + diff

def update_walking(dog_table_row):
    dog_table_row['fat'] = transform_to_dec(converge(dog_table_row.iloc[0]['fat'], 0.0, walking_fat_converge_rate))
    dog_table_row['affection'] = transform_to_dec(converge(dog_table_row.iloc[0]['affection'], 1.0, walking_affection_converge_rate))
    return (dog_table_row['fat'], dog_table_row['affection'])

def update_feeding(dog_table_row):
    dog_table_row['food'] = transform_to_dec(min(dog_table_row.iloc[0]['food'] + eating_food_increase, 1.0))
    dog_table_row['fat'] = transform_to_dec(min(dog_table_row.iloc[0]['fat'] + eating_fat_increase, 1.0))
    return (dog_table_row['food'], dog_table_row['fat'])

def update_playing(dog_table_row):
    dog_table_row['fat'] = transform_to_dec(converge(dog_table_row.iloc[0]['fat'], 0.0, playing_fat_converge_rate))
    dog_table_row['affection'] = transform_to_dec(converge(dog_table_row.iloc[0]['affection'], 1.0, playing_affection_converge_rate))
    return (dog_table_row['fat'], dog_table_row['affection'])

def update_food(dog_table_row):
    return transform_to_dec(max(
        0.0,
        apply_decreasing_rate(dog_table_row.iloc[0]['food'], food_consumption_rate)
    ))

def update_affection(dog_table_row):
    return transform_to_dec(max(
        0.0,
        apply_decreasing_rate(dog_table_row.iloc[0]['affection'], affection_consumption_rate)
    ))

def update_happiness(dog_table_row) -> float:  # between 0 and 1
    return min(
        dog_table_row.iloc[0]['food'],
        1.0 - dog_table_row.iloc[0]['fat'],
        dog_table_row.iloc[0]['affection']
    )

def update_alive(dog_table_row):
    return dog_table_row.iloc[0]['alive'] & (dog_table_row.iloc[0]['happiness'] > 0.0)

def new_state_after_1_minute(dog_table, action_taken='NO ACTION'):
    
    dog_table_row = dog_table.tail(1)
    dog_table_row['minute'] += 1
    
    dog_table_row['food'] = update_food(dog_table_row)
    dog_table_row['affection'] = update_affection(dog_table_row)
    dog_table_row['happiness'] = update_happiness(dog_table_row)
    dog_table_row['alive'] = update_alive(dog_table_row)

    dog_table_row['minutes_since_last_action'] += 1

    if ((action_taken != 'NO ACTION') & (dog_table_row.iloc[0]['can_action_be_taken'])):
        dog_table_row['last_action_taken'] = action_taken
        dog_table_row['can_action_be_taken'] = False
        dog_table_row['minutes_since_last_action'] = 0
    
    if ((dog_table_row.iloc[0]['last_action_taken'] == 'WALKING') &
        (dog_table_row.iloc[0]['minutes_since_last_action'] == WALKING_TIME)):
        dog_table_row['fat'], dog_table_row['affection'] = update_walking(dog_table_row)
        dog_table_row['last_action_taken'] = 'NO ACTION'
        dog_table_row['can_action_be_taken'] = True
        dog_table_row['minutes_since_last_action'] = 0        

    if ((dog_table_row.iloc[0]['last_action_taken'] == 'FEEDING') &
        (dog_table_row.iloc[0]['minutes_since_last_action'] == EATING_TIME)):
        dog_table_row['food'], dog_table_row['fat'] = update_feeding(dog_table_row)
        dog_table_row['last_action_taken'] = 'NO ACTION'
        dog_table_row['can_action_be_taken'] = True
        dog_table_row['minutes_since_last_action'] = 0

    if ((dog_table_row.iloc[0]['last_action_taken'] == 'PLAYING') &
        (dog_table_row.iloc[0]['minutes_since_last_action'] == PLAYING_TIME)):
        dog_table_row['fat'], dog_table_row['affection'] = update_playing(dog_table_row)
        dog_table_row['last_action_taken'] = 'NO ACTION'
        dog_table_row['can_action_be_taken'] = True
        dog_table_row['minutes_since_last_action'] = 0


    dog_table = dog_table.append(dog_table_row, ignore_index=True)
    
    return dog_table

# Initialize dog

In [47]:
WALKING_TIME = 15
EATING_TIME = 1
PLAYING_TIME = 4

food_consumption_rate = 1.0 / (30 * 3600)
affection_consumption_rate = 1.0 / (50 * 3600)
walking_fat_converge_rate = 0.2
walking_affection_converge_rate = 0.4
playing_fat_converge_rate = 0.1
playing_affection_converge_rate = 0.20
eating_food_increase = 0.6
eating_fat_increase = 0.25

cnt_limit = 24*60

In [71]:
def init_dog_table():

    dog_table = pd.DataFrame(columns=[
        'dog_id', 'minute',
        'happiness', 'food', 'fat', 'affection',
        'alive', 'can_action_be_taken', 'last_action_taken', 'minutes_since_last_action'
    ])

    dog_table = dog_table.append({
        'dog_id': 1, 'minute': 0,
        'happiness': 0.50, 'food': 0.50, 'fat': 0, 'affection': 0.50,
        'alive': True, 'can_action_be_taken': True, 'last_action_taken': 'NO ACTION', 'minutes_since_last_action': 0
        }, ignore_index=True)
    
    return dog_table

# Q value function - initialisation

In [64]:
observation_space_n = 100*100*100
action_space = ['NO ACTION', 'WALKING', 'FEEDING', 'PLAYING']
action_space_n = len(possible_actions)

# Choose action

In [66]:
# Function to choose the next action

def choose_action(state, Q, action_space, epsilon = 0.3):
    if np.random.uniform(0, 1) < epsilon:
        action = random.choices(action_space, k = 1)[0]
    else:
        action = np.argmax(Q[state, :])
    return action

# Update Q function

In [181]:
# Function to learn the Q-value
alpha = 0.85
gamma = 0.95

def update(Q, state, state2, reward, action, action2):
    
    state_q = get_state_q(state)
    state2_q = get_state_q(state2)
    
    predict = Q[state_q, action]
    target = reward + gamma * Q[state2_q, action2]
    
    Q[state_q, action] = Q[state_q, action] + alpha * (target - predict)
    
    #print(Q[state_q, action])
    
    return Q

# Starting

In [49]:
current_state = dog_table.tail(1)[
    ['food','fat','affection','can_action_be_taken','last_action_taken','minutes_since_last_action']]

In [50]:
current_state

Unnamed: 0,food,fat,affection,can_action_be_taken,last_action_taken,minutes_since_last_action
0,0.5,0,0.5,True,NO ACTION,0


In [56]:
dog_table = new_state_after_1_minute(dog_table, action_taken='NO ACTION')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

In [57]:
dog_table

Unnamed: 0,dog_id,minute,happiness,food,fat,affection,alive,can_action_be_taken,last_action_taken,minutes_since_last_action
0,1,2,0.48,0.48,0,0.48,True,True,NO ACTION,0
1,1,2,0.48,0.48,0,0.48,True,True,NO ACTION,1
2,1,3,0.47,0.47,0,0.47,True,True,NO ACTION,2
3,1,4,0.46,0.46,0,0.46,True,True,NO ACTION,3
4,1,5,0.45,0.45,0,0.45,True,True,NO ACTION,4


# Get reward

In [36]:
def get_future_states_until_end(dog_table, cnt_limit):

    while ((dog_table.tail(1).iloc[0]['alive']) & (dog_table.tail(1).iloc[0]['minute'] < cnt_limit)):

        current_state = dog_table.tail(1)[
            ['food','fat','affection','can_action_be_taken','last_action_taken','minutes_since_last_action']]
        action_taken = choose_an_action(current_state)
        # print(action_taken)

        dog_table = new_state_after_1_minute(dog_table, action_taken)

    return dog_table

In [38]:
future_dog_table = get_future_states_until_end(dog_table, cnt_limit)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

In [39]:
future_dog_table

Unnamed: 0,dog_id,minute,happiness,food,fat,affection,alive,can_action_be_taken,last_action_taken,minutes_since_last_action
0,1,0,0.500000,0.500000,0,0.500000,True,True,NO ACTION,0
1,1,1,0.499444,0.499444,0,0.499667,True,True,NO ACTION,1
2,1,2,0.498889,0.498889,0,0.499333,True,False,PLAYING,0
3,1,3,0.498333,0.498333,0,0.499000,True,False,PLAYING,1
4,1,4,0.497778,0.497778,0,0.498667,True,False,PLAYING,2
...,...,...,...,...,...,...,...,...,...,...
1436,1,1436,0.723127,0.977778,0.276873,0.988876,True,True,NO ACTION,1
1437,1,1437,0.723127,0.977222,0.276873,0.988542,True,True,NO ACTION,2
1438,1,1438,0.723127,0.976667,0.276873,0.988209,True,True,NO ACTION,3
1439,1,1439,0.723127,0.976111,0.276873,0.987876,True,True,NO ACTION,4


In [42]:
reward = sum(future_dog_table['happiness'])

In [43]:
reward

1081.5884004075554

# This is how it is done

In [70]:
Q = np.zeros((observation_space_n, action_space_n))

In [199]:
max_steps = 1000
total_episodes = 10

In [200]:
# Starting the SARSA learning
all_overall_reward = []

for episode in range(total_episodes):
    print(episode)
    
    t = 0
    overall_reward = 0
    
    dog_table = init_dog_table()
    state1 = dog_table.tail(1)
    
    action1 = choose_action(
        state = state1,
        Q = Q,
        action_space = action_space)
  
    while ((state1.iloc[0]['alive']) & (t < max_steps)): 
        
        #Getting the next state
        dog_table = new_state_after_1_minute(
            dog_table,
            action_taken = action_space[action1])
        
        state2 = dog_table.tail(1)
        
        reward = define_reward(dog_table)
        overall_reward += reward
        
        #Choosing the next action 
        action2 = choose_action(
            state = state2,
            Q = Q,
            action_space = action_space)

        #Learning the Q-value 
        Q = update(
            Q = Q,
            state = state1,
            state2 = state2,
            reward = reward,
            action = action1, 
            action2 = action2)
        #print(Q[get_state_q(state1),:])
  
        state1 = state2 
        action1 = action2 
          
        #Updating the respective vaLues 
        t += 1
    
    all_overall_reward.append(overall_reward)

0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [198]:
dog_table

Unnamed: 0,dog_id,minute,happiness,food,fat,affection,alive,can_action_be_taken,last_action_taken,minutes_since_last_action
0,1,0,0.50,0.50,0,0.50,True,True,NO ACTION,0
1,1,1,0.49,0.49,0,0.49,True,False,FEEDING,0
2,1,2,0.48,1.00,0.25,0.48,True,True,NO ACTION,0
3,1,3,0.47,0.99,0.25,0.47,True,False,WALKING,0
4,1,4,0.46,0.98,0.25,0.46,True,False,WALKING,1
...,...,...,...,...,...,...,...,...,...,...
460,1,460,0.04,0.04,0.28,0.54,True,False,WALKING,6
461,1,461,0.03,0.03,0.28,0.53,True,False,WALKING,7
462,1,462,0.02,0.02,0.28,0.52,True,False,WALKING,8
463,1,463,0.01,0.01,0.28,0.51,True,False,WALKING,9


In [133]:
t = 0

dog_table = init_dog_table()
state1 = dog_table.tail(1)

action1 = choose_action(
    state = state1,
    Q = Q,
    action_space = action_space)

state1, action1

(  dog_id minute  happiness  food fat  affection alive can_action_be_taken  \
 0      1      0        0.5   0.5   0        0.5  True                True   
 
   last_action_taken minutes_since_last_action  
 0         NO ACTION                         0  , 1)

In [134]:
#Getting the next state
dog_table = new_state_after_1_minute(
    dog_table,
    action_taken = action_space[action1])

In [None]:
state2 = dog_table.tail(1)
reward = dog_table.tail(1).iloc[0]['happiness']

In [142]:
#Choosing the next action 
action2 = choose_action(
    state = state2,
    Q = Q,
    action_space = action_space)

In [143]:
state2, reward, action2

(  dog_id minute  happiness  food   fat  affection alive can_action_be_taken  \
 2      1      2       0.48   1.0  0.25       0.48  True                True   
 
   last_action_taken minutes_since_last_action  
 2         NO ACTION                         0  , 0.48, 0)

In [145]:
def get_state_q(current_state):
    return int('{:02d}{:02d}{:02d}'.format(
        min(99,int(current_state.iloc[0]['food']*100)),
        min(99,int(current_state.iloc[0]['fat']*100)),
        min(99,int(current_state.iloc[0]['affection']*100))
    ))

In [146]:
def choose_action(state, Q, action_space, epsilon = 0.3):
    state_q = get_state_q(state)
    if np.random.uniform(0, 1) < epsilon:
        action = random.choices(range(len(action_space)), k = 1)[0]
    else:
        action = np.argmax(Q[state_q, :])
    return action

In [147]:
def define_reward(dog_table):
    return dog_table.tail(1).iloc[0]['happiness']

In [116]:
action = choose_action(
    state = current_state,
    Q = Q,
    action_space = action_space)
action_space[action]

'WALKING'

In [117]:
#Getting the next state
dog_table = new_state_after_1_minute(
    dog_table,
    action_taken = action_space[action])

In [118]:
dog_table

Unnamed: 0,dog_id,minute,happiness,food,fat,affection,alive,can_action_be_taken,last_action_taken,minutes_since_last_action
0,1,0,0.5,0.5,0,0.5,True,True,NO ACTION,0
1,1,1,0.49,0.49,0,0.49,True,False,WALKING,0


In [120]:
next_state = dog_table.tail(1)
next_state

Unnamed: 0,dog_id,minute,happiness,food,fat,affection,alive,can_action_be_taken,last_action_taken,minutes_since_last_action
1,1,1,0.49,0.49,0,0.49,True,False,WALKING,0


In [121]:
reward = dog_table.tail(1).iloc[0]['happiness']
reward

0.49

In [123]:
action2 = choose_action(
    state = next_state,
    Q = Q,
    action_space = action_space)
action_space[action2]

'NO ACTION'

In [129]:
update(
    Q = Q,
    state = current_state,
    state2 = next_state,
    reward = reward,
    action = action, 
    action2 = action2)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [201]:
print(Q[500050,])

[4.78537054 4.88131782 4.62804009 5.50943566]


In [186]:
get_state_q(state2)

68