In [None]:
ingredient_group_target_ranges = {
    'fruit': 1, # 1 fruit a day per meal
    'veg': 1, # 1 veg per day per meal
    'non_processed_meat': 1, # Portion of non processed meat has to be provided accept if a portion of processed meat is provided. This current env is one day meal selection.
    'processed_meat': 1, # Processed meat, see above ^
    'carbs': 1, # Starchy food , a portion of this should be provided every day
    'dairy': 1, # Dairy, a portion of this should be provided every day
    'bread': 1, # Bread should be provided as well as a portion of starchy food
    'confectionary': 0 # No confectionary should be provided
}
print(ingredient_group_target_ranges.keys())
food_group_counts = {k: 0 for k in ingredient_group_target_ranges.keys()}

In [1]:
from utils.process_data import get_data
ingredient_df = get_data()

Successfully read 137 lines from the file. Loaded 136 ingredients.


In [2]:
print(ingredient_df.keys())

Index(['Category1', 'Category2', 'Category3', 'Category4', 'Category5',
       'Category6', 'Category7', 'Mean_g_per_day', 'StandardDeviation',
       'Coefficient of Variation', 'Cost_100g', 'Calories_kcal_per_100g',
       'Fat_g', 'Saturates_g', 'Carbs_g', 'Sugars_g', 'Fibre_g', 'Protein_g',
       'Salt_g', 'CO2_kg_per_100g', 'Animal Welfare Rating',
       'Rainforest Rating', 'Water Scarcity Rating', 'CO2 FU Rating',
       'Group A veg', 'Group A fruit', 'Group B', 'Oily Fish', 'Red Meat',
       'Group C', 'Group D', 'Group E', 'Oil', 'Bread', 'Confectionary'],
      dtype='object')


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Actor and Critic
class Actor(nn.Module):
    def __init__(self, state_dim, selection_action_dim):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, selection_action_dim)
        self.fc3 = nn.Linear(128, selection_action_dim)

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        
        quantity = torch.relu(self.fc3(x))
        return quantity

class Critic(nn.Module):
    def __init__(self, state_dim):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        value = self.fc2(x)
        return value

In [77]:
import os
x = os.path.abspath("Masters_RL/saved_models/tensorboard/a2c_simple_calorie_env/")
print(os.path.exists(x))

False


In [70]:
import gym
from gym import spaces
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class CustomEnv(gym.Env):
    def __init__(self, ingredient_df, num_people=1):
        super(CustomEnv, self).__init__()
        
        self.ingredient_df = ingredient_df
        self.num_people = num_people
        
        # Define action space
        n_ingredients = len(self.ingredient_df)
        
        self.action_space = spaces.Dict({
            'selection': spaces.MultiBinary(n_ingredients),
            'quantity': spaces.Box(low=0, high=100, shape=(n_ingredients,), dtype=np.float32)
        })
        
        # State includes quantities of ingredients and average calories
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(n_ingredients + 1,), dtype=np.float32)
        
        # self.state = None

    # def reset(self):
    #     n_ingredients = len(self.ingredient_df)
    #     self.state = np.zeros(n_ingredients + 1, dtype=np.float32)  # Reset state to zeros including average calories
    #     return self.state

    def calculate_reward(self, action):
        reward = 0.0
        
        # Convert action selection and quantity to numpy arrays
        selection = np.array(action['selection'], dtype=np.float32)
        quantity = np.array(action['quantity'], dtype=np.float32)
        
        # Reward based on the number of selected ingredients
        total_selection = np.sum(selection)
        
        if total_selection > 10:
            reward -= 10
        elif total_selection < 5:
            reward -= 10
        else:
            reward += 10
        
        # Calculate calories for selected ingredients
        calories_per_100g = self.ingredient_df['Calories_kcal_per_100g'].values
        calories_selected_ingredients = selection * quantity * calories_per_100g / 100
        
        # Calculate average calories per day per person
        average_calories_per_day = np.sum(calories_selected_ingredients) / self.num_people
        
        # Reward based on the average calories per day
        if 2000 <= average_calories_per_day <= 3000:
            reward += 100
            done = True
        else:
            reward -= 10
            done = False
        
        return reward, average_calories_per_day, done

    def step(self, action):
        n_ingredients = len(self.ingredient_df)
        
        # Update the state based on action
        self.state[:n_ingredients] = action['quantity']  # Update state with quantities of selected ingredients
        
        # Calculate reward
        reward, average_calories_per_day, done = self.calculate_reward(action)
        
        # Update the state with average calories
        self.state[-1] = average_calories_per_day
        
        info = {
            'average_calories_per_day': average_calories_per_day
        }

        return self.state, reward, done, info

def train_actor_critic(env, actor, critic, num_episodes=1000, gamma=0.99, actor_lr=1e-4, critic_lr=1e-3, device='cuda'):
    # Move models to the GPU
    actor.to(device)
    critic.to(device)
    
    actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)
    mse_loss = nn.MSELoss()

    for episode in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        done = False
        total_reward = 0
        episode_info = None

        while not done:
            selection_probs, quantity = actor(state)
            selection = (selection_probs > 0.5).float()  # Binarize selection actions
            action = {
                'selection': selection.detach().cpu().numpy().squeeze(),
                'quantity': quantity.detach().cpu().numpy().squeeze()
            }
            next_state, reward, done, info = env.step(action)
            next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
            reward = torch.tensor([reward], dtype=torch.float32).to(device)

            # Save the info for the end of the episode
            episode_info = info

            # Update Critic
            value = critic(state)
            next_value = critic(next_state)
            target = reward + (1 - done) * gamma * next_value
            critic_loss = mse_loss(value, target.detach())
            critic_optimizer.zero_grad()
            critic_loss.backward()
            critic_optimizer.step()

            # Update Actor
            advantage = (target - value).detach()
            actor_loss = -advantage * (selection_probs.mean() + quantity.mean())
            actor_optimizer.zero_grad()
            actor_loss.backward()
            actor_optimizer.step()

            state = next_state
            total_reward += reward

            # Debug prints
            print(f"Reward: {reward.item()}, Done: {done}")

        if episode_info is not None and 'average_calories_per_day' in episode_info:
            average_calories_per_day = episode_info['average_calories_per_day']
        else:
            average_calories_per_day = float('nan')

        print(f"Episode: {episode+1}, Total Reward: {total_reward.item()}, Average Calories per Day: {average_calories_per_day}")


In [69]:
# Create environment and models
env = CustomEnv(ingredient_df)

state_dim = env.state.shape[0]
selection_action_dim = env.action_space['selection'].n
actor = Actor(state_dim, selection_action_dim)
critic = Critic(state_dim)


# Train the model
train_actor_critic(env, actor, critic, num_episodes=5)

# Evaluate the model
# evaluate_actor(env, actor, num_days=5)

AttributeError: 'NoneType' object has no attribute 'shape'

In [7]:
# Evaluation function
def evaluate_actor(env, actor, num_days=5):
    state = env.reset()
    state = torch.FloatTensor(state).unsqueeze(0)
    done = False
    total_reward = 0
    episode_selections = []

    while not done:
        with torch.no_grad():
            selection_probs, quantity = actor(state)
            selection = (selection_probs > 0.5).float()  # Binarize selection actions
            action = {
                'selection': selection.detach().numpy().squeeze(),
                'quantity': quantity.detach().numpy().squeeze()
            }
        next_state, reward, done, info = env.step(action)
        next_state = torch.FloatTensor(next_state).unsqueeze(0)
        episode_selections.append(action)

        state = next_state
        total_reward += reward

    print(f"Evaluation Total Reward: {total_reward}")
    for day, selections in enumerate(env.selections, 1):
        print(f"Day {day}:")
        for i, (selection, quantity) in enumerate(selections):
            print(f"  Try {i+1}: Selection - {selection}, Quantity - {quantity}")

