In [1]:
from utils.process_data import get_data


In [2]:
ingredient_df = get_data()

Successfully read 137 lines from the file. Loaded 136 ingredients.


In [3]:
print(ingredient_df.keys())

Index(['Category1', 'Category2', 'Category3', 'Category4', 'Category5',
       'Category6', 'Category7', 'Mean_g_per_day', 'StandardDeviation',
       'Coefficient of Variation', 'Cost_100g', 'Calories_kcal_per_100g',
       'Fat_g', 'Saturates_g', 'Carbs_g', 'Sugars_g', 'Fibre_g', 'Protein_g',
       'Salt_g', 'CO2_kg_per_100g', 'Animal Welfare Rating',
       'Rainforest Rating', 'Water Scarcity Rating', 'CO2 FU Rating',
       'Group A veg', 'Group A fruit', 'Group B', 'Oily Fish', 'Red Meat',
       'Group C', 'Group D', 'Group E', 'Oil', 'Bread', 'Confectionary'],
      dtype='object')


In [15]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
import random

class CustomEnv(gym.Env):
    def __init__(self, ingredient_df, num_days=5, num_people=50):
        super(CustomEnv, self).__init__()
        
        # Number of ingredients
        self.ingredient_df = ingredient_df
        self.num_ingredients = len(ingredient_df)
        self.num_days = num_days
        self.num_people = num_people
        
        # Action space: ingredient amount (between 0 and 100 per ingredient)
        self.action_space = spaces.Box(low=np.zeros(self.num_ingredients), high=np.ones(self.num_ingredients) * 100, dtype=np.float32)
        
        # Observation space: average calories per day per person
        self.observation_space = spaces.Box(low=np.array([0.0]), high=np.array([10000.0]), dtype=np.float32)
        
        # Initial state: average calories per day per person
        self.state = np.zeros(1, dtype=np.float32)
        
        # Set day count
        self.day_count = 0

    def step(self, action):
            # Ensure the action is within the action space
            action = np.clip(action, self.action_space.low, self.action_space.high)
            
            # Calculate total calories for the chosen ingredient amounts
            total_calories = np.sum(action * self.ingredient_df['Calories_kcal_per_100g'].values)
            
            # Calculate average calories per day per person
            average_calories_per_day = total_calories / (self.num_days * self.num_people)
            
            # Update state
            self.state = np.array([average_calories_per_day], dtype=np.float32)
            
            # Calculate reward: reward is 1 if average calories are between 2000 and 3000, else -1
            if 2000 <= average_calories_per_day <= 3000:
                reward = 1
            else:
                reward = -1
            
            # Increment day count
            self.day_count += 1
            
            # Check if episode is done (after the specified number of days)
            done = self.day_count >= self.num_days
            
            # Placeholder for info
            info = {
                'total_calories': total_calories,
                'average_calories_per_day': average_calories_per_day
            }
            
            # Return step information
            return self.state, reward, done, info

    def reset(self):
        # Reset the state to zero average calories
        self.state = np.zeros(1, dtype=np.float32)
        # Reset day count
        self.day_count = 0
        return self.state

    def render(self):
        # Implement visualization (optional)
        print(f"Day: {self.day_count}, Average Calories per Day: {self.state[0]}")

    def close(self):
        # Clean up (optional)
        pass

# Instantiate the environment
env = CustomEnv(ingredient_df)

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_dim)
        self.activation = nn.Tanh()

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        x = torch.relu(self.fc2(x))
        x = self.activation(self.fc3(x))
        return x * 100  # Ensure actions are in the range 0 to 100

class Critic(nn.Module):
    def __init__(self, state_dim):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

def train_actor_critic(env, actor, critic, num_episodes=100000, gamma=0.99, actor_lr=1e-4, critic_lr=1e-3):
    actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)
    mse_loss = nn.MSELoss()

    for episode in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state)
        done = False
        total_reward = 0
        step_count = 0

        while not done:
            action = actor(state)
            action = action.detach().numpy()
            next_state, reward, done, _ = env.step(action)
            next_state = torch.FloatTensor(next_state)
            reward = torch.tensor(reward, dtype=torch.float32)

            # Update Critic
            value = critic(state)
            next_value = critic(next_state)
            target = reward + (1 - done) * gamma * next_value
            critic_loss = mse_loss(value, target.detach())
            critic_optimizer.zero_grad()
            critic_loss.backward()
            critic_optimizer.step()

            # Update Actor
            advantage = (reward + (1 - done) * gamma * next_value - value).detach()
            actor_loss = -advantage * torch.sum(actor(state) * torch.FloatTensor(action))
            actor_optimizer.zero_grad()
            actor_loss.backward()
            actor_optimizer.step()

            state = next_state
            total_reward += reward.item()
            step_count += 1

        print(f"Episode: {episode+1}, Total Reward: {total_reward}, Steps: {step_count}, Calories: {env.state[0]}")

# Instantiate the environment
env = CustomEnv(ingredient_df)

# Instantiate the actor and critic networks
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

# Train the actor-critic model
train_actor_critic(env, actor, critic, num_episodes=50)

print(env.action_space.sample())


Episode: 1, Total Reward: -5.0, Steps: 5, Calories: 5100.39990234375
Episode: 2, Total Reward: -5.0, Steps: 5, Calories: 5007.2001953125
Episode: 3, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 4, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 5, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 6, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 7, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 8, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 9, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 10, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 11, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 12, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 13, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 14, Total Reward: -5.0, Steps: 5, Calories: 4890.7998046875
Episode: 15, Total Reward: -5.0, Steps: 5,

In [47]:
def train_actor_critic(env, actor, critic, num_episodes=1000, gamma=0.99, actor_lr=1e-4, critic_lr=1e-3):
    actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)
    mse_loss = nn.MSELoss()

    for episode in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state)
        done = False
        total_reward = 0
        step_count = 0

        while not done:
            action = actor(state)
            action = action.detach().numpy()
            next_state, reward, done, _ = env.step(action)
            next_state = torch.FloatTensor(next_state)
            reward = torch.tensor(reward, dtype=torch.float32)

            # Update Critic
            value = critic(state)
            next_value = critic(next_state)
            target = reward + (1 - done) * gamma * next_value
            critic_loss = mse_loss(value, target.detach())
            critic_optimizer.zero_grad()
            critic_loss.backward()
            critic_optimizer.step()

            # Update Actor
            advantage = (reward + (1 - done) * gamma * next_value - value).detach()
            actor_loss = -advantage * torch.sum(actor(state) * torch.FloatTensor(action))
            actor_optimizer.zero_grad()
            actor_loss.backward()
            actor_optimizer.step()

            state = next_state
            total_reward += reward.item()
            step_count += 1

        print(f"Episode: {episode+1}, Total Reward: {total_reward}, Steps: {step_count}")

# Instantiate the environment
env = CustomEnv(ingredient_df)

# Instantiate the actor and critic networks
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

# Train the actor-critic model
train_actor_critic(env, actor, critic, num_episodes=50)


TypeError: can't multiply sequence by non-int of type 'float'