In [132]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [133]:
dataset = pd.read_csv("dataset/SampleData.csv")
dataset.head(12)

Unnamed: 0,Ingredient,Product,QMerged_label,Real_Cost,V_0
0,1,1,11,10,0
1,1,2,12,6,0
2,2,1,21,8,0
3,2,2,22,11,0
4,3,1,31,3,0
5,3,2,32,7,0
6,4,1,41,8,0
7,4,2,42,5,0
8,4,3,43,1,0


In [134]:
class MonteCarlo:
    def __init__(self, dataset, num_episodes, epsilon, budget, reward, alpha):
        self.state_space = list(set(dataset['Ingredient']))
        self.alpha = alpha
        self.epsilon = epsilon
        self.budget = budget
        self.reward = reward
        self.num_episodes = num_episodes

    def run(self, dataset):
        v0 = dataset['V_0']
        dataset['V'] = v0
        output = []
        output1 = []
        output2 = []
        action_in_full = []

        for episode in range(self.num_episodes):
            
            if episode == 0:
                episode_run = np.random.randint(low=1, high=len(self.state_space) + 1, size=len(self.state_space))
                print(f'Episode Run: {episode_run}')

            else:
                episode_run = [
                    np.random.randint(low=1, high=sum(1 for p in dataset.iloc[:, 0] if p == i + 1))
                    if np.random.randint(low=1, high=11) <= (self.epsilon * 10)
                    else dataset[dataset['Ingredient'] == (i + 1)].nlargest(1, 'V')['Product'].values[0]
                    for i in range(len(self.state_space))
                ]

            episode_df = pd.DataFrame({'Ingredient': self.state_space, 'Product': episode_run})
            episode_df['Merged_label'] = (episode_df['Ingredient'] * 10 + episode_df['Product']).astype(float)
            dataset['QMerged_label'] = dataset['QMerged_label'].astype(float)
            dataset['Reward'] = self.reward

            episode2 = episode_df.merge(dataset[['QMerged_label', 'Real_Cost', 'Reward']],
                                        left_on='Merged_label', right_on='QMerged_label', how='inner')

            dataset = dataset.drop(columns='Reward')


            # reward function
            if self.budget >= episode2['Real_Cost'].sum():
                return_value = 1
            else:
                return_value = -1

            episode2 = episode2.drop(columns='Reward')
            episode2['Return'] = return_value
            dataset = dataset.merge(episode2[['Merged_label', 'Return']], left_on='QMerged_label',
                                    right_on='Merged_label', how='outer')
            dataset['Return'] = dataset['Return'].fillna(0)

            for v in range(len(dataset)):
                if dataset.iloc[v, 7] == 0:
                    dataset.iloc[v, 5] = dataset.iloc[v, 5]
                else:
                    dataset.iloc[v, 5] = dataset.iloc[v, 5] + self.alpha * (
                            (dataset.iloc[v, 7] / len(self.state_space)) - dataset.iloc[v, 5])

            dataset = dataset.drop(columns='Merged_label')
            dataset = dataset.drop(columns='Return')

            output = np.append(output, dataset.iloc[:, -1].sum())
            output1 = np.append(output1, dataset.iloc[[1, 2, 4, 8], -1].sum())
            output2 = np.append(output2, dataset.iloc[[0, 3, 5, 6, 7], -1].sum())

            action = pd.DataFrame(dataset.groupby('Ingredient')['V'].max())
            action2 = action.merge(dataset, left_on='V', right_on='V', how='inner')
            action3 = action2.groupby('Ingredient')['Product'].apply(
                lambda x: x.iloc[np.random.randint(0, len(x))])

            action_in_full = np.append(action_in_full, action3)
            action_in_full = action_in_full.astype(int)

        return output, output1, output2, action3, dataset, action_in_full
            

In [135]:
reward = [0,0,0,0,0,0,0,0,0]
model = MonteCarlo(dataset=dataset, num_episodes=100, epsilon=0.6, budget=100, reward=reward, alpha=0.1)
driver = model.run(dataset=dataset)

Episode Run: [1 3 2 2]


In [136]:
print(driver[3])

Ingredient
1    1
2    1
3    1
4    2
Name: Product, dtype: int64


In [137]:
class DQNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQNetwork, self).__init__()
        self.layer1 = nn.Linear(state_size, 64)
        self.layer2 = nn.Linear(64,action_size)


    def feed_forward(self, state):
        x = torch.relu(self.layer1(state))
        return self.layer2(x)
