In [7]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt

# Load the raw Olist CSV exports (orders, order line items, product catalogue)
orders = pd.read_csv('/content/olist_orders_dataset.csv')
order_items = pd.read_csv('/content/olist_order_items_dataset.csv')
products = pd.read_csv('/content/olist_products_dataset.csv')

# Merge the datasets to get one row per order item, enriched with order and product columns
df = pd.merge(order_items, orders, on="order_id")
df = pd.merge(df, products, on="product_id")

# Convert order_purchase_timestamp to datetime so the .dt accessors below work
df['order_purchase_timestamp'] = pd.to_datetime(df['order_purchase_timestamp'])

# Feature engineering: extract year, month, and day from order_purchase_timestamp
df['year'] = df['order_purchase_timestamp'].dt.year
df['month'] = df['order_purchase_timestamp'].dt.month
df['day'] = df['order_purchase_timestamp'].dt.day

# Convert product_id and order_item_id to integer category codes
df['product_id'] = df['product_id'].astype('category').cat.codes
df['order_item_id'] = df['order_item_id'].astype('category').cat.codes

# Drop rows that have a missing value in any column.
# NOTE(review): this runs before the column selection below, so a row is dropped
# when ANY merged column is missing, not only the selected ones - confirm this is intended.
df = df.dropna()

# Select relevant features for the RL environment.
# .copy() makes the result an independent frame, so the column assignments below
# do not write into a slice of the merged frame (avoids SettingWithCopyWarning).
df = df[['order_id', 'product_id', 'price', 'freight_value', 'order_item_id', 'year', 'month', 'day']].copy()

# Min-max normalize the price and freight_value columns to [0, 1]
df['price'] = (df['price'] - df['price'].min()) / (df['price'].max() - df['price'].min())
df['freight_value'] = (df['freight_value'] - df['freight_value'].min()) / (df['freight_value'].max() - df['freight_value'].min())

class PriceOptimizerEnv(gym.Env):
    """Gym environment for picking one of 10 discrete price levels.

    ``reset()`` samples a single row of ``df``; that row's features stay fixed for
    the whole episode, while stock quantity and demand are redrawn with
    ``np.random.rand()`` for every observation. The reward is a random placeholder.

    Observation layout (length 9, float32): year, month, day, product_id,
    order_item_id, price, freight_value, stock_quantity, demand.
    """

    def __init__(self, df, max_steps=None):
        """
        :param df: DataFrame providing the columns year, month, day, product_id,
                   order_item_id, price and freight_value.
        :param max_steps: Number of steps after which ``step`` reports ``done``.
                          ``None`` (default) means ``len(self.df)``.
        """
        super().__init__()
        self.df = df
        self.max_steps = max_steps
        self.current_step = 0
        self.action_space = spaces.Discrete(10)  # 10 possible price levels
        # NOTE(review): the declared bounds are [0, 1]; columns such as year/month/day
        # only respect them if the caller scaled those columns - verify against the df passed in.
        self.observation_space = spaces.Box(low=0, high=1, shape=(9,), dtype=np.float32)

    def reset(self):
        """Start a new episode on a randomly sampled row and return the first observation."""
        self.current_step = 0
        self.current_data = self.df.sample()
        self.state = self._get_state()
        return self.state

    def _get_state(self):
        """Build the observation: the sampled row's features plus random stock and demand."""
        # Draw order (stock first, then demand) is kept so seeded runs stay reproducible.
        stock_quantity = np.random.rand()
        demand = np.random.rand()
        features = self.current_data[
            ['year', 'month', 'day', 'product_id', 'order_item_id', 'price', 'freight_value']
        ].values.flatten().astype(np.float32)
        # Appending plain Python floats would upcast the result to float64; building a
        # float32 array keeps the observation in the dtype declared by observation_space.
        extras = np.array([stock_quantity, demand], dtype=np.float32)
        return np.concatenate([features, extras])

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        The sampled row is not advanced; only stock quantity and demand change
        between observations.
        """
        reward = self._get_reward(action)
        self.current_step += 1
        step_limit = len(self.df) if self.max_steps is None else self.max_steps
        done = self.current_step >= step_limit
        self.state = self._get_state()
        return self.state, reward, done, {}

    def _get_reward(self, action):
        """Return a mock reward drawn uniformly from [0, 1); ``action`` is ignored."""
        reward = np.random.rand()  # Mock reward
        return reward

# Instantiate the pricing environment on the preprocessed DataFrame;
# later cells in this notebook use this `env` instance.
env = PriceOptimizerEnv(df)


In [8]:
# Load the saved Keras model (described later in this notebook as a trained DQN)
# from its .h5 file and confirm which file was read.
model_filename = '/content/dqn_price_optimizer_model.h5'
loaded_model = load_model(model_filename)
print(f"Model loaded from {model_filename}")


Model loaded from /content/dqn_price_optimizer_model.h5


**Using the Model for Dynamic Pricing**

In [12]:
def dynamic_pricing(model, stock_quantity, demand, current_state):
    """
    Use the trained model to determine the optimal price given the stock quantity and demand.

    :param model: Trained DQN model; must expose ``predict(batch, verbose=...)``
    :param stock_quantity: Current stock quantity
    :param demand: Current demand
    :param current_state: Current state without stock quantity and demand
    :return: Optimal price level (action), i.e. the index of the largest predicted value
    """
    # Rebuild the full observation by appending the two externally supplied values.
    state = np.append(current_state, [stock_quantity, demand]).astype(np.float32)
    # Shape as a batch of one. Inferring the width from the data avoids reading a
    # global environment object, so the function works on its own.
    state = state.reshape(1, -1)
    # verbose=0 stops a progress bar from being printed on every single call.
    q_values = model.predict(state, verbose=0)[0]
    action = np.argmax(q_values)
    return action

# Example usage
# reset() samples one row and returns a 9-value observation; [:-2] drops the trailing
# random stock_quantity and demand entries so explicit values can be supplied instead.
current_state = env.reset()[:-2]  # Get the initial state without stock quantity and demand
stock_quantity = 1  # Example stock quantity
# NOTE(review): the environment draws demand with np.random.rand(), i.e. from [0, 1);
# 2 lies outside that range - confirm the model is meant to be queried with it.
demand = 2  # Example demand
optimal_price_level = dynamic_pricing(loaded_model, stock_quantity, demand, current_state)
print(f"Optimal Price Level: {optimal_price_level}")


Optimal Price Level: 4


**Simulating the Model with Dynamic Pricing**

In [None]:
def simulate_dynamic_pricing(env, model, stock_quantities, demands, episodes=10, max_steps=None):
    """
    Roll out the model's greedy pricing policy in the environment.

    At every step a stock quantity and a demand are drawn at random from the given
    candidates, the model picks a price level for them, and the environment is stepped.

    :param env: Environment exposing ``reset()`` and ``step(action)``
    :param model: Trained DQN model passed through to ``dynamic_pricing``
    :param stock_quantities: Candidate stock quantities to sample from
    :param demands: Candidate demand values to sample from
    :param episodes: Number of episodes to simulate
    :param max_steps: Optional cap on steps per episode. ``None`` (default) runs
                      until the environment reports ``done``, which can take very
                      long because every step performs a model prediction.
    :return: ``(all_rewards, all_prices)`` - total reward per episode and the list
             of chosen price levels per episode
    """
    all_rewards = []
    all_prices = []

    for episode in range(episodes):
        state = env.reset()[:-2]  # Get the initial state without stock quantity and demand
        total_reward = 0
        done = False
        episode_prices = []
        steps_taken = 0

        # Stop when the environment finishes or the optional step cap is reached.
        while not done and (max_steps is None or steps_taken < max_steps):
            stock_quantity = np.random.choice(stock_quantities)
            demand = np.random.choice(demands)
            action = dynamic_pricing(model, stock_quantity, demand, state)
            next_state, reward, done, _ = env.step(action)
            state = next_state[:-2]  # Update state without stock quantity and demand
            total_reward += reward
            episode_prices.append(action)
            steps_taken += 1

        all_rewards.append(total_reward)
        all_prices.append(episode_prices)
        print(f"Episode {episode + 1}: Total Reward: {total_reward}")

    return all_rewards, all_prices

# Discrete stock and demand levels the simulation samples from
stock_quantities = [0.2, 0.4, 0.6, 0.8, 1.0]
demands = [0.2, 0.4, 0.6, 0.8, 1.0]

# Roll out the loaded model for 10 episodes
simulation_rewards, simulation_prices = simulate_dynamic_pricing(
    env,
    loaded_model,
    stock_quantities,
    demands,
    episodes=10,
)

# Line chart: total reward collected in each episode
fig, ax = plt.subplots()
ax.plot(simulation_rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
ax.set_title('Total Rewards per Episode during Dynamic Pricing Simulation')
plt.show()

# Histogram: how the episode totals are distributed
fig, ax = plt.subplots()
ax.hist(simulation_rewards, bins=10)
ax.set_xlabel('Total Reward')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Total Rewards during Dynamic Pricing Simulation')
plt.show()

# Summary statistics over the episode totals
rewards_arr = np.asarray(simulation_rewards)
average_reward = rewards_arr.mean()
max_reward = rewards_arr.max()
min_reward = rewards_arr.min()
std_reward = rewards_arr.std()

print(f"Average Reward: {average_reward}")
print(f"Max Reward: {max_reward}")
print(f"Min Reward: {min_reward}")
print(f"Standard Deviation of Reward: {std_reward}")

# Line chart: price level chosen at each step, averaged across episodes
episode_length = len(simulation_prices[0])
average_prices = np.asarray(simulation_prices).mean(axis=0)

fig, ax = plt.subplots()
ax.plot(range(episode_length), average_prices)
ax.set_xlabel('Step')
ax.set_ylabel('Average Price Level')
ax.set_title('Average Price Level per Step during Dynamic Pricing Simulation')
plt.show()
