In [4]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import random
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [5]:
#Loading and preparing data
data = pd.DataFrame({
    'Product ID': ['P6879', 'P5132', 'P2941'],
    'Product Name': ['Jacket', 'Camera', 'Sneakers'],
    'Category': ['Apparel', 'Electronics', 'Footwear'],
    'Price': [53.85, 761.26, 1756.76],
    'Discount': [5, 10, 5],
    'Tax Rate': [15, 15, 8],
    'Stock Level': [150, 224, 468],
    'Supplier ID': ['S535', 'S583', 'S118'],
    'Customer Age Group': ['35-44', '25-34', '25-34'],
    'Customer Location': ['New York, USA', 'London, UK', 'Tokyo, Japan'],
    'Customer Gender': ['Male', 'Female', 'Non-Binary'],
    'Shipping Cost': [23.32, 20.88, 16.43],
    'Shipping Method': ['Standard', 'Overnight', 'Standard'],
    'Return Rate': [4.49, 16.11, 4.93],
    'Seasonality': ['Yes', 'No', 'No'],
    'Popularity Index': [56, 79, 40]
})

In [6]:
# Converting categorical data to numerical
category_encoding = {'Apparel': 0, 'Electronics': 1, 'Footwear': 2}
data['Category'] = data['Category'].map(category_encoding)
data['Seasonality'] = data['Seasonality'].apply(lambda x: 1 if x == 'Yes' else 0)

def prepare_state(row):
    return np.array([
        row['Category'], row['Price'], row['Discount'], row['Tax Rate'],
        row['Stock Level'], row['Shipping Cost'], row['Return Rate'], row['Seasonality'],
        row['Popularity Index']
    ])

In [7]:
# Define the Deep Q-Network (DQN)
def build_model():
    model = Sequential([
        Dense(64, activation='relu', input_dim=9),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(3, activation='linear')  # Output: -5%, 0%, +5% price change
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

model = build_model()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Step 4: Define the environment
class PricingEnv:
    def __init__(self, data):
        self.data = data
        self.current_index = 0
        self.reset()

    def reset(self):
        self.current_product = self.data.iloc[self.current_index]
        self.state = prepare_state(self.current_product)
        self.current_index = (self.current_index + 1) % len(self.data)
        return self.state

    def step(self, action):
        price_change = [-0.05, 0, 0.05][action]
        new_price = self.current_product['Price'] * (1 + price_change)

        # Simulate demand change
        demand_factor = (100 - self.current_product['Popularity Index']) / 100
        sales_volume = max(1, int(10 * (1 - demand_factor * abs(price_change))))
        profit = (new_price - self.current_product['Price']) * sales_volume

        reward = profit - self.current_product['Shipping Cost'] * sales_volume
        done = True
        return prepare_state(self.current_product), reward, done

In [9]:
# Step 5: Train the model
env = PricingEnv(data)

num_episodes = 500
for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        if np.random.rand() < 0.1:
            action = np.random.choice(3)  # Explore
        else:
            action = np.argmax(model.predict(state.reshape(1, -1), verbose=0))

        next_state, reward, done = env.step(action)
        total_reward += reward

        target = reward
        target_vec = model.predict(state.reshape(1, -1), verbose=0)[0]
        target_vec[action] = target
        model.fit(state.reshape(1, -1), target_vec.reshape(-1, 3), epochs=1, verbose=0)

        state = next_state

    if episode % 50 == 0:
        print(f"Episode {episode}, Total Reward: {total_reward}")

Episode 0, Total Reward: -530.4869999999999
Episode 50, Total Reward: -185.64749999999998
Episode 100, Total Reward: 642.6719999999997
Episode 150, Total Reward: 154.6469999999999
Episode 200, Total Reward: -233.2
Episode 250, Total Reward: 642.6719999999997
Episode 300, Total Reward: 154.6469999999999
Episode 350, Total Reward: -185.64749999999998
Episode 400, Total Reward: 642.6719999999997
Episode 450, Total Reward: 154.6469999999999


In [10]:
# Step 6: Test the model
predicted_prices = []
actual_prices = []

for _, row in data.iterrows():
    state = prepare_state(row)
    action = np.argmax(model.predict(state.reshape(1, -1), verbose=0))
    price_change = [-0.05, 0, 0.05][action]
    new_price = row['Price'] * (1 + price_change)
    predicted_prices.append(new_price)
    actual_prices.append(row['Price'])
    print(f"Product: {row['Product Name']}, Original Price: ${row['Price']:.2f}, New Price: ${new_price:.2f}")

Product: Jacket, Original Price: $53.85, New Price: $51.16
Product: Camera, Original Price: $761.26, New Price: $799.32
Product: Sneakers, Original Price: $1756.76, New Price: $1844.60


In [11]:
# Step 7: Evaluate model performance
mae = mean_absolute_error(actual_prices, predicted_prices)
mse = mean_squared_error(actual_prices, predicted_prices)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

Mean Absolute Error (MAE): 42.86
Mean Squared Error (MSE): 3057.19
Root Mean Squared Error (RMSE): 55.29
