In [30]:
pip install gymnasium

Note: you may need to restart the kernel to use updated packages.


In [34]:
import pandas as pd
import numpy as np
import requests
import re
import json
import torch
from stable_baselines3 import PPO
import gymnasium as gym  # Updated to use Gymnasium
from gymnasium import spaces

In [11]:
import json
import os
import re

import gymnasium as gym
import numpy as np
import pandas as pd
import requests
import torch
from gymnasium import spaces
from stable_baselines3 import PPO

# OpenWeatherMap API key, read from the OPENWEATHERMAP_API_KEY environment variable so the
# secret is never stored in the notebook. None when the variable is not set.
API_KEY = os.environ.get("OPENWEATHERMAP_API_KEY")

# Load dataset
data = pd.read_csv('customer_data1.csv')

# Guarantee both price columns exist: a flat test price of 100 for 'original_price'
# and NaN placeholders for 'discounted_price' when the CSV does not provide them
for price_column, fallback_value in (('original_price', 100), ('discounted_price', np.nan)):
    if price_column not in data.columns:
        data[price_column] = fallback_value

# Function to get weather for a given location using OpenWeatherMap API
def get_weather(location):
    """Return the main weather condition reported for ``location``.

    Args:
        location: Place name sent as the ``q`` query parameter.

    Returns:
        The ``weather[0].main`` string of the response (for example "Rain"),
        or None when the request fails, the status code is not 200, or the
        payload does not have the expected shape.
    """
    try:
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            # Let requests URL-encode the values instead of formatting them into the URL
            params={"q": location, "appid": API_KEY},
            # Without a timeout a stalled connection would block the notebook indefinitely
            timeout=10,
        )
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()['weather'][0]['main']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON or did not contain a weather entry
        return None

# Check if the product is relevant to the current weather
def is_product_relevant_to_weather(weather, product_category):
    """Tell whether ``product_category`` is listed for the given ``weather`` condition.

    Only "Rain", "Snow" and "Clear" have associated categories; any other
    condition yields False.
    """
    products_by_condition = dict(
        Rain=["Raincoat", "Umbrella"],
        Snow=["Winter Jacket", "Snow Boots"],
        Clear=["Sunglasses", "T-Shirts"],
    )
    matching_products = products_by_condition.get(weather, [])
    return product_category in matching_products

# Function to extract rating from text
def extract_rating(text):
    """Pull a numeric rating out of free-form review text.

    Two formats are recognised, in this order:
      1. "<number> star" / "<number> stars" (case-insensitive), e.g. "4.5 stars".
      2. A JSON/dict-style "Rating": <number> entry, with either quote style.

    Args:
        text: Any value; it is converted with ``str()`` before matching.

    Returns:
        The rating as a float (decimals preserved), or ``np.nan`` when
        neither format is found.
    """
    content = str(text)

    stars_match = re.search(r'(\d+(?:\.\d+)?)\s*stars?\b', content, re.IGNORECASE)
    if stars_match:
        return float(stars_match.group(1))

    # Accept the fractional part too so '"Rating": 4.5' is not truncated to 4
    json_match = re.search(r'''["']Rating["']\s*:\s*(\d+(?:\.\d+)?)''', content)
    if json_match:
        return float(json_match.group(1))

    return np.nan

# Build the 'Product Rating' column from the review text; reviews without a
# recognisable rating receive the median of the ratings that were found
product_ratings = pd.to_numeric(data['Product Reviews'].apply(extract_rating), errors='coerce')
data['Product Rating'] = product_ratings.fillna(product_ratings.median())

# Process Purchase History to extract numeric information
def parse_purchase_history(history):
    """Summarise one purchase-history cell as (number of purchases, average price).

    The cell is expected to hold a JSON list of purchase objects. It is parsed
    as-is first, and only if that fails are single quotes swapped for double
    quotes, so valid JSON containing apostrophes is not corrupted. A single
    purchase object is treated as a one-item list.

    Args:
        history: The raw cell value. Non-string values (for example NaN for a
            missing cell) are treated as "no purchases".

    Returns:
        ``pd.Series([num_purchases, avg_purchase_amount])``. ``[0, 0]`` is
        returned when the value cannot be parsed, is not a list/object, or a
        "Price" entry cannot be summed.
    """
    no_purchases = pd.Series([0, 0])

    if not isinstance(history, str):
        return no_purchases

    purchases = None
    for candidate in (history, history.replace("'", '"')):
        try:
            purchases = json.loads(candidate)
            break
        except json.JSONDecodeError:
            continue

    if isinstance(purchases, dict):
        # One purchase stored without the surrounding list
        purchases = [purchases]
    if not isinstance(purchases, list):
        return no_purchases

    try:
        total_spent = sum(item.get("Price", 0) for item in purchases if isinstance(item, dict))
    except TypeError:
        # A "Price" value that is not a number (e.g. a string or null)
        return no_purchases

    num_purchases = len(purchases)
    avg_purchase_amount = total_spent / num_purchases if num_purchases > 0 else 0
    return pd.Series([num_purchases, avg_purchase_amount])

# Expand each purchase history into two numeric feature columns, then force both
# to numeric with 0 standing in for anything that could not be converted
purchase_columns = ['Num Purchases', 'Avg Purchase Amount']
data[purchase_columns] = data['Purchase History'].apply(parse_purchase_history)
for purchase_column in purchase_columns:
    data[purchase_column] = pd.to_numeric(data[purchase_column], errors='coerce').fillna(0)

# Preprocess the data
def preprocess_data(data):
    """Return a fully numeric float32 copy of ``data`` ready for the environment.

    Steps:
      * one-hot encode 'Location' and 'Gender' (first level dropped),
      * coerce 'Age', 'Time on Site' and 'Browsing History' to numbers, filling
        gaps with the median, the mean and 0 respectively,
      * min-max scale 'Age', 'Time on Site', 'Product Rating',
        'Browsing History', 'Num Purchases' and 'Avg Purchase Amount',
      * coerce every remaining column to numeric (non-numeric values become 0)
        and cast the frame to float32.

    Args:
        data: DataFrame containing the columns named above. It is not modified.

    Returns:
        A new float32 DataFrame.
    """
    # Work on a copy so the caller's frame is left untouched
    data = data.copy()
    data['Location'] = data['Location'].astype(str)
    data['Gender'] = data['Gender'].astype(str)
    data = pd.get_dummies(data, columns=['Location', 'Gender'], drop_first=True)  # One-hot encode categorical variables

    # Compute the fill value on the coerced column: taking the median/mean of the raw
    # column fails when it still contains non-numeric strings
    age = pd.to_numeric(data['Age'], errors='coerce')
    data['Age'] = age.fillna(age.median())
    time_on_site = pd.to_numeric(data['Time on Site'], errors='coerce')
    data['Time on Site'] = time_on_site.fillna(time_on_site.mean())
    data['Browsing History'] = pd.to_numeric(data['Browsing History'], errors='coerce').fillna(0)

    numeric_columns = ['Age', 'Time on Site', 'Product Rating', 'Browsing History', 'Num Purchases', 'Avg Purchase Amount']
    for column in numeric_columns:
        values = pd.to_numeric(data[column], errors='coerce')
        lowest = values.min()
        value_range = values.max() - lowest
        if pd.isna(value_range) or value_range == 0:
            # Constant or empty column: scaling would divide by zero, so map it to 0
            data[column] = 0.0
        else:
            data[column] = (values - lowest) / value_range

    # Ensure all columns are numeric and cast to float32 for compatibility
    data = data.apply(pd.to_numeric, errors='coerce').fillna(0).astype(np.float32)
    return data

# Replace the raw frame with its one-hot encoded, float32 version.
# NOTE: this rebinding is not re-runnable on its own — after it runs, 'Location' and 'Gender'
# no longer exist as columns, so calling preprocess_data on the result raises a KeyError.
data = preprocess_data(data)

# Function to calculate the dynamic price based on user attributes and weather
def calculate_dynamic_price(original_price, weather, Time_on_site, browsing_history, num_purchases, avg_purchase_amount, product_category):
    """Apply rule-based discounts to ``original_price`` and return the resulting price.

    Adjustments (added in this order, total capped at 0.30):
      * +0.10 when ``weather`` is truthy and the product category matches it,
      * +0.05 when ``browsing_history`` > 0.7, or -0.05 (a surcharge) when < 0.3,
      * +0.05 when ``Time_on_site`` > 0.5,
      * +0.05 when ``num_purchases`` > 5,
      * +0.05 when ``avg_purchase_amount`` > 50.

    The returned price is never below 0.

    NOTE(review): the environment's step() passes min-max scaled values for
    ``num_purchases`` and ``avg_purchase_amount``, for which the ``> 5`` and
    ``> 50`` rules cannot trigger — confirm the intended scale.
    """
    adjustments = []

    if weather and is_product_relevant_to_weather(weather, product_category):
        adjustments.append(0.10)

    if browsing_history > 0.7:
        adjustments.append(0.05)
    elif browsing_history < 0.3:
        adjustments.append(-0.05)

    if Time_on_site > 0.5:
        adjustments.append(0.05)

    if num_purchases > 5:
        adjustments.append(0.05)
    if avg_purchase_amount > 50:
        adjustments.append(0.05)

    # Accumulate in the order the rules fired
    total_discount = 0.0
    for adjustment in adjustments:
        total_discount += adjustment

    capped_discount = min(total_discount, 0.30)
    return max(original_price * (1 - capped_discount), 0)

# Module-level log: every call to DynamicPricingEnv.step appends one record here
results = []

# Custom Environment for Dynamic Pricing with PPO
class DynamicPricingEnv(gym.Env):
    """Gymnasium environment that visits the customer rows of ``data`` one per step.

    * Action: a single multiplier in [0.5, 1.5] applied to the rule-based price
      from ``calculate_dynamic_price``.
    * Observation: the current row without 'original_price' and
      'discounted_price', as float32.
    * Reward: +10 when a simulated purchase happens (a uniform draw above 0.5),
      plus 5 if the price exceeds the manufacturing cost or minus 5 if it does
      not; -5 when there is no purchase.

    The row index wraps around at the end of the data and the environment never
    signals termination or truncation itself.

    Each row must provide 'Customer ID', 'Product ID', 'original_price',
    'manufacturing_cost', 'Time on Site', 'Browsing History', 'Num Purchases'
    and 'Avg Purchase Amount'.
    """

    def __init__(self, data):
        """Store ``data`` (a DataFrame with the price columns present) and define the spaces."""
        super().__init__()
        self.data = data
        self.current_index = 0

        # Define action and observation space
        self.action_space = spaces.Box(low=0.5, high=1.5, shape=(1,), dtype=np.float32)
        # Observations contain columns such as 'manufacturing_cost' that are not scaled to
        # [0, 1], so the space is declared unbounded rather than promising a range it cannot keep
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(data.shape[1] - 2,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Rewind to the first row and return ``(observation, info)``."""
        # Seeds self.np_random, as the Gymnasium reset contract requires
        super().reset(seed=seed)
        self.current_index = 0
        observation = self._get_observation()
        info = {}
        return observation, info

    def _get_observation(self):
        """Return the current row, minus the two price columns, as a float32 array."""
        current_row = self.data.iloc[self.current_index]
        features = current_row.drop(['original_price', 'discounted_price']).to_numpy()
        return features.astype(np.float32)  # Ensure observation is float32

    def step(self, action):
        """Price the current row with ``action``, log the outcome and move to the next row.

        Args:
            action: Array-like whose first element is the price multiplier.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``info["suggested_price"]`` is the price that was evaluated.
        """
        user_data = self.data.iloc[self.current_index]
        Customer_ID = user_data["Customer ID"]
        Product_ID = user_data["Product ID"]
        original_price = user_data['original_price']
        manufacturing_cost = user_data['manufacturing_cost']

        # Since Location was one-hot encoded, let's assume a specific location or use default weather.
        weather = "Clear"  # Placeholder for a default or preprocessed value

        final_price = calculate_dynamic_price(
            original_price=original_price,
            weather=weather,
            Time_on_site=user_data['Time on Site'],
            browsing_history=user_data['Browsing History'],
            num_purchases=user_data['Num Purchases'],
            avg_purchase_amount=user_data['Avg Purchase Amount'],
            product_category="Winter Jacket"  # fixed placeholder category
        )

        # Never price below cost
        suggested_price = final_price * action[0]
        if suggested_price < manufacturing_cost:
            suggested_price = manufacturing_cost

        profit_margin = suggested_price - manufacturing_cost
        reward = 0
        # Draw from the environment's own generator so reset(seed=...) makes runs reproducible
        purchase_prob = self.np_random.random()

        if purchase_prob > 0.5:
            reward += 10
            if profit_margin > 0:
                reward += 5
            else:
                reward -= 5
        else:
            reward -= 5

        # Log the results
        results.append({
            "Episode": self.current_index + 1,
            "Manufacturing Cost": manufacturing_cost,
            "Original Price": original_price,
            "Suggested Price": suggested_price,
            "Product ID": Product_ID,
            "Customer ID": Customer_ID,
            "Reward": reward
        })

        # Wrap around so stepping can continue past the last row
        self.current_index = (self.current_index + 1) % len(self.data)
        observation = self._get_observation()

        # Set termination and truncation conditions
        terminated = False  # Set this to True if the episode should end after a step
        truncated = False  # Set this to True if the episode is cut short by time/other criteria

        info = {"suggested_price": suggested_price}

        return observation, reward, terminated, truncated, info

    def render(self, mode='console'):
        """No-op; the environment has nothing to render."""
        pass

# Environment setup
# Stable-Baselines3 wraps a single Gymnasium env in its own Monitor/DummyVecEnv, so the
# environment is passed in directly; Gymnasium's SyncVectorEnv is not an SB3 VecEnv.
SEED = 42  # fixed seed so training and the simulated purchases are repeatable
env = DynamicPricingEnv(data)

# Initialize PPO model with environment and train
# PPO collects whole rollouts, so at least one full rollout (n_steps, 2048 by default) is run
# even though fewer timesteps are requested.
model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
model.learn(total_timesteps=1000)

# Test the trained model for a few user cases
obs, _ = env.reset()
for _ in range(5):
    # Use the policy's deterministic action for evaluation instead of sampling
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, _ = env.reset()

# `results` holds one record per environment step taken above (training and testing)
results_df = pd.DataFrame(results)
results_df.to_csv("pricing_results.csv", index=False)

print("Results saved to pricing_results.csv")

# Save the model
model.save("dynamic_pricing_model")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 617  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
Results saved to pricing_results.csv
