In [17]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from stable_baselines3 import PPO  # Import the RL algorithm
from stable_baselines3.common.env_util import make_vec_env
import torch
from torch import nn
from torch.optim import Adam
import gymnasium as gym
from gym import spaces

# 1. Simulate User Behavior (Modified to Generate Excel)
def simulate_user_behavior(num_users, num_days, excel_file_path="user_behavior.xlsx", seed=42):
    """Simulate per-session user activity and label short-term retention.

    For every user and every one of the ``num_days`` calendar days preceding
    today, 0-2 sessions are generated. Each session gets a timestamp between
    08:00 and 21:59 of that day, a time-of-day bucket, the weekday name, a
    viewed product category, an action and the last marketing channel.

    Args:
        num_users: Number of users to simulate (ids ``0 .. num_users - 1``).
        num_days: Number of days of history to generate, ending yesterday.
        excel_file_path: Where to write the result as an Excel sheet. Pass
            ``None`` to skip writing (useful for tests / dry runs).
        seed: Seed for both the numpy global RNG and the local categorical
            picker, so repeated calls on the same day yield identical data.

    Returns:
        pandas.DataFrame with one row per session and the columns
        ``user_id, timestamp, time_of_day, day_of_week, product_category,
        action, channel_last, session_date, returned_within_3_days``.
        ``returned_within_3_days`` is 1 when, among the user's sessions dated
        within the last three days, there is a purchase, or there is at least
        one session and none of them is a drop-off; otherwise 0.
    """
    np.random.seed(seed)  # numeric draws: session counts, hours, minutes, actions
    # np.random.seed does not affect the stdlib ``random`` module, so use a
    # dedicated seeded picker for the categorical choices.
    picker = random.Random(seed)

    product_categories = ["electronics", "clothing", "books", "home goods"]
    channels = ["email", "sms", "push", None]
    actions = ["visit", "view_product", "add_to_cart", "purchase", "drop_off"]
    action_probs = [0.4, 0.3, 0.15, 0.05, 0.1]
    columns = ["user_id", "timestamp", "time_of_day", "day_of_week",
               "product_category", "action", "channel_last"]

    # Anchor everything to midnight of the current day: otherwise the hour
    # offsets below are added on top of the current wall-clock time and the
    # sessions drift outside the 08:00-22:00 window (and onto the next day).
    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    start_date = today - timedelta(days=num_days)

    user_data = []
    for user_id in range(num_users):
        for day in range(num_days):
            date = start_date + timedelta(days=day)
            num_sessions = np.random.randint(0, 3)  # 0 to 2 sessions per day
            for _ in range(num_sessions):
                timestamp = date + timedelta(hours=int(np.random.randint(8, 22)),
                                             minutes=int(np.random.randint(0, 60)))
                if 8 <= timestamp.hour < 12:
                    time_of_day = "morning"
                elif 12 <= timestamp.hour < 18:
                    time_of_day = "afternoon"
                else:
                    time_of_day = "night"
                viewed_category = picker.choice(product_categories)
                action = np.random.choice(actions, p=action_probs)
                channel_last = picker.choice(channels)
                user_data.append({
                    "user_id": user_id,
                    "timestamp": timestamp,
                    "time_of_day": time_of_day,
                    "day_of_week": date.strftime("%A"),
                    "product_category": viewed_category,
                    "action": action,
                    "channel_last": channel_last,
                })

    # Explicit columns + datetime conversion keep the empty case well-formed.
    df = pd.DataFrame(user_data, columns=columns)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['session_date'] = df['timestamp'].dt.date

    # Retention label from the sessions dated within the last three days.
    cutoff = today.date() - timedelta(days=3)
    recent = df[df['session_date'] >= cutoff]
    retention = {user_id: 0 for user_id in range(num_users)}
    for user_id, recent_actions in recent.groupby('user_id')['action']:
        seen = set(recent_actions)
        # Groups are never empty, so "at least one session" holds here.
        retention[int(user_id)] = int('purchase' in seen or 'drop_off' not in seen)
    df['returned_within_3_days'] = df['user_id'].map(retention)

    if excel_file_path is not None:
        df.to_excel(excel_file_path, index=False)
        print(f"Saved user behavior data to {excel_file_path}")
    return df

In [13]:
def feature_engineering(df):
    """
    Engineers per-user features from the simulated user behavior data.

    Args:
        df: The Pandas DataFrame containing the simulated user behavior data.
            It must provide the columns ``user_id``, ``timestamp``,
            ``product_category``, ``action``, ``channel_last``,
            ``returned_within_3_days`` and ``day_of_week``.

    Returns:
        A Pandas DataFrame with one row per user and the columns
        ``user_id, last_session_time, product_interest, last_channel,
        returned, time_slot, engagement_score, recency, frequency,
        monetary_proxy, day_of_week, offer_history, channel_preference``.
    """
    # Points awarded per action; any other action contributes 0.
    action_scores = {'view_product': 1, 'add_to_cart': 2, 'purchase': 3}

    grouped = df.groupby('user_id')
    user_sessions = grouped.agg(
        last_session_time=('timestamp', 'max'),
        # sorted() makes the joined string deterministic (set order is not).
        product_interest=('product_category', lambda x: ', '.join(sorted(set(x)))),
        last_channel=('channel_last', 'last'),
        returned=('returned_within_3_days', 'max')  # If any session resulted in return
    ).reset_index()

    user_sessions['time_slot'] = pd.cut(user_sessions['last_session_time'].dt.hour,
                                       bins=[0, 12, 18, 24],
                                       labels=['morning', 'afternoon', 'night'],
                                       right=False)

    # The per-user aggregates below are indexed by user_id, whereas
    # user_sessions has a fresh 0..n-1 index after reset_index(). Look the
    # values up through the user_id column instead of relying on index
    # alignment, which is only correct when the ids happen to be 0..n-1.
    user_ids = user_sessions['user_id']

    engagement = df['action'].map(action_scores).fillna(0).groupby(df['user_id']).sum()
    user_sessions['engagement_score'] = user_ids.map(engagement).astype(int)

    now = datetime.now()
    user_sessions['recency'] = (now - user_sessions['last_session_time']).dt.days
    user_sessions['frequency'] = user_ids.map(grouped.size())

    # Users without any add-to-cart event get 0 (fill AFTER the lookup).
    cart_counts = df.loc[df['action'] == 'add_to_cart'].groupby('user_id').size()
    user_sessions['monetary_proxy'] = user_ids.map(cart_counts).fillna(0).astype(int)

    #  Include day of week.  Make sure this works even if the column is already present.
    if 'day_of_week' not in user_sessions.columns:
        # Uses the day of week of each user's first row in df.
        first_days = df[['user_id', 'day_of_week']].drop_duplicates(subset='user_id')
        user_sessions = user_sessions.merge(first_days, on='user_id', how='left')
    user_sessions['offer_history'] = 0
    user_sessions['channel_preference'] = user_sessions['last_channel']

    return user_sessions

In [57]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class TestEnv(gym.Env):
    """Minimal working environment for Gymnasium 1.1.1

    A smoke-test environment with a 2-way discrete action space and a
    4-dimensional float32 observation. Observations and rewards are random
    values in [0, 1) and episodes never terminate or truncate on their own.
    Randomness is drawn from the environment's own ``np_random`` generator so
    that ``reset(seed=...)`` makes rollouts reproducible.
    """
    metadata = {'render_modes': ['human'], 'render_fps': 4}

    def __init__(self, render_mode=None):
        """Create the spaces and remember the requested render mode."""
        super().__init__()

        # Required attributes
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

        # Render mode handling
        self.render_mode = render_mode

    def reset(self, seed=None, options=None):
        """Seed the environment RNG and return ``(obs, info)`` with an all-zero observation."""
        # Seeds self.np_random when a seed is given.
        super().reset(seed=seed)
        return np.zeros(self.observation_space.shape, dtype=np.float32), {}

    def step(self, action):
        """Return ``(obs, reward, terminated, truncated, info)`` with random obs/reward.

        ``action`` is accepted but does not influence the outcome.
        """
        # Use the env-owned generator rather than the global numpy RNG so the
        # seed passed to reset() actually controls these draws.
        obs = self.np_random.random(self.observation_space.shape).astype(np.float32)
        reward = float(self.np_random.random())
        terminated = False
        truncated = False
        info = {}

        if self.render_mode == 'human':
            self.render()

        return obs, reward, terminated, truncated, info

    def render(self):
        """Print a placeholder line in 'human' mode; always returns None."""
        if self.render_mode == 'human':
            print("Rendering environment state...")
        return None

    def close(self):
        """Nothing to release for this environment."""
        # Clean up resources if needed
        pass

In [58]:
from stable_baselines3.common.env_checker import check_env
# Verify environment creation
# Manual smoke test of TestEnv: construct it, then exercise reset() and step()
# once and print what comes back.
# NOTE(review): `check_env` is imported in this cell but never called here —
# confirm whether `check_env(env)` was intended as part of this verification.
env = TestEnv()
print(f"Is gym.Env: {isinstance(env, gym.Env)}")
print(f"Has action space: {env.action_space}")
print(f"Has observation space: {env.observation_space}")

# Test reset
# reset() is unpacked as a 2-tuple: (observation, info).
obs, info = env.reset()
print(f"Reset obs shape: {obs.shape}, dtype: {obs.dtype}")

# Test step
# Sample a random valid action and take a single step with it.
action = env.action_space.sample()
result = env.step(action)
print(f"Step returns: {len(result)} elements")  # Should be 5

Is gym.Env: True
Has action space: Discrete(2)
Has observation space: Box(-1.0, 1.0, (4,), float32)
Reset obs shape: (4,), dtype: float32
Step returns: 5 elements


In [82]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import pandas as pd

class RecommendationEnv(gym.Env):
    """Fixed recommendation environment

    One episode walks through ``user_data`` row by row. At each step the
    agent picks an index into ``action_list`` for the current user, receives
    a reward, and the environment advances to the next row. The episode
    terminates once every row has been visited.

    Args:
        user_data: Table of users accessed positionally via ``.iloc`` and
            ``len()``; each row is read with ``.get(<feature name>, 0)``.
        action_list: Sequence of actions. The reward logic reads element 0
            (compared against ``'discount'``) and element 2 (compared against
            ``'email'``) of the chosen action.
        render_mode: Stored on the instance; no rendering is implemented here.
    """

    metadata = {'render_modes': ['console'], 'render_fps': 4}

    def __init__(self, user_data, action_list, render_mode=None):
        super().__init__()
        # Single source of truth for the observation layout: the observation
        # vector holds these features in exactly this order.
        self.feature_names = [
            'recency',
            'frequency',
            'monetary_value',
            'engagement_score',
            'discount_sensitive',
            'free_shipping_preferred',
            'email_opens',
            'sms_response',
            'push_response',
            'last_purchase_days'
        ]
        self.user_data = user_data
        self.action_list = action_list
        self.current_user_idx = 0

        # Define spaces (observation size follows the feature list)
        self.action_space = spaces.Discrete(len(action_list))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(len(self.feature_names),), dtype=np.float32)
        self.render_mode = render_mode

    def reset(self, seed=None, options=None):
        """Restart at the first user and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.current_user_idx = 0
        return self._get_state(self.current_user_idx), {}

    def step(self, action):
        """Apply ``action`` to the current user and advance to the next one.

        Args:
            action: Index into ``action_list``. Plain ints, numpy integers,
                0-d arrays and 1-element arrays/lists are all accepted; for
                longer sequences the first element is used.

        Returns:
            ``(next_state, reward, terminated, truncated, info)``. When the
            last user has been processed ``terminated`` is True and
            ``next_state`` is all zeros. ``truncated`` is always False.
        """
        # Normalise to a Python int; unlike ``action[0]`` this also handles
        # 0-d numpy arrays, which cannot be indexed.
        action = int(np.asarray(action).reshape(-1)[0])

        reward = self._calculate_reward(action)
        self.current_user_idx += 1
        terminated = self.current_user_idx >= len(self.user_data)

        if terminated:
            next_state = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            next_state = self._get_state(self.current_user_idx)

        return next_state, float(reward), terminated, False, {}

    def _get_state(self, user_idx):
        """Build the float32 observation for the row at position ``user_idx``.

        Features missing from the row default to 0; booleans become 0.0/1.0.
        """
        user = self.user_data.iloc[user_idx]
        return np.array(
            [float(user.get(name, 0)) for name in self.feature_names],
            dtype=np.float32)

    def _calculate_reward(self, action_idx):
        """Score ``action_list[action_idx]`` for the current user.

        Base reward 0.1, +0.5 for a discount offered to a discount-sensitive
        user, +0.3 for an email sent to a user whose ``email_opens`` exceeds
        0.5. The result is clipped to [0, 1].
        """
        user = self.user_data.iloc[self.current_user_idx]
        action = self.action_list[action_idx]

        reward = 0.1
        if action[0] == 'discount' and user.get('discount_sensitive', False):
            reward += 0.5
        if action[2] == 'email' and user.get('email_opens', 0) > 0.5:
            reward += 0.3

        return min(max(reward, 0), 1)  # Clip to [0,1]

    def format_state(self, state_values):
        """Render a state vector as ``name: value`` lines (two decimals)."""
        return "\n".join([f"{name}: {value:.2f}" for name, value in zip(self.feature_names, state_values)])

def main():
    """Train a PPO agent on synthetic users and print sample recommendations.

    Builds a random 100-user table, wraps ``RecommendationEnv`` in a
    ``DummyVecEnv``, trains PPO, then prints the recommended action and the
    state features for five randomly sampled users.

    Returns:
        The trained PPO model, so callers (and later notebook cells) can
        reuse or save it.
    """
    # 1. Generate realistic sample data
    num_users = 100
    user_data = pd.DataFrame({
        'recency': np.random.randint(1, 30, num_users),
        'frequency': np.random.randint(1, 10, num_users),
        'monetary_value': np.random.uniform(10, 200, num_users),
        'engagement_score': np.random.uniform(0, 1, num_users),
        'discount_sensitive': np.random.random(num_users) > 0.7,
        'free_shipping_preferred': np.random.random(num_users) > 0.7,
        'email_opens': np.random.uniform(0, 1, num_users),
        'sms_response': np.random.uniform(0, 1, num_users),
        'push_response': np.random.uniform(0, 1, num_users),
        'last_purchase_days': np.random.randint(1, 90, num_users)
    })

    # 2. Define simplified action space
    # Each action is (offer_type, amount, channel, timing).
    action_list = [
        ('discount', 10, 'email', 'immediate'),
        ('discount', 15, 'sms', 'immediate'),
        ('free_shipping', 0, 'email', 'next_day'),
        ('free_shipping', 0, 'push', 'immediate')
    ]

    # 3. Create and wrap environment
    env = DummyVecEnv([lambda: RecommendationEnv(user_data, action_list)])

    # 4. Train with robust settings
    model = PPO(
        "MlpPolicy",
        env,
        verbose=1,
        learning_rate=3e-4,
        n_steps=2048,
        batch_size=64,
        n_epochs=10,
        gamma=0.99,
        clip_range=0.2
    )

    # A single short training run. Note that the recorded output of this cell
    # reports total_timesteps = 2048 (one full n_steps rollout) even though
    # 1000 were requested.
    model.learn(total_timesteps=1000)

    # 5. Test predictions safely
    test_users = user_data.sample(5)
    for idx in test_users.index:
        try:
            # _get_state() indexes positionally (.iloc), so translate the
            # sampled index label into a row position first.
            position = user_data.index.get_loc(idx)

            # Get state directly from the underlying environment
            state = env.envs[0]._get_state(position)

            # Ensure proper shape for prediction
            if state.ndim == 1:
                state = state.reshape(1, -1)

            action, _ = model.predict(state, deterministic=True)

            # Handle action output properly
            if isinstance(action, np.ndarray):
                action = action[0]

            print(f"\nUser {idx} recommendation: {action_list[action]}")
            print("State features:")

            print(env.envs[0].format_state(state.flatten()))

        except Exception as e:
            # Best-effort demo loop: report the failure and continue.
            print(f"Error processing user {idx}: {str(e)}")

    return model

if __name__ == "__main__":
    # Bind the trained model at notebook scope so later cells (e.g. the one
    # that saves it) do not depend on leftover kernel state.
    model = main()

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 726  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------

User 28 recommendation: ('free_shipping', 0, 'email', 'next_day')
State features:
recency: 1.00
frequency: 7.00
monetary_value: 169.97
engagement_score: 0.40
discount_sensitive: 1.00
free_shipping_preferred: 1.00
email_opens: 0.56
sms_response: 0.88
push_response: 0.37
last_purchase_days: 25.00

User 50 recommendation: ('discount', 10, 'email', 'immediate')
State features:
recency: 4.00
frequency: 7.00
monetary_value: 66.73
engagement_score: 0.55
discount_sensitive: 0.00
free_shipping_preferred: 0.00
email_opens: 0.34
sms_response: 0.02
push_response: 0.08
last_purchase_days: 32.00

User 2 recommendation: ('free_shipping', 0, 'email', 'next_day')
State features:
recency: 4.00
frequency: 1.00
monetary_value: 147.66
engagement_score: 0.97
discount_sensitive: 0.00


In [71]:
# Save the object bound to `model` by calling its .save() method with a target path.
# NOTE(review): `model` must already exist at notebook (global) scope when this
# cell runs — verify on a fresh kernel (Restart & Run All). This cell's
# execution count (71) is lower than the training cell's (82), so it was run
# out of order.
# NOTE(review): the target is an absolute, user-specific Windows path; consider
# a configurable relative path so the notebook runs on other machines.
# NOTE(review): presumably the saved file is a Stable-Baselines3 archive rather
# than a raw PyTorch ".pth" checkpoint — TODO confirm the extension is intended.
model.save(r"C:\Users\prash\OneDrive\Desktop\agent_RL.pth")