In [1]:
import pandas as pd
import gym
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

In [62]:
data = pd.read_excel('/content/drive/MyDrive/Dataset/Marketing Campaign Dataset.xlsx')
# Inspect the first few rows
print(data.columns)

Index(['Campaign', 'Date', 'City/Location', 'Latitude', 'Longitude', 'Channel',
       'Device', 'Ad', 'Impressions', 'CTR, %', 'Clicks', 'Daily Average CPC',
       'Spend, GBP', 'Conversions', 'Total conversion value, GBP',
       'Likes (Reactions)', 'Shares', 'Comments'],
      dtype='object')


In [63]:
data=data.drop(['Latitude', 'Longitude'],axis=1)

In [64]:
obj_col=data.select_dtypes(include=['object']).columns.tolist()
le=LabelEncoder()
for i in obj_col: #down
    data[i] = le.fit_transform(data[i])

In [65]:
train_data=data.copy()
train_original_data = train_data.copy()

In [67]:
train_data['Date'] = pd.to_datetime(train_data['Date'])
train_data['DayOfWeek'] = train_data['Date'].dt.dayofweek
train_data['Month'] = train_data['Date'].dt.month

train_data = train_data.drop('Date', axis=1)

numerical_cols = ['Impressions', 'CTR, %', 'Clicks', 'Daily Average CPC', 'Spend, GBP', 'Conversions', 'Total conversion value, GBP', 'Likes (Reactions)', 'Shares', 'Comments']

# Normalize the numerical columns
scaler = MinMaxScaler()
train_data[numerical_cols] = scaler.fit_transform(train_data[numerical_cols])

In [68]:
from xgboost import XGBRegressor
reg_model = XGBRegressor(
    n_estimators=100,     # Number of trees
    learning_rate=0.1,    # Step size shrinkage
    max_depth=5,          # Maximum depth of a tree
    subsample=0.8,        # Fraction of samples to use for each tree
    colsample_bytree=0.8, # Fraction of features to use for each tree
    random_state=42       # For reproducibility
)

In [69]:
X = train_data[['Campaign', 'Channel', 'Device',
      'Spend, GBP','Daily Average CPC']]

y = train_data['Impressions']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg_model.fit(X_train, y_train)

y_pred = reg_model.predict(X_test)
within_range = np.abs(y_test - y_pred) <= (0.2 * np.abs(y_test))
accuracy = np.mean(within_range) * 100
print(f"Accuracy within 10% tolerance: {accuracy}%")

Accuracy within 10% tolerance: 75.81369248035915%


In [70]:
class MarketingEnv(gym.Env):
    def __init__(self, data, original_data,reg_model):
        super(MarketingEnv, self).__init__()
        self.data = data.reset_index(drop=True)
        self.original_data = original_data.reset_index(drop=True)
        self.current_step = 0
        self.reg_model=reg_model
        self.action_space = gym.spaces.Discrete(5)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(len(data.columns),), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        assert self.current_step < len(self.data), "Current step is out of bounds"
        return self.data.loc[self.current_step].values

    def cal_impressions(self):
      columns = ['Campaign', 'Channel', 'Device', 'Spend, GBP', 'Daily Average CPC']
      selected_data = self.original_data.loc[self.current_step, columns]
      self.original_data.iloc[self.current_step]['Impressions']=self.reg_model.predict([selected_data])

    def step(self, action):
        self._take_action(action)
        self.current_step += 1
        done = self.current_step >= len(self.data)
        reward = self._calculate_reward(action)
        if not done:
            assert self.current_step < len(self.data), "Current step is out of bounds"
            state = self.data.loc[self.current_step].values
        else:
            state = np.zeros(self.observation_space.shape)
        info = {}
        return state, reward, done, info

    def _take_action(self, action):
        if action == 0:
            self.original_data.at[self.current_step, 'Channel'] = 0 #Encoded label for Facebook
        elif action == 1:
            self.original_data.at[self.current_step, 'Channel'] = 1 #Encoded label for Instagram
        elif action == 2:
            self.original_data.at[self.current_step, 'Channel'] = 2 #Encoded label for Pintrest
        elif action == 3:  # Increase budget allocation to a specific channel
            channel = self.original_data.at[self.current_step, 'Channel']
            if channel == 'Facebook':
                self.original_data.at[self.current_step, 'Spend, GBP'] *= 1.08
            elif channel == 'Instagram':
                self.original_data.at[self.current_step, 'Spend, GBP'] *= 1.10
            elif channel == 'Pinterest':
                self.original_data.at[self.current_step, 'Spend, GBP'] *= 1.09
        elif action == 4:  # Decrease budget allocation to a specific channel
            channel = self.original_data.at[self.current_step, 'Channel']
            if channel == 'Facebook':
                self.original_data.at[self.current_step, 'Spend, GBP'] *= 0.8
            elif channel == 'Instagram':
                self.original_data.at[self.current_step, 'Spend, GBP'] *= 0.95
            elif channel == 'Pinterest':
                self.original_data.at[self.current_step, 'Spend, GBP'] *= 0.9
        self.cal_impressions()

    def _calculate_reward(self, action):
        if self.current_step >= len(self.original_data):
            return 0
        reward = 0
        if action == 0:  # Increase bid amount by 10%
            reward = self.original_data.iloc[self.current_step]['Impressions']
        elif action == 1:  # Decrease bid amount by 10%
            reward = self.original_data.iloc[self.current_step]['Impressions']
        elif action == 2:  # Increase budget allocation to a specific channel
            reward = self.original_data.iloc[self.current_step]['Impressions']
        elif action == 3:  # Decrease budget allocation to a specific channel
            reward = self.original_data.iloc[self.current_step]['Impressions']
        return reward

In [71]:
from stable_baselines3.common.vec_env import DummyVecEnv
# Create the environment
env = DummyVecEnv([lambda: MarketingEnv(train_data, train_original_data,reg_model)])

In [72]:
from stable_baselines3 import PPO
# Train the RL model
model = PPO('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=1000)
# Save the model
model.save("ppo_marketing")

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 280  |
|    iterations      | 1    |
|    time_elapsed    | 7    |
|    total_timesteps | 2048 |
-----------------------------


In [73]:
model = PPO.load("ppo_marketing")
#Test the model
obs = env.reset()
for i in range(len(train_data)-1):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    print(f"Step {i+1}")
    print(f"Action: {action}")  # The action taken by the model (0-4)
    print(f"Reward: {rewards}")  # The reward received for the action
    print(f"State: {obs}")  # The new state after taking the action
    print(f"Done: {dones}")  # Whether the episode is done
    env.render()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 8200
Action: [2]
Reward: [1575.8]
State: [[1.0000000e+00 1.0000000e+00 2.0000000e+00 0.0000000e+00 0.0000000e+00
  4.0946439e-01 4.9242425e-01 3.3270848e-01 4.2735045e-03 2.3040897e-03
  9.0909094e-02 0.0000000e+00 4.3682796e-01 1.5384616e-01 0.0000000e+00
  6.0000000e+00 3.0000000e+00]]
Done: [False]
Step 8201
Action: [2]
Reward: [1202.8]
State: [[1.         1.         1.         0.         1.         0.3124805
  0.56666666 0.2922432  0.36752138 0.17405175 0.45454547 0.24265645
  0.5913978  0.2905983  0.36842105 0.         5.        ]]
Done: [False]
Step 8202
Action: [3]
Reward: [3049.]
State: [[ 0.          1.          0.          1.          0.          0.7925117
   0.5984849   0.7824092   0.3195838   0.40520057  0.27272728  0.07867178
   0.4032258   0.05128205  0.14035088  2.         10.        ]]
Done: [False]
Step 8203
Action: [2]
Reward: [145.8]
State: [[2.         0.         2.         0.         0.         0

In [76]:
test_instance = train_data.iloc[0].values  # Use the first row of your training data as an example
   # or
   #test_instance = np.array([0, 1, 0, 0.5, 0.2])  # Create a custom instance

In [77]:
action, _states = model.predict(test_instance)

In [80]:
def interpret_action(action):
    if action == 0:
        return "Switching channel to Facebook"
    elif action == 1:
        return "Switching channel to Instagram"
    elif action == 2:
        return "Switching channel to Pinterest"
    elif action == 3:
        return "Increasing budget allocation to current channel"
    elif action == 4:
        return "Decreasing budget allocation to current channel"

interpret_action(action)

'Switching channel to Pinterest'