# Ad Placement Optimization using Deep Q-Networks (DQN)

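This notebook implements a reinforcement learning approach to optimizing ad placements based on user characteristics and historical data. We use a Deep Q-Network (DQN) to learn the ad placement strategy that maximizes click-through rate (CTR). Both the dataset and the reward signal are synthetic (simulated), so the results illustrate the method rather than real-world performance.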

## Setup and Imports

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import random
from collections import deque
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import logging

# Configure root logging once (INFO and above, timestamped records), then
# grab a logger named after this module for all notebook messages.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

## Environment for Ad Placement

In [None]:
# Custom Environment for Ad Placement Optimization
class AdPlacementEnv:
    """Simulated ad-placement environment that replays rows of a DataFrame.

    Each row of ``data`` is one observation. The state is the min-max scaled
    values of ``FEATURE_COLUMNS`` for the current row, shaped
    ``(1, state_size)``. An action is an index into ``ACTION_IMPACT``
    (0 = top_banner, 1 = sidebar, 2 = popup).

    ``state_size`` is derived from ``FEATURE_COLUMNS``. It used to be
    hard-coded to 5 while only four columns were scaled, which made every
    reshape in ``reset``/``step`` fail.
    """

    # Columns that make up the state vector, in state order.
    FEATURE_COLUMNS = ['user_age', 'time_of_day', 'historical_ctr', 'ad_position']
    # Position of 'historical_ctr' inside the state vector (read by the reward).
    CTR_INDEX = FEATURE_COLUMNS.index('historical_ctr')
    # Relative CTR uplift applied per action: top_banner, sidebar, popup.
    ACTION_IMPACT = (0.1, 0.05, 0.02)

    def __init__(self, data, batch_size=32):
        """Scale the feature columns of ``data`` and initialise episode state.

        Args:
            data: pandas DataFrame containing at least ``FEATURE_COLUMNS``.
            batch_size: stored on the instance; ``train_agent`` divides
                ``max_steps`` by it to get the number of steps per episode.
        """
        self.data = data
        self.batch_size = batch_size
        self.scaler = MinMaxScaler()
        self.state_size = len(self.FEATURE_COLUMNS)
        self.action_size = len(self.ACTION_IMPACT)
        self.current_step = 0
        self.episode_reward = 0
        self.max_steps = len(data)

        # Normalize features so every state component is on a comparable scale.
        self.scaled_data = self.scaler.fit_transform(self.data[list(self.FEATURE_COLUMNS)])

    def _blank_state(self):
        """Return the all-zero placeholder state used once the data is exhausted."""
        return np.zeros((1, self.state_size))

    def reset(self):
        """Rewind to the first row and return its state, shaped ``(1, state_size)``."""
        self.current_step = 0
        self.episode_reward = 0
        state = self.scaled_data[self.current_step]
        return np.reshape(state, [1, self.state_size])

    def step(self, action):
        """Apply ``action`` to the current row and advance by one row.

        Returns:
            ``(next_state, reward, done, info)``. ``next_state`` always has
            shape ``(1, state_size)`` and is all zeros once the last row has
            been consumed. ``info`` is an empty dict.
        """
        if self.current_step >= self.max_steps:
            # Already past the end: same shape as every other return path.
            return self._blank_state(), 0, True, {}

        reward = self._get_reward(action, self.scaled_data[self.current_step])
        self.episode_reward += reward
        self.current_step += 1

        done = self.current_step >= self.max_steps
        if done:
            next_state = self._blank_state()
        else:
            next_state = np.reshape(self.scaled_data[self.current_step], [1, self.state_size])

        return next_state, reward, done, {}

    def _get_reward(self, action, state):
        """Simulate the reward for showing placement ``action`` in ``state``.

        The reward is the (scaled) historical CTR of the row multiplied by
        ``1 + ACTION_IMPACT[action] + noise``, where ``noise`` is drawn from
        a normal distribution with standard deviation 0.01.

        ``state`` may be a 1-D feature row or a ``(1, state_size)`` array.
        """
        base_ctr = np.ravel(state)[self.CTR_INDEX]
        noise = np.random.normal(0, 0.01)  # Add stochasticity
        return base_ctr * (1 + self.ACTION_IMPACT[action] + noise)

## DQN Agent Implementation

In [None]:
# DQN Agent
class DQNAgent:
    """Deep Q-Network agent with experience replay and a target network.

    Attributes:
        memory: bounded replay buffer of ``(state, action, reward,
            next_state, done)`` tuples (oldest entries are evicted first).
        model: online Q-network that is trained and used to pick actions.
        target_model: copy of ``model`` used to compute bootstrap targets;
            refreshed by ``update_target_model``.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
        """Create both networks and start with identical weights.

        Args:
            state_size: number of features in one state vector.
            action_size: number of discrete actions.
            learning_rate: Adam learning rate.
            gamma: discount factor applied to the next-state value.
            epsilon: initial probability of taking a random action.
            epsilon_min: floor for ``epsilon``.
            epsilon_decay: multiplicative decay applied after each replay.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    def _build_model(self):
        """Build and compile the Q-network: state in, one Q-value per action out."""
        model = models.Sequential([
            # An explicit Input layer replaces the deprecated `input_dim=` argument.
            layers.Input(shape=(self.state_size,)),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(32, activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action epsilon-greedily.

        Args:
            state: one state, either 1-D or shaped ``(1, state_size)``.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # predict() needs a batch axis; accept a bare feature vector too.
        batch = np.reshape(state, (1, -1))
        act_values = self.model.predict(batch, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the online network on a random minibatch from memory.

        Does nothing until at least ``batch_size`` transitions are stored.
        After fitting, ``epsilon`` is decayed but never below ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Stored states are (1, state_size); flatten the extra axis so the
        # networks see a (batch, state_size) matrix instead of a 3-D tensor.
        states = np.reshape(np.asarray(states, dtype=np.float32), (batch_size, self.state_size))
        next_states = np.reshape(np.asarray(next_states, dtype=np.float32), (batch_size, self.state_size))
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=np.float32)
        dones = np.asarray(dones, dtype=np.float32)

        # Bellman target: r + gamma * max_a' Q_target(s', a'), with no bootstrap on terminal steps.
        next_q = self.target_model.predict(next_states, verbose=0)
        targets = rewards + self.gamma * np.max(next_q, axis=1) * (1.0 - dones)

        # Only the taken action's Q-value is moved; other outputs keep their prediction.
        target_f = np.array(self.model.predict(states, verbose=0))
        target_f[np.arange(batch_size), actions] = targets

        self.model.fit(states, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def save(self, name):
        """Write the online network's weights to ``name``.

        NOTE(review): Keras 3 requires weight filenames to end in
        ``.weights.h5``; confirm callers pass a suitable name.
        """
        self.model.save_weights(name)

## Data Generation and Helper Functions

In [None]:
# Generate Synthetic Data (for demonstration)
def generate_synthetic_data(n_samples=50000, seed=42):
    """Build a random DataFrame of ad-impression features for demonstration.

    Args:
        n_samples: number of rows to generate.
        seed: value passed to ``np.random.seed`` before drawing, so repeated
            calls with the same seed return identical data. The default (42)
            reproduces the original fixed-seed behaviour. Pass a different
            seed to get a genuinely different sample, or ``None`` to leave
            NumPy's global random state untouched.

    Returns:
        DataFrame with columns ``user_age`` (ints in [18, 65)), ``ad_type``
        (0 = top_banner, 1 = sidebar, 2 = popup), ``time_of_day``
        (uniform in [0, 24)), ``historical_ctr`` (uniform in [0.01, 0.1))
        and ``ad_position`` (uniform in [0, 1)).
    """
    if seed is not None:
        # NOTE: this reseeds NumPy's *global* generator, as the original did.
        np.random.seed(seed)

    ad_type_codes = {'top_banner': 0, 'sidebar': 1, 'popup': 2}
    # Draw order is kept as-is so the default seed yields the same data as before.
    data = pd.DataFrame({
        'user_age': np.random.randint(18, 65, n_samples),
        'ad_type': np.random.choice(list(ad_type_codes), n_samples),
        'time_of_day': np.random.uniform(0, 24, n_samples),
        'historical_ctr': np.random.uniform(0.01, 0.1, n_samples),
        'ad_position': np.random.uniform(0, 1, n_samples),
    })
    data['ad_type'] = data['ad_type'].map(ad_type_codes)
    return data

In [None]:
# Training and Evaluation
def train_agent(env, agent, episodes=1000, batch_size=32):
    """Train ``agent`` on ``env`` and return the total reward of each episode.

    Every episode restarts the environment and runs at most
    ``env.max_steps // env.batch_size`` steps. After each non-terminal step
    the agent replays a minibatch of ``batch_size`` transitions. The target
    network is synced once per episode, and weights are checkpointed every
    10 episodes.

    Args:
        env: environment exposing ``reset()``, ``step(action)``,
            ``max_steps`` and ``batch_size``.
        agent: agent exposing ``act``, ``remember``, ``replay``,
            ``update_target_model``, ``save`` and ``epsilon``.
        episodes: number of episodes to run.
        batch_size: minibatch size handed to ``agent.replay``.

    Returns:
        List with one total-reward value per episode.
    """
    rewards_history = []
    for e in range(episodes):
        state = env.reset()
        total_reward = 0
        for _ in range(env.max_steps // env.batch_size):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            if done:
                break
            agent.replay(batch_size)
        agent.update_target_model()
        rewards_history.append(total_reward)
        logger.info(f"Episode {e + 1}/{episodes} - Total Reward: {total_reward:.2f} - Epsilon: {agent.epsilon:.4f}")
        if (e + 1) % 10 == 0:
            logger.info(f"Saving model checkpoint at episode {e + 1}")
            # Keras 3 `save_weights` rejects names that do not end in
            # '.weights.h5'; this suffix is also valid (HDF5) on legacy tf.keras.
            agent.save(f'dqn_model_ep{e + 1}.weights.h5')
    return rewards_history

In [None]:
# Real-Time Adjustment Function
def real_time_adjustment(agent, data_stream, window_size=100, env=None):
    """Let the agent pick a placement once per window of an incoming stream.

    The stream is cut into consecutive, non-overlapping windows of
    ``window_size`` rows (a trailing partial window is skipped). For each
    window, only its first row is used: the agent chooses an action for that
    row and the environment's simulated reward for it is recorded.

    Args:
        agent: object with an ``act(state)`` method.
        data_stream: DataFrame containing the four feature columns
            ``user_age``, ``time_of_day``, ``historical_ctr``, ``ad_position``.
        window_size: number of rows per window.
        env: environment whose ``_get_reward`` scores the action. When
            omitted, the module-level ``env`` is used, so existing
            three-argument calls keep working.

    Returns:
        List with one simulated reward per complete window (empty if the
        stream is empty or shorter than one window).

    Raises:
        ValueError: if no ``env`` is passed and no global ``env`` exists.
    """
    reward_env = env if env is not None else globals().get('env')
    if reward_env is None:
        raise ValueError("real_time_adjustment needs an environment: pass env=... or define a global `env`.")

    if len(data_stream) == 0:
        return []

    # NOTE(review): a fresh scaler is fitted on the stream itself, as before;
    # reusing the scaler fitted on the training data would keep features on
    # exactly the training scale — confirm which is intended.
    scaler = MinMaxScaler()
    scaled_stream = scaler.fit_transform(data_stream[['user_age', 'time_of_day', 'historical_ctr', 'ad_position']])
    moving_avg_ctr = []
    for i in range(0, len(scaled_stream) - window_size + 1, window_size):
        window = scaled_stream[i:i + window_size]
        # Infer the feature count from the row instead of hard-coding it
        # (the old `[1, 5]` reshape failed on these four features).
        state = np.reshape(window[0], [1, -1])
        action = agent.act(state)
        reward = reward_env._get_reward(action, window[0])
        moving_avg_ctr.append(reward)
        if i % 100 == 0:
            logger.info(f"Window {i//window_size + 1} - Avg CTR: {np.mean(moving_avg_ctr[-100:]):.4f}")
    return moving_avg_ctr

## Main Execution

In [None]:
# Build the synthetic dataset, then wire the environment and the agent to it
data = generate_synthetic_data(n_samples=50000)
env = AdPlacementEnv(data=data)
agent = DQNAgent(env.state_size, env.action_size)

# Preview the first rows (rendered as the cell output)
data.head()

In [None]:
# Run DQN training and keep the per-episode reward totals for plotting
logger.info("Starting training process...")
rewards_history = train_agent(env=env, agent=agent, episodes=1000, batch_size=32)

In [None]:
# Visualise how the per-episode total reward evolved during training,
# saving the chart to disk before showing it
plt.figure(figsize=(10, 6))
plt.plot(rewards_history)
plt.gca().set(
    title='Training Rewards Over Episodes',
    xlabel='Episode',
    ylabel='Total Reward',
)
plt.grid(True)
plt.savefig('training_rewards.png')
plt.show()

In [None]:
# Simulate real-time adjustment
logger.info("Simulating real-time adjustment...")
data_stream = data.sample(frac=0.1)  # 10% of data for simulation
ctr_results = real_time_adjustment(agent, data_stream)
# Report only what was measured: the values are simulated rewards, and no
# baseline or target is computed here, so no "target achieved" claim is made.
if ctr_results:
    print(f"Average simulated CTR reward per window: {np.mean(ctr_results):.4f}")
else:
    print("No complete windows in the data stream; nothing to report.")

In [None]:
# Chart the simulated reward recorded for each window of the stream
plt.figure(figsize=(10, 6))
plt.plot(ctr_results)
plt.gca().set(
    title='CTR Improvement Over Time',
    xlabel='Window Index',
    ylabel='CTR',
)
plt.grid(True)
plt.show()

In [None]:
# Save final model weights.
# Keras 3 `save_weights` only accepts names ending in '.weights.h5'; the same
# suffix is written as HDF5 by legacy tf.keras, so it works on both.
agent.save('dqn_final_model.weights.h5')
logger.info("Training completed and model saved.")

## Model Analysis and Evaluation

In [None]:
# Let's analyze which actions the trained agent prefers in different situations
def _plot_action_share_by_bin(samples, column, bins, title, xlabel, figsize):
    """Draw a stacked bar chart of action proportions per bin of ``column``."""
    # include_lowest=True keeps values equal to the first edge (e.g. age 18),
    # which pd.cut would otherwise turn into NaN and drop from the table.
    groups = pd.cut(samples[column], bins=bins, include_lowest=True)
    shares = pd.crosstab(groups, samples['action_name'], normalize='index')
    # DataFrame.plot opens its own figure unless given an axis, so create
    # the figure here and pass `ax` to make `figsize` actually apply.
    fig, ax = plt.subplots(figsize=figsize)
    shares.plot(kind='bar', stacked=True, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Proportion')
    ax.grid(True, axis='y')
    fig.tight_layout()
    plt.show()


def analyze_agent_decisions(agent, test_data, n_samples=1000):
    """Sample rows, record the agent's action for each, and plot the results.

    Prints the action counts and shows three charts: overall action
    distribution, action share by age group, and action share by time of day.

    Args:
        agent: object with an ``act(state)`` method returning 0, 1 or 2.
            Actions come from ``agent.act`` as-is, so any exploration the
            agent still performs is included in the results.
        test_data: DataFrame with ``user_age``, ``time_of_day``,
            ``historical_ctr`` and ``ad_position`` columns.
        n_samples: number of rows to draw; capped at ``len(test_data)``.

    Returns:
        The sampled DataFrame with added ``predicted_action`` and
        ``action_name`` columns.
    """
    feature_columns = ['user_age', 'time_of_day', 'historical_ctr', 'ad_position']
    action_names = {0: 'top_banner', 1: 'sidebar', 2: 'popup'}

    # Sampling without replacement cannot draw more rows than exist.
    samples = test_data.sample(min(n_samples, len(test_data)))
    scaler = MinMaxScaler()
    scaled_samples = scaler.fit_transform(samples[feature_columns])

    # One (1, n_features) state per row; the feature count is inferred
    # rather than hard-coded (the old `[1, 5]` reshape failed on 4 features).
    actions = [agent.act(np.reshape(row, [1, -1])) for row in scaled_samples]

    # Add the predicted actions to the samples dataframe
    samples['predicted_action'] = actions
    samples['action_name'] = samples['predicted_action'].map(action_names)

    # Analyze action distribution
    action_counts = samples['action_name'].value_counts()
    print("Action Distribution:")
    print(action_counts)

    # Visualize action distribution
    fig, ax = plt.subplots(figsize=(10, 6))
    action_counts.plot(kind='bar', ax=ax)
    ax.set_title('Agent Action Distribution')
    ax.set_xlabel('Action')
    ax.set_ylabel('Count')
    ax.grid(True, axis='y')
    fig.tight_layout()
    plt.show()

    # Analyze how actions relate to user age
    _plot_action_share_by_bin(
        samples,
        column='user_age',
        bins=[18, 25, 35, 45, 55, 65],
        title='Action Distribution by Age Group',
        xlabel='Age Group',
        figsize=(12, 6),
    )

    # Analyze how actions relate to time of day
    _plot_action_share_by_bin(
        samples,
        column='time_of_day',
        bins=[0, 6, 12, 18, 24],
        title='Action Distribution by Time of Day',
        xlabel='Time of Day',
        figsize=(14, 6),
    )

    return samples

In [None]:
# Draw a separate evaluation set and inspect the agent's choices on a sample of it
test_data = generate_synthetic_data(n_samples=10000)  # Generate fresh test data
analyzed_samples = analyze_agent_decisions(agent=agent, test_data=test_data, n_samples=2000)