In [8]:
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

In [9]:

class StockOptimizer:
    """Upper-confidence-bound (UCB) bandit over a set of simulated stocks.

    Pulling stock ``i`` yields the reward ``2 * s - c`` where ``s`` and ``c``
    are independent normal draws centred on ``stock_means[i]`` and
    ``comparison_means[i]`` respectively, both with standard deviation
    ``std_dev``.  The optimizer keeps a running sample-average estimate of
    each stock's reward and, after trying every stock once, always picks the
    stock with the highest estimate-plus-confidence-bonus.

    Parameters
    ----------
    num_stocks : int
        Number of stocks (arms) to choose between.
    time_steps : int
        Number of selections performed by one ``optimize()`` run.
    exploration_param : float
        Multiplier applied to the confidence bonus in the UCB score.
    stock_means, comparison_means : sequence of float, optional
        Per-stock means of the two normal draws that make up the reward.
        Default to the built-in five-stock scenario.  Each must contain at
        least ``num_stocks`` entries; only the first ``num_stocks`` are used.
    std_dev : float
        Standard deviation shared by both normal draws.
    seed : int, optional
        When given, rewards are drawn from a private
        ``np.random.default_rng(seed)`` so runs are reproducible.  When
        ``None`` the global ``np.random`` state is used.

    Raises
    ------
    ValueError
        If ``num_stocks`` is less than 1 or exceeds the number of supplied
        means.
    """

    def __init__(self, num_stocks=5, time_steps=1000, exploration_param=2.5,
                 stock_means=None, comparison_means=None, std_dev=50, seed=None):
        self.num_stocks = num_stocks
        self.time_steps = time_steps
        self.exploration_param = exploration_param
        self.stock_means = (
            [300, 450, 100, 550, 800] if stock_means is None else list(stock_means)
        )
        self.comparison_means = (
            [400, 150, 500, 100, 80] if comparison_means is None else list(comparison_means)
        )
        self.std_dev = std_dev

        # Fail fast: without this check a too-large num_stocks only surfaces
        # as an IndexError part-way through optimize().
        if num_stocks < 1:
            raise ValueError(f"num_stocks must be at least 1, got {num_stocks}")
        for label, means in (("stock_means", self.stock_means),
                             ("comparison_means", self.comparison_means)):
            if len(means) < num_stocks:
                raise ValueError(
                    f"{label} has {len(means)} entries but num_stocks={num_stocks}"
                )

        # Both the np.random module and a Generator expose .normal(loc, scale),
        # so either can serve as the reward source.
        self._rng = np.random if seed is None else np.random.default_rng(seed)
        self._reset_state()

    def _reset_state(self):
        """(Re)initialise every quantity that optimize() learns or records."""
        self.mean_estimates = np.zeros(self.num_stocks)
        self.stock_selection_count = np.zeros(self.num_stocks)
        self.q_value_history = np.zeros((self.time_steps, self.num_stocks))
        self.reward_log = []

    def _compute_reward(self, stock_idx):
        """Draw one noisy reward for ``stock_idx``: ``2 * s - c`` (see class docstring)."""
        stock_viewers = self._rng.normal(self.stock_means[stock_idx], self.std_dev)
        comparison_viewers = self._rng.normal(self.comparison_means[stock_idx], self.std_dev)
        return (2 * stock_viewers) - comparison_viewers

    def _ucb_score(self, step):
        """Return the per-stock UCB score (estimate + confidence bonus) at ``step``."""
        # The 1e-10 keeps the division finite for a stock that has never been
        # selected, while still giving it a very large bonus.
        confidence_bounds = self.exploration_param * np.sqrt(
            np.log(step + 1) / (self.stock_selection_count + 1e-10)
        )
        return self.mean_estimates + confidence_bounds

    def _choose_stock(self, step):
        """Pick the stock to pull at ``step``.

        The first ``num_stocks`` steps try each stock once, in index order;
        afterwards the stock with the highest UCB score is chosen.
        """
        if step < self.num_stocks:
            return step
        return np.argmax(self._ucb_score(step))

    def _update_estimate(self, stock_idx, reward):
        """Fold ``reward`` into the running sample average for ``stock_idx``."""
        self.stock_selection_count[stock_idx] += 1
        step_size = 1 / self.stock_selection_count[stock_idx]
        self.mean_estimates[stock_idx] += step_size * (reward - self.mean_estimates[stock_idx])

    def optimize(self, show_plots=True):
        """Run ``time_steps`` selections, print the best stock, optionally plot.

        Learning state is reset first, so calling this repeatedly always
        produces a fresh run whose ``reward_log``, ``stock_selection_count``
        and ``q_value_history`` all describe the same ``time_steps`` pulls.

        Parameters
        ----------
        show_plots : bool
            When True (the default) the three Plotly figures are rendered
            after training.  Pass False to run the learning loop only.
        """
        self._reset_state()
        for step in range(self.time_steps):
            selected_stock = self._choose_stock(step)
            reward = self._compute_reward(selected_stock)
            self._update_estimate(selected_stock, reward)
            self.reward_log.append(reward)
            # Snapshot the estimates so their evolution can be plotted later.
            self.q_value_history[step] = self.mean_estimates.copy()

        print(f"Optimal stock selection after training: Stock {np.argmax(self.mean_estimates)}")
        if show_plots:
            self._show_visualizations()

    def _show_visualizations(self):
        """Render the estimate time series, final-estimate bars and history heatmap."""
        stock_labels = [f'Stock {i}' for i in range(self.num_stocks)]
        steps_axis = np.arange(self.time_steps)

        # Time series plot for Q-value estimates
        fig = go.Figure()
        for i, label in enumerate(stock_labels):
            fig.add_trace(go.Scatter(
                x=steps_axis,
                y=self.q_value_history[:, i],
                mode='lines',
                name=label,
                line=dict(width=2)
            ))
        fig.update_layout(
            title="Q-Value Estimates Over Time",
            xaxis_title="Time Steps",
            yaxis_title="Estimated Q-Value",
            template="plotly_dark",
            legend_title="Stocks"
        )
        fig.show()

        # Final estimates as a bar chart
        fig = px.bar(
            x=stock_labels,
            y=self.mean_estimates,
            title="Final Q-Value Estimates per Stock",
            labels={'x': 'Stock', 'y': 'Q-Value Estimate'},
            color=self.mean_estimates,
            color_continuous_scale="Teal"
        )
        fig.update_layout(template="plotly_white")
        fig.show()

        # Heatmap of Q-value estimates (one row per stock, one column per step)
        fig = go.Figure(data=go.Heatmap(
            x=steps_axis,
            y=stock_labels,
            z=self.q_value_history.T,
            colorscale='Cividis'
        ))
        fig.update_layout(
            title="Q-Value Evolution Heatmap",
            xaxis_title="Time Steps",
            yaxis_title="Stocks",
            template="plotly_dark"
        )
        fig.show()

In [10]:
# Seed NumPy's global generator before training: the optimizer's rewards are
# random normal draws, so without a fixed seed the printed result and the
# plots change on every Restart & Run All.
np.random.seed(42)
stock_optimizer = StockOptimizer()
stock_optimizer.optimize()

Optimal stock selection after training: Stock 4
