# Imports

In [1]:
import torch
import cgd_utils

# Cournot Simulation

The profit of each player $i$, who produces quantity $q_i$ at cost $C_i(q_i)$ and sells at the market price $P\left(\sum_j q_j\right)$, is defined as follows:
\begin{gather}
\Pi_i = P\left(\sum_j{q_j}\right) \cdot q_i -C_i(q_i)
\end{gather}

Thus, to solve for the Nash equilibrium, we take the first derivative of each player's profit with respect to their own quantity and set it to zero:
\begin{gather}
\frac{\partial\Pi_i}{\partial q_i} = \frac{\partial P\left(\sum_j{q_j}\right)}{\partial q_i} \cdot q_i + P\left(\sum_j{q_j}\right) - \frac{\partial C_i (q_i)}{\partial q_i} = 0
\end{gather}

For the example below, with inverse demand $P(Q) = 100 - Q$ and cost $C_i(q_i) = 10 \, q_i$, this becomes the following:
\begin{gather}
-1 \cdot q_i + \left(100 - \sum_j {q_j}\right) - 10 = 0
\end{gather}

Solving this for three symmetric players (so that $\sum_j q_j = 3 q_i$) gives $4 q_i = 90$, i.e. $q_i = \frac{45}{2}$, which is what our algorithm converges to.

In [2]:
def player_payoffs(quantity_tensor,
                   market_demand=lambda q: 100 - q,
                   marginal_cost=lambda q: q * 10):
    """Return every player's loss (negative profit) in a Cournot market.

    All players sell at one common market price, which is obtained from the
    total quantity supplied and floored at zero.

    Args:
        quantity_tensor: Tensor whose first dimension indexes the players;
            entry ``i`` is the quantity produced by player ``i``.
        market_demand: Callable mapping the total quantity (a scalar tensor)
            to the market price. Defaults to ``100 - q``.
        marginal_cost: Callable mapping a single player's quantity to the
            cost subtracted from that player's revenue. Note that, despite
            its name, it is applied as a *total* cost function: the default
            ``q * 10`` corresponds to a constant marginal cost of 10.

    Returns:
        Tensor obtained by stacking ``-(quantity * price - cost)`` for each
        player, in the same order as ``quantity_tensor``. Values are negated
        profits, so minimising them maximises profit.
    """
    raw_price = market_demand(torch.sum(quantity_tensor))
    # Floor the price at zero. The floor is a plain constant: it must not
    # require grad, otherwise the result would always carry an autograd graph
    # even when the quantities themselves do not. `zeros_like` also keeps the
    # floor on the same dtype/device as the price.
    price = torch.max(raw_price, torch.zeros_like(raw_price))

    # Evaluate the cost per player (rather than on the whole tensor) so that
    # cost callables which only accept a single player's quantity keep working.
    return torch.stack([
        -(quantity * price - marginal_cost(quantity))
        for quantity in quantity_tensor
    ])

# Running Simulation

In [8]:
# Number of update steps performed by the simulation loop below.
num_iterations = 100

# Initial quantity chosen by each of the three sellers (one leaf tensor per
# player, each tracked by autograd).
p1 = torch.tensor([50.], requires_grad=True)
p2 = torch.tensor([0.], requires_grad=True)
p3 = torch.tensor([40.], requires_grad=True)

# torch.stack copies the three leaves into a new (3, 1) tensor. The loop below
# updates this stacked copy in place; p1, p2 and p3 themselves are never
# modified and keep their initial values.
players = torch.stack([p1, p2, p3])

# One learning rate per player, passed to the update helper as `lr_list`.
learning_rates = [0.1, 0.1, 0.1]

for i in range(num_iterations):
    
    # Negated profits of all players at the current quantities.
    payoffs = player_payoffs(players)
    
    # NOTE(review): presumably computes one update per player by
    # differentiating `payoffs` with respect to the leaves p1..p3 -- confirm
    # against cgd_utils. `n_iter` is returned but not used here.
    updates, n_iter = cgd_utils.metamatrix_conjugate_gradient(payoffs, [p1, p2, p3], lr_list=learning_rates)
    
    # Each `player` is a row view of `players`; writing through `.data` applies
    # the update in place without recording it in the autograd graph.
    for player, update in zip(players, updates):
        player.data.add_(update)

# `players` holds the quantities after the final update, while `payoffs` was
# evaluated at the start of the last iteration (i.e. before that final update).
print(players)
print(payoffs)


tensor([[22.5002],
        [22.4998],
        [22.5001]], grad_fn=<StackBackward>)
tensor([[-506.2539],
        [-506.2442],
        [-506.2520]], grad_fn=<StackBackward>)
