In [1]:
import random
import numpy as np
import matplotlib
import time
from numba import jit
import multiprocessing as mp
from matplotlib import pyplot as plt

# Demand function

In [3]:
@jit(nopython=True)
def demand(p1, p2):
    """Return the demand for a firm pricing at ``p1`` against a rival at ``p2``.

    The strictly cheaper firm serves the whole market ``1 - p1``, the strictly
    dearer firm sells nothing, and in every remaining case (equal prices) the
    market ``1 - p1`` is split in half.
    """
    if p1 > p2:
        # Undercut by the rival: no sales at all.
        return 0
    market = 1 - p1
    if p1 < p2:
        # Strictly cheapest: capture the entire market.
        return market
    # Neither strictly cheaper nor strictly dearer: share the market equally.
    return 0.5 * market

# Profit function

In [4]:
@jit(nopython=True)
def profit(p1, p2):
    """Return the per-period profit of a firm pricing at ``p1`` against ``p2``.

    Profit is price times the quantity given by ``demand(p1, p2)``; no cost
    term is subtracted.
    """
    quantity = demand(p1, p2)
    return p1 * quantity

In [5]:
P = np.arange(7) / 6  # price grid: the seven equally spaced prices k/6, k = 0..6

# Helper function: epsilon-greedy action selection

In [None]:
@jit(nopython=True)
def epsilon_greedy(Qtable, epsilon, uniform, state: int, price_grid: np.ndarray) -> int: 
    """Pick a price index with an epsilon-greedy rule.

    The function is compiled in nopython mode so that it can be called from
    other ``@jit(nopython=True)`` functions; plain Python functions cannot be
    called from nopython code.

    Parameters
    ----------
    Qtable : 2-D array of shape (N, N), N = len(price_grid)
        Q-values with own actions along the rows and the observed state along
        the columns (the greedy choice is the arg-max over ``Qtable[:, state]``).
    epsilon : float
        Exploration probability.
    uniform : float
        A pre-drawn number that is compared against ``epsilon``; exploration
        happens when ``uniform < epsilon``.
    state : int
        Column index of the current state, ``0 <= state < N``.
    price_grid : 1-D array
        The admissible prices; only its length is used here.

    Returns
    -------
    int
        Index into ``price_grid`` of the chosen price.

    Raises
    ------
    AssertionError
        If ``Qtable`` is not N x N or ``state`` is outside ``[0, N)``.
    """
    N = len(price_grid)
    assert Qtable.shape[0] == N, "Qtable must have the same number of rows as there are prices in the grid"
    assert Qtable.shape[1] == N, "Qtable must have the same number of columns as there are prices in the grid"
    assert state < N, "state must be a valid index in the price grid"
    assert state >= 0, "state must be a valid index in the price grid"

    # Draw action 
    if uniform < epsilon:
        # Explore: every price index is equally likely.
        price_index = np.random.choice(N)
    else:
        # Exploit: best known action for this state (first index wins ties).
        price_index = np.argmax(Qtable[:, state])
    return price_index

# Q-learning and simulation functions

In [None]:
@jit(nopython=True)
def Qfunction(price_grid, period, delta, alpha, theta, final_window=1000):
    """Run one simulation of alternating-move Q-learning price competition.

    Two firms take turns setting a price from ``price_grid``: player 2 moves
    in even periods and player 1 in odd periods. The mover observes the
    rival's standing price (the state), picks a price epsilon-greedily from
    its own Q-table and then updates the Q-value of that (action, state) pair.

    Parameters
    ----------
    price_grid : 1-D array
        Admissible prices.
    period : int
        Number of periods to simulate; the loop runs for t = 1, ..., period.
    delta : float
        Discount factor.
    alpha : float
        Learning rate (weight put on the new estimate).
    theta : float
        Decay parameter of the exploration rate, ``epsilon_t = (1 - theta)**t``.
    final_window : int, optional
        Number of trailing periods averaged into the final profits.
        NOTE(review): the window length is a modelling choice, confirm that
        1000 is the intended default.

    Returns
    -------
    tuple
        ``(Qtable_1, Qtable_2, all_price_1, all_price_2, all_profit_1,
        all_profit_2, final_profit_1, final_profit_2)``. The Q-tables have
        shape ``(len(price_grid), len(price_grid))``, the price and profit
        paths have length ``period``, and each final profit is the mean profit
        over the last ``min(final_window, period)`` periods (``0.0`` if that
        window is empty).

    Notes
    -----
    ``epsilon_greedy``, ``profit`` and this function all run in nopython mode,
    so ``epsilon_greedy`` must itself be nopython-compilable.
    """
    n_prices = len(price_grid)

    # Initialize prices and Q-tables
    price_index_1 = np.random.choice(n_prices)
    price_index_2 = np.random.choice(n_prices)

    Qtable_1 = np.zeros((n_prices, n_prices))
    Qtable_2 = np.zeros((n_prices, n_prices))

    # Pre-drawn exploration draws: column 0 is used for the mover's action,
    # column 1 for the simulated reply of the rival.
    uniforms = np.random.uniform(0, 1, (period + 1, 2))

    # One entry per simulated period (index t - 1 holds period t).
    all_price_1 = np.zeros(period)
    all_price_2 = np.zeros(period)
    all_profit_1 = np.zeros(period)
    all_profit_2 = np.zeros(period)

    # t runs over 1..period so that exactly `period` observations are recorded.
    for t in range(1, period + 1):
        epsilon = (1 - theta)**t

        # Figure out whose turn it is
        if t % 2 == 0:
            # player 2 is the responder
            state = price_index_1  # the most recent price of player 1
            Qtable_i = Qtable_2
            Qtable_j = Qtable_1
        else:
            # player 1 is the responder
            state = price_index_2
            Qtable_i = Qtable_1
            Qtable_j = Qtable_2

        # Draw the mover's action.
        price_index_i = epsilon_greedy(Qtable_i, epsilon, uniforms[t, 0], state=state, price_grid=price_grid)

        # Current period: the mover's new price against the rival's standing price.
        decision = price_grid[price_index_i]
        state_i_responds_to = price_grid[state]
        profit_i_current_period = profit(decision, state_i_responds_to)

        # Next period: the state is today's price, and the rival's reply is
        # simulated from its current Q-table. The simulated reply is only used
        # in the learning target; the rival draws its actual move on its own turn.
        state_next_period = price_index_i
        price_index_j = epsilon_greedy(Qtable_j, epsilon, uniforms[t, 1], state=state_next_period, price_grid=price_grid)

        price_j_next_period = price_grid[price_index_j]
        price_i_next_period = price_grid[price_index_i]  # unchanged price, it's not i's turn
        profit_i_next_period = profit(price_i_next_period, price_j_next_period)

        # Best value the mover can reach from the state it will face next.
        continuation_value = np.max(Qtable_i[:, price_index_j])

        new_Q = profit_i_current_period + delta * profit_i_next_period + delta**2 * continuation_value

        # Update the entry for the action just taken in the state it was taken
        # in (rows are actions, columns are states), not the next state.
        prev_estimate = Qtable_i[price_index_i, state]
        Qtable_i[price_index_i, state] = (1 - alpha) * prev_estimate + alpha * new_Q

        # Commit the mover's price so that it becomes the rival's state next period.
        if t % 2 == 0:
            price_index_2 = price_index_i
        else:
            price_index_1 = price_index_i

        # Record the prices in force this period and the resulting profits.
        price_1 = price_grid[price_index_1]
        price_2 = price_grid[price_index_2]
        all_price_1[t - 1] = price_1
        all_price_2[t - 1] = price_2
        all_profit_1[t - 1] = profit(price_1, price_2)
        all_profit_2[t - 1] = profit(price_2, price_1)

    # Average profit over the trailing window (guarding against an empty window).
    window = min(final_window, period)
    if window > 0:
        final_profit_1 = np.mean(all_profit_1[period - window:])
        final_profit_2 = np.mean(all_profit_2[period - window:])
    else:
        final_profit_1 = 0.0
        final_profit_2 = 0.0

    return (Qtable_1, Qtable_2, all_price_1, all_price_2,
            all_profit_1, all_profit_2, final_profit_1, final_profit_2)

@jit(nopython=True)
def Simulations(sim, price, period, delta, alpha, theta):
    """Repeat ``Qfunction`` ``sim`` times and stack the per-run results.

    Each run is started with ``Qfunction(price, period, delta, alpha, theta)``.
    The two Q-tables it returns are discarded; the price and profit paths are
    stored row by row and the two final profits are appended to lists.

    Returns
    -------
    tuple
        ``(all_price1, all_price2, all_profit1, all_profit2,
        final_profit1_list, final_profit2_list)`` where the four arrays have
        shape ``(sim, period)`` and the two lists have one entry per run.
    """
    shape = (sim, period)
    all_price1 = np.zeros(shape)
    all_price2 = np.zeros(shape)
    all_profit1 = np.zeros(shape)
    all_profit2 = np.zeros(shape)
    final_profit1_list = []
    final_profit2_list = []

    for run in range(sim):
        (_qtable_1, _qtable_2,
         prices_1, prices_2,
         profits_1, profits_2,
         final_1, final_2) = Qfunction(price, period, delta, alpha, theta)

        # Row `run` holds the full path of this simulation.
        all_price1[run, :] = prices_1
        all_price2[run, :] = prices_2
        all_profit1[run, :] = profits_1
        all_profit2[run, :] = profits_2

        final_profit1_list.append(final_1)
        final_profit2_list.append(final_2)

    return all_price1, all_price2, all_profit1, all_profit2, final_profit1_list, final_profit2_list

# Time calculation for 1000 simulations

@jit(nopython=True)
def _seed_numba_rng(seed):
    """Seed the random generator used inside Numba-compiled functions.

    Numba keeps its own generator state: ``np.random.seed`` called from
    ordinary Python only seeds NumPy's generator, so the seed has to be set
    from within jitted code to make the jitted simulations reproducible.
    """
    np.random.seed(seed)


np.random.seed(387)     # NumPy's global generator (draws made outside jitted code)
_seed_numba_rng(387)    # Numba's generator (draws made inside @jit functions)

# NOTE: the first call to a jitted function also compiles it, so the time
# measured below includes compilation.
# NOTE: Simulations allocates four float64 arrays of shape (1000, 500000),
# i.e. about 4 GB each and roughly 16 GB in total.
start_time = time.perf_counter()

# Arguments: sim=1000, price=P, period=500000, delta=0.95, alpha=0.3, theta=0.0000276306
all_p1, all_p2, all_profit1, all_profit2, final_avg_profit1, final_avg_profit2 = Simulations(1000, P, 500000, 0.95, 0.3, 0.0000276306)

end_time = time.perf_counter()

elapsed_time = end_time - start_time

print("Time taken to run 1000 simulations:", elapsed_time, "seconds")