In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import seaborn as sns

In [7]:
class Firm:
    """A price-setting firm that learns with tabular, epsilon-greedy Q-learning.

    States are pairs of prices rounded to two decimals (see ``get_state``) and
    every state has three actions, ``0``, ``1`` and ``2``. This class does not
    decide how an action changes a price; that is done by the caller.

    Parameters
    ----------
    mc : float
        Marginal cost, used for profits and for the shaped reward.
    price_floor, price_cap : float
        Stored on the instance; not used by any method of this class.
    learning_rate : float, default 0.85
        Step size of the Q-value update.
    discount_factor : float, default 0.9
        Weight on the best Q-value of the next state.
    epsilon : float, default 0.1
        Probability of choosing a uniformly random action in ``get_action``.
    """

    def __init__(self, mc, price_floor, price_cap, learning_rate=0.85, discount_factor=0.9, epsilon=0.1):
        self.mc = mc
        self.price_floor = price_floor
        self.price_cap = price_cap
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        # Maps state -> {action: Q-value}; entries are created lazily.
        self.q_table = {}

    def _q_values(self, state):
        """Return the Q-values of ``state``, creating a zeroed entry if unseen."""
        if state not in self.q_table:
            self.q_table[state] = {0: 0, 1: 0, 2: 0}
        return self.q_table[state]

    def get_state(self, price_A, price_B):
        """Return the state key: both prices rounded to two decimals."""
        return (round(price_A, 2), round(price_B, 2))

    def get_action(self, state):
        """Pick an action for ``state`` with an epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned,
        otherwise the action with the highest Q-value (ties resolve to the
        lowest action number because of dict ordering).
        """
        q_values = self._q_values(state)
        if np.random.rand() < self.epsilon:
            return np.random.choice([0, 1, 2])
        return max(q_values, key=q_values.get)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for ``(state, action) -> next_state``.

        The reward is shaped by adding ``0.5 * (next_state[1] - mc)`` before
        the standard temporal-difference update.
        NOTE(review): in ``simulate_bertrand_rl`` ``next_state[1]`` is the
        rival's price -- confirm that this shaping term is intended.
        """
        # Both states are initialised on demand. Previously only ``next_state``
        # was, so updating a state that never went through ``get_action``
        # raised a KeyError.
        current = self._q_values(state)
        best_next_value = max(self._q_values(next_state).values())

        last_price_effect = 0.5 * (next_state[1] - self.mc)
        scaled_reward = reward + last_price_effect

        td_target = scaled_reward + self.discount_factor * best_next_value
        current[action] += self.learning_rate * (td_target - current[action])

    def calculate_profit(self, price):
        """Return ``(price - mc) * quantity`` with demand ``100 - price``, floored at 0."""
        quantity_sold = 100 - price
        return (price - self.mc) * max(quantity_sold, 0)

    def get_learned_policy(self):
        """Return ``{state: greedy action}`` for every state in the Q-table."""
        return {
            state: max(actions, key=actions.get)
            for state, actions in self.q_table.items()
        }

def simulate_bertrand_rl(max_iterations=250000, price_floor=10, price_cap=55, start_price_A=10, start_price_B=10, discount_factor=0.9):
    """Simulate repeated price competition between two Q-learning firms.

    Firm A has marginal cost 10 and firm B has marginal cost 20. In every
    iteration each firm picks an action (0 = lower price by 1, 1 = keep,
    2 = raise price by 1), prices are clamped to ``[price_floor, price_cap]``,
    the cheaper firm earns the whole profit (both earn on a tie) and both
    Q-tables are updated.

    Parameters
    ----------
    max_iterations : int
        Number of simulated periods.
    price_floor, price_cap : float
        Lower and upper bound applied to every price move.
    start_price_A, start_price_B : float or None
        Initial prices; ``None`` draws uniformly from ``[price_floor, price_cap]``.
    discount_factor : float, default 0.9
        Discount factor used by both firms' Q-learning updates.

    Returns
    -------
    tuple
        ``(price_history_A, price_history_B, actions, final_policy)`` where the
        histories have ``max_iterations + 1`` entries (start price included),
        ``actions`` interleaves the recorded actions as A, B, A, B, ... and
        ``final_policy`` is ``[policy_B, policy_A]`` (firm B first), or ``[]``
        when no iteration was run.
    """
    firm_A = Firm(mc=10, price_floor=price_floor, price_cap=price_cap, discount_factor=discount_factor)
    firm_B = Firm(mc=20, price_floor=price_floor, price_cap=price_cap, discount_factor=discount_factor)

    price_A = start_price_A if start_price_A is not None else np.random.uniform(price_floor, price_cap)
    price_B = start_price_B if start_price_B is not None else np.random.uniform(price_floor, price_cap)

    # Seed the histories with the resolved prices, not the raw arguments, so a
    # randomly drawn start (argument None) does not put None into the history.
    price_history_A = [price_A]
    price_history_B = [price_B]
    actions = []

    for i in range(max_iterations):
        state_A = firm_A.get_state(price_A, price_B)
        state_B = firm_B.get_state(price_B, price_A)

        action_A = firm_A.get_action(state_A)
        action_B = firm_B.get_action(state_B)

        # NOTE(review): at this point the histories hold i + 1 entries, so
        # index i - 1 is the price one period before the current one (and the
        # start price when i == 0). Confirm this lag is intended.
        lagged_A = price_history_A[i - 1]
        lagged_B = price_history_B[i - 1]

        if lagged_A < lagged_B:
            # Firm A was cheaper: it moves up to B's current price, bounded by
            # the cap (previously a hard-coded 55 that ignored ``price_cap``).
            price_A = min(price_B, price_cap)
            actions.append(2)
        else:
            if action_A == 0:
                price_A = max(price_A - 1, price_floor)
            elif action_A == 2:
                price_A = min(price_A + 1, price_cap)
            actions.append(action_A)

        if lagged_A > lagged_B:
            # Firm B was cheaper: it moves up to A's lagged price, bounded by the cap.
            price_B = min(lagged_A, price_cap)
            actions.append(2)
        else:
            if action_B == 0:
                price_B = max(price_B - 1, price_floor)
            elif action_B == 2:
                price_B = min(price_B + 1, price_cap)
            actions.append(action_B)

        # Winner-takes-all demand; on a tie both firms earn their profit.
        if price_A > price_B:
            profit_A = 0
            profit_B = firm_B.calculate_profit(price_B)
        elif price_A < price_B:
            profit_A = firm_A.calculate_profit(price_A)
            profit_B = 0
        else:
            profit_A = firm_A.calculate_profit(price_A)
            profit_B = firm_B.calculate_profit(price_B)

        # NOTE(review): the update uses the action the firm chose, even when
        # the matching rule above overrode it and 2 was recorded in ``actions``.
        firm_A.update_q_value(state_A, action_A, profit_A, firm_A.get_state(price_A, price_B))
        firm_B.update_q_value(state_B, action_B, profit_B, firm_B.get_state(price_B, price_A))

        price_history_A.append(price_A)
        price_history_B.append(price_B)

    final_policy = []
    if max_iterations > 0:
        # Order kept from the original implementation: firm B first, then firm A.
        final_policy.append(firm_B.get_learned_policy())
        final_policy.append(firm_A.get_learned_policy())
    return price_history_A, price_history_B, actions, final_policy

In [8]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import pandas as pd

# Experiment configuration: discount factors to compare and simulation size.
discount_factors = [0.1, 0.5, 0.9]
price_floor, price_cap = 10, 55
max_iterations = 150000
repeats = 100

# Result containers keyed by discount factor. The history dicts start with a
# separate empty list per factor; the mean dicts are filled by the experiment.
price_histories_A = {factor: list() for factor in discount_factors}
price_histories_B = {factor: list() for factor in discount_factors}
mean_prices_A, mean_prices_B = dict(), dict()

def moving_average(data, window_size):
    """Return the simple moving average of ``data`` over ``window_size`` points.

    Only fully overlapping windows are produced (``mode='valid'``).
    """
    # Uniform weights: every point in the window contributes 1 / window_size.
    kernel = np.ones(window_size)
    kernel /= window_size
    return np.convolve(data, kernel, mode='valid')

# Run `repeats` independent simulations for every discount factor, keep the
# full price history of each run, and store the per-period mean across runs.
for discount_factor in discount_factors:
    all_prices_A = []
    all_prices_B = []
    
    for _ in range(repeats):
        # NOTE(review): these two Firm objects are never used. They are not
        # passed to simulate_bertrand_rl, which builds its own firms (mc=10 and
        # mc=20) with Firm's default discount factor. As written, `discount_factor`
        # therefore does not influence the simulation, and every group below is
        # generated by the same learning setup. Confirm and fix before
        # interpreting differences between discount factors.
        firm_A_discounted = Firm(mc=10, price_floor=price_floor, price_cap=price_cap, discount_factor=discount_factor)
        firm_B_discounted = Firm(mc=10, price_floor=price_floor, price_cap=price_cap, discount_factor=discount_factor)
        
        # Only the two price histories are kept; actions and policies are discarded.
        price_history_A, price_history_B, _, _ = simulate_bertrand_rl(
            max_iterations=max_iterations,
            price_floor=price_floor,
            price_cap=price_cap,
            start_price_A=10,
            start_price_B=10
        )
        
        all_prices_A.append(price_history_A)
        all_prices_B.append(price_history_B)
    
    price_histories_A[discount_factor] = all_prices_A
    price_histories_B[discount_factor] = all_prices_B
    # axis=0 averages across repeats, giving one mean price per period.
    mean_prices_A[discount_factor] = np.mean(all_prices_A, axis=0)
    mean_prices_B[discount_factor] = np.mean(all_prices_B, axis=0)


In [21]:
# Pairwise two-sample t-tests (ttest_ind with its default arguments) on Firm A's
# final prices. Only the upper triangle of the matrix is filled; the p-value is
# discarded.
t_matrix = pd.DataFrame(index=discount_factors, columns=discount_factors)

# Final price of every repeat, computed once per discount factor instead of
# once per pair.
final_prices_A = {
    df: [prices[-1] for prices in price_histories_A[df]]
    for df in discount_factors
}

for i, df1 in enumerate(discount_factors):
    for df2 in discount_factors[i + 1:]:
        t_stat, _ = ttest_ind(final_prices_A[df1], final_prices_A[df2])
        t_matrix.loc[df1, df2] = round(t_stat, 3)


print("T-statistics Matrix:\n", t_matrix)

latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\begin{tabular}{|" + "c|" * (len(discount_factors) + 1) + "}\n"
latex_code += "\\hline\n"
latex_code += " & " + " & ".join([str(df) for df in discount_factors]) + " \\\\ \\hline\n"

for df1 in discount_factors:
    # Collect the cells first and join them afterwards. The previous
    # rstrip(' & ') removed any trailing run of spaces and ampersands, so a row
    # ending in blank cells (e.g. a NaN t-statistic) silently lost columns.
    cells = []
    for df2 in discount_factors:
        if df1 == df2:
            cells.append(" -")
        elif pd.isna(t_matrix.loc[df1, df2]):
            cells.append("")
        else:
            cells.append(f"{t_matrix.loc[df1, df2]}")
    latex_code += f"{df1} & " + " & ".join(cells) + " \\\\ \\hline\n"

latex_code += "\\end{tabular}\n"
latex_code += "\\caption{T-Statistics for Price Differences Between Discount Factors}\n"
latex_code += "\\end{table}"

print(latex_code)


T-statistics Matrix:
      0.1    0.5    0.9
0.1  NaN -0.113  0.944
0.5  NaN    NaN  0.974
0.9  NaN    NaN    NaN
\begin{table}[ht]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
 & 0.1 & 0.5 & 0.9 \\ \hline
0.1 &  - & -0.113 & 0.944 \\ \hline
0.5 &  &  - & 0.974 \\ \hline
0.9 &  &  &  - \\ \hline
\end{tabular}
\caption{T-Statistics for Price Differences Between Discount Factors}
\end{table}


In [10]:
# Summary statistics of Firm A's final price for every discount factor.
# np.var is called with its defaults here.
last_prices_by_factor = {
    df: [prices[-1] for prices in price_histories_A[df]]
    for df in discount_factors
}
means_A = {df: np.mean(values) for df, values in last_prices_by_factor.items()}
variances_A = {df: np.var(values) for df, values in last_prices_by_factor.items()}

# One LaTeX row per discount factor, four decimals for both statistics.
table_rows = [
    f"{df} & {means_A[df]:.4f} & {variances_A[df]:.4f} \\\\ \\hline\n"
    for df in discount_factors
]

latex_mean_variance_code = (
    "\\begin{table}[ht]\n"
    "\\centering\n"
    "\\begin{tabular}{|c|c|c|}\n"
    "\\hline\n"
    "Discount Factor & Mean (Firm A) & Variance (Firm A) \\\\ \\hline\n"
    + "".join(table_rows)
    + "\\end{tabular}\n"
    "\\caption{Means and Variances of Final Prices for Firm A Across Discount Factors}\n"
    "\\end{table}"
)

print(latex_mean_variance_code)


\begin{table}[ht]
\centering
\begin{tabular}{|c|c|c|}
\hline
Discount Factor & Mean (Firm A) & Variance (Firm A) \\ \hline
0.1 & 50.8900 & 11.5779 \\ \hline
0.5 & 50.9500 & 16.3875 \\ \hline
0.9 & 50.4100 & 14.0219 \\ \hline
\end{tabular}
\caption{Means and Variances of Final Prices for Firm A Across Discount Factors}
\end{table}


In [19]:
# Long-format export: one row per simulation run, holding its discount factor
# and Firm A's final price.
raw_data = {"Discount Factor": []}

for discount_factor in discount_factors:
    last_values = [history[-1] for history in price_histories_A[discount_factor]]
    # Repeat the factor once per run so both columns stay aligned.
    raw_data["Discount Factor"] += [discount_factor for _run in last_values]
    raw_data.setdefault("Last Prices (Firm A)", []).extend(last_values)

csv_filename = "last_prices_for_ttest_analysis.csv"
raw_data_df = pd.DataFrame(raw_data)
raw_data_df.to_csv(csv_filename, index=False)

print(f"Raw data for the last prices saved to '{csv_filename}'.")


Raw data for the last prices saved to 'last_prices_for_ttest_analysis.csv'.
