In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import seaborn as sns

In [8]:
class Firm_N:
    def __init__(self, mc, price_floor, price_cap, learning_rate=0.85, discount_factor=0.98):
        self.mc = mc
        self.price_floor = price_floor
        self.price_cap = price_cap
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.q_table = {}

    def get_state(self, prices):
        return tuple(int(p) for p in prices)  # Keep states as whole numbers

    def get_action(self, state, prices, firm_index):
        if state not in self.q_table:
            self.q_table[state] = {0: 0, 1: 0, 2: 0}  # Actions: 0 (decrease), 1 (same), 2 (increase)

        # Find the minimum price among competitors (exclude own price)
        competitor_prices = prices[:firm_index] + prices[firm_index+1:]  # Excludes current firm's price
        min_competitor_price = min(competitor_prices)

        # Check if the firm's price is at least 2 below the minimum competitor price
        if prices[firm_index] <= min_competitor_price - 2:
            # Apply exploration rate, but limit to actions 1 (same) or 2 (increase)
            if np.random.rand() < 0.5:  # Exploration rate
                return np.random.choice([1, 2])
            else:
                return max({k: v for k, v in self.q_table[state].items() if k in [1, 2]}, key=self.q_table[state].get)

        # Regular exploration-exploitation logic
        if np.random.rand() < 0.5:  # Exploration rate
            return np.random.choice([0, 1, 2])
        else:
            return max(self.q_table[state], key=self.q_table[state].get)

    def get_learned_policy(self):
        policy = {}
        for state, actions in self.q_table.items():
            best_action = max(actions, key=actions.get)
            policy[state] = best_action
        return policy

    def update_q_value(self, state, action, reward, next_state):
        if next_state not in self.q_table:
            self.q_table[next_state] = {0: 0, 1: 0, 2: 0}
        best_next_action = max(self.q_table[next_state], key=self.q_table[next_state].get)

        self.q_table[state][action] += self.learning_rate * (
            reward + self.discount_factor * self.q_table[next_state][best_next_action] - self.q_table[state][action]
        )

    def calculate_profit(self, price, min_price_n):
        return (100 - price) * (price - 10) / min_price_n


def simulate_bertrand_rl(n_players=2, max_iterations=500000, price_floor=10, price_cap=55, start_prices=10, df=0.98):
    firms = [Firm_N(mc=10, price_floor=price_floor, price_cap=price_cap, discount_factor=df) for _ in range(n_players)]

    if isinstance(start_prices, (int, float)):
        prices = [start_prices] * n_players
    elif isinstance(start_prices, list):
        if len(start_prices) != n_players:
            raise ValueError("Length of start_prices must match n_players")
        prices = start_prices
    else:
        raise ValueError("start_prices must be an int, float, or list of length n_players")

    price_histories = [[p] for p in prices]
    rewards = [[] for _ in range(n_players)]
    Final_policy =[]

    for _ in range(max_iterations):
        states = [firm.get_state(prices) for firm in firms]
        actions = [firm.get_action(state, prices, j) for j, (firm, state) in enumerate(zip(firms, states))]

        # Determine new prices
        new_prices = []
        for j, (firm, action) in enumerate(zip(firms, actions)):
            if action == 0:  # Decrease price
                new_price = max(prices[j] - 1, price_floor)
            elif action == 2:  # Increase price
                new_price = min(prices[j] + 1, price_cap)
            else:
                new_price = prices[j]
            new_prices.append(new_price)

        # Find the minimum price and the number of firms setting it
        min_price = min(new_prices)
        min_price_n = new_prices.count(min_price)

        # Compute profit ONCE (only firms with min_price get this)
        profit_value = (100 - min_price) * (min_price - 10) / min_price_n

        # Assign profits (only firms setting min_price receive profit_value)
        profits = [profit_value if new_prices[j] == min_price else 0 for j in range(n_players)]

        # Update Q-values
        for j, firm in enumerate(firms):
            next_state = firm.get_state(new_prices)
            firm.update_q_value(states[j], actions[j], profits[j], next_state)

        # Store results
        for j in range(n_players):
            price_histories[j].append(new_prices[j])
            rewards[j].append(profits[j])

        prices = new_prices
        

    for firm in firms:
        Final_policy.append(firm.get_learned_policy())

    return price_histories, Final_policy

In [9]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import pandas as pd

discount_factors = [0.1, 0.5, 0.98]
price_floor = 10
price_cap = 55
max_iterations = 250000
repeats = 50

price_histories_A = {df: [] for df in discount_factors}
price_histories_B = {df: [] for df in discount_factors}
mean_prices_A = {}
mean_prices_B = {}

def moving_average(data, window_size):
    return np.convolve(data, np.ones(window_size)/window_size, mode='valid')

for discount_factor in discount_factors:
    all_prices_A = []
    all_prices_B = []
    
    for _ in range(repeats):
        price_history, _ = simulate_bertrand_rl(n_players=2, max_iterations=250000, price_floor=10, price_cap=55, start_prices=10, df=discount_factor)
        
        all_prices_A.append(price_history[0])
        all_prices_B.append(price_history[1])
    
    price_histories_A[discount_factor] = all_prices_A
    price_histories_B[discount_factor] = all_prices_B
    mean_prices_A[discount_factor] = np.mean(all_prices_A, axis=0)
    mean_prices_B[discount_factor] = np.mean(all_prices_B, axis=0)


In [10]:
t_matrix = pd.DataFrame(index=discount_factors, columns=discount_factors)

for i, df1 in enumerate(discount_factors):
    for j, df2 in enumerate(discount_factors):
        if i < j:
            last_values_df1 = [prices[-1] for prices in price_histories_A[df1]]
            last_values_df2 = [prices[-1] for prices in price_histories_A[df2]]
            
            t_stat, _ = ttest_ind(last_values_df1, last_values_df2)
            t_matrix.loc[df1, df2] = round(t_stat, 3)


print("T-statistics Matrix:\n", t_matrix)

latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\begin{tabular}{|" + "c|" * (len(discount_factors) + 1) + "}\n"
latex_code += "\\hline\n"
latex_code += " & " + " & ".join([str(df) for df in discount_factors]) + " \\\\ \\hline\n"

for df1 in discount_factors:
    latex_code += f"{df1} & "
    for df2 in discount_factors:
        if df1 == df2:
            latex_code += " - & "
        elif pd.isna(t_matrix.loc[df1, df2]):
            latex_code += " & "
        else:
            latex_code += f"{t_matrix.loc[df1, df2]} & "
    latex_code = latex_code.rstrip(' & ') + " \\\\ \\hline\n"
    
latex_code += "\\end{tabular}\n"
latex_code += "\\caption{T-Statistics for Price Differences Between Discount Factors}\n"
latex_code += "\\end{table}"

print(latex_code)


T-statistics Matrix:
      0.10   0.50    0.98
0.10  NaN -2.105 -37.227
0.50  NaN    NaN -34.901
0.98  NaN    NaN     NaN
\begin{table}[ht]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
 & 0.1 & 0.5 & 0.98 \\ \hline
0.1 &  - & -2.105 & -37.227 \\ \hline
0.5 &  &  - & -34.901 \\ \hline
0.98 &  &  &  - \\ \hline
\end{tabular}
\caption{T-Statistics for Price Differences Between Discount Factors}
\end{table}


In [10]:
t_matrix = pd.DataFrame(index=discount_factors, columns=discount_factors)

for i, df1 in enumerate(discount_factors):
    for j, df2 in enumerate(discount_factors):
        if i < j:
            last_values_df1 = [prices[-1] for prices in price_histories_A[df1]]
            last_values_df2 = [prices[-1] for prices in price_histories_A[df2]]
            
            t_stat, _ = ttest_ind(last_values_df1, last_values_df2)
            t_matrix.loc[df1, df2] = round(t_stat, 3)


print("T-statistics Matrix:\n", t_matrix)

latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\begin{tabular}{|" + "c|" * (len(discount_factors) + 1) + "}\n"
latex_code += "\\hline\n"
latex_code += " & " + " & ".join([str(df) for df in discount_factors]) + " \\\\ \\hline\n"

for df1 in discount_factors:
    latex_code += f"{df1} & "
    for df2 in discount_factors:
        if df1 == df2:
            latex_code += " - & "
        elif pd.isna(t_matrix.loc[df1, df2]):
            latex_code += " & "
        else:
            latex_code += f"{t_matrix.loc[df1, df2]} & "
    latex_code = latex_code.rstrip(' & ') + " \\\\ \\hline\n"
    
latex_code += "\\end{tabular}\n"
latex_code += "\\caption{T-Statistics for Price Differences Between Discount Factors}\n"
latex_code += "\\end{table}"

print(latex_code)


T-statistics Matrix:
      0.10   0.50    0.98
0.10  NaN -2.105 -37.227
0.50  NaN    NaN -34.901
0.98  NaN    NaN     NaN
\begin{table}[ht]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
 & 0.1 & 0.5 & 0.98 \\ \hline
0.1 &  - & -2.105 & -37.227 \\ \hline
0.5 &  &  - & -34.901 \\ \hline
0.98 &  &  &  - \\ \hline
\end{tabular}
\caption{T-Statistics for Price Differences Between Discount Factors}
\end{table}


In [13]:
import numpy as np
import scipy.stats as stats

# Calculate means for each discount factor
means_A = {df: np.mean([prices[-1] for prices in price_histories_A[df]]) for df in discount_factors}

# Calculate standard deviations and sample sizes for each discount factor
std_devs_A = {df: np.std([prices[-1] for prices in price_histories_A[df]]) for df in discount_factors}
sample_sizes_A = {df: len(price_histories_A[df]) for df in discount_factors}

# Compute the confidence intervals for the means
confidence_intervals_A = {}
for df in discount_factors:
    # Standard error (SE) = std_dev / sqrt(sample_size)
    SE = std_devs_A[df] / np.sqrt(sample_sizes_A[df])
    # Confidence interval for 95% confidence level (z-score = 1.96)
    margin_of_error = 1.96 * SE
    confidence_intervals_A[df] = (means_A[df] - margin_of_error, means_A[df] + margin_of_error)

# Generate LaTeX code for the table with means and confidence intervals
latex_mean_ci_code = "\\begin{table}[ht]\n"
latex_mean_ci_code += "\\centering\n"
latex_mean_ci_code += "\\begin{tabular}{|c|c|c|c|}\n"
latex_mean_ci_code += "\\hline\n"
latex_mean_ci_code += "Discount Factor & Mean (Firm A) & CI Lower Bound & CI Upper Bound \\\\ \\hline\n"

for df in discount_factors:
    mean = means_A[df]
    ci_lower, ci_upper = confidence_intervals_A[df]
    latex_mean_ci_code += f"{df} & {mean:.4f} & {ci_lower:.4f} & {ci_upper:.4f} \\\\ \\hline\n"

latex_mean_ci_code += "\\end{tabular}\n"
latex_mean_ci_code += "\\caption{Means and Confidence Intervals of Final Prices for Firm A Across Discount Factors}\n"
latex_mean_ci_code += "\\end{table}"

# Print the LaTeX code
print(latex_mean_ci_code)


\begin{table}[ht]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
Discount Factor & Mean (Firm A) & CI Lower Bound & CI Upper Bound \\ \hline
0.1 & 11.6000 & 11.2674 & 11.9326 \\ \hline
0.5 & 12.2200 & 11.7552 & 12.6848 \\ \hline
0.98 & 34.1800 & 33.0511 & 35.3089 \\ \hline
\end{tabular}
\caption{Means and Confidence Intervals of Final Prices for Firm A Across Discount Factors}
\end{table}


In [12]:
raw_data = {"Discount Factor": []}

for discount_factor in discount_factors:
    last_values = [prices[-1] for prices in price_histories_A[discount_factor]]
    raw_data["Discount Factor"].extend([discount_factor] * len(last_values))
    raw_data[f"Last Prices (Firm A)"] = raw_data.get(f"Last Prices (Firm A)", []) + last_values

raw_data_df = pd.DataFrame(raw_data)

csv_filename = "NEWlast_prices_for_ttest_analysis.csv"
raw_data_df.to_csv(csv_filename, index=False)

print(f"Raw data for the last prices saved to '{csv_filename}'.")


Raw data for the last prices saved to 'NEWlast_prices_for_ttest_analysis.csv'.
