In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import seaborn as sns

In [2]:
class Firm:
    def __init__(self, mc, price_floor, price_cap, learning_rate=0.85, discount_factor=0.9):
        self.mc = mc
        self.price_floor = price_floor
        self.price_cap = price_cap
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.q_table = {}

    def get_state(self, price_A, price_B):
        return (round(price_A, 2), round(price_B, 2))

    def get_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {0: 0, 1: 0, 2: 0}
        if np.random.rand() < 0.1: 
            return np.random.choice([0, 1, 2])
        else:
            return max(self.q_table[state], key=self.q_table[state].get)


    def update_q_value(self, state, action, reward, next_state):
        if next_state not in self.q_table:
            self.q_table[next_state] = {0: 0, 1: 0, 2: 0}
        best_next_action = max(self.q_table[next_state], key=self.q_table[next_state].get)
              
        last_price_effect = 0.5 * (next_state[1] - self.mc)
        
        scaled_reward = reward + last_price_effect
        self.q_table[state][action] += self.learning_rate * (scaled_reward + self.discount_factor * self.q_table[next_state][best_next_action] - self.q_table[state][action])
    def calculate_profit(self, price):
        quantity_sold = 100 - price
        return (price - self.mc) * max(quantity_sold, 0)  
    
    def get_learned_policy(self):
        policy = {}
        for state, actions in self.q_table.items():
            best_action = max(actions, key=actions.get)
            policy[state] = best_action
        return policy

def simulate_bertrand_rl(max_iterations=250000, price_floor=10, price_cap=55, start_price_A=10, start_price_B=10):
    firm_A = Firm(mc=10, price_floor=price_floor, price_cap=price_cap)
    firm_B = Firm(mc=20, price_floor=price_floor, price_cap=price_cap)

    price_A = start_price_A if start_price_A is not None else np.random.uniform(price_floor, price_cap)
    price_B = start_price_B if start_price_B is not None else np.random.uniform(price_floor, price_cap)

    price_history_A = [start_price_A]
    price_history_B = [start_price_B]
    reward = []
    convergence = []
    tolerance = 3
    check_interval = 100000
    stable_count = 0
    min_check_iterations = 500000
    Final_Policy = []
    actions = []
    for i in range(max_iterations):
        '''
        if (i + 1) % 250000 == 0:
            print(f"Iteration {i + 1}")
        '''
        state_A = firm_A.get_state(price_A, price_B)
        state_B = firm_B.get_state(price_B, price_A)

        action_A = firm_A.get_action(state_A)
        action_B = firm_B.get_action(state_B)

        if price_history_A[i-1] < price_history_B[i-1]:
            price_A = min(price_B, 55)
            actions.append(2)
        else:
            if action_A == 0: 
                price_A = max(price_A - 1, price_floor)
            elif action_A == 2:
                price_A = min(price_A + 1, price_cap)
            actions.append(action_A)
                
                
        if price_history_A[i-1] > price_history_B[i-1]:
            price_B = min(price_history_A[i-1], 55)
            actions.append(2)
        else:
            if action_B == 0:
                price_B = max(price_B - 1, price_floor)
            elif action_B == 2:
                price_B = min(price_B + 1, price_cap)
            actions.append(action_B)

        if price_A > price_B:  
            profit_A = 0
            profit_B = firm_B.calculate_profit(price_B)
            
        if price_A < price_B:
            profit_A = firm_A.calculate_profit(price_A)
            profit_B = 0
        if price_A == price_B:
            profit_A = firm_A.calculate_profit(price_A)
            profit_B = firm_B.calculate_profit(price_B)
        
        firm_A.last_price = price_B
        firm_B.last_price = price_A

        firm_A.update_q_value(state_A, action_A, profit_A, firm_A.get_state(price_A, price_B))
        firm_B.update_q_value(state_B, action_B, profit_B, firm_B.get_state(price_B, price_A))

        price_history_A.append(price_A)
        price_history_B.append(price_B)
        reward.append(profit_A)
        reward.append(profit_B)
        
        if i == max_iterations - 1:
            Final_Policy.append(firm_B.get_learned_policy())
            Final_Policy.append(firm_A.get_learned_policy())
    return price_history_A, price_history_B, actions, Final_Policy

In [3]:
class Firm_no_effect:
    def __init__(self, mc, price_floor, price_cap, learning_rate=0.85, discount_factor=0.9):
        self.mc = mc
        self.price_floor = price_floor
        self.price_cap = price_cap
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.q_table = {}

    def get_state(self, price_A, price_B):
        return (round(price_A, 2), round(price_B, 2))

    def get_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {0: 0, 1: 0, 2: 0}
        if np.random.rand() < 0.1:
            return np.random.choice([0, 1, 2])
        else:
            return max(self.q_table[state], key=self.q_table[state].get)


    def update_q_value(self, state, action, reward, next_state):
        if next_state not in self.q_table:
            self.q_table[next_state] = {0: 0, 1: 0, 2: 0}
        best_next_action = max(self.q_table[next_state], key=self.q_table[next_state].get)
        
        scaled_reward = reward
        self.q_table[state][action] += self.learning_rate * (scaled_reward + self.discount_factor * self.q_table[next_state][best_next_action] - self.q_table[state][action])
    def calculate_profit(self, price):
        quantity_sold = 100 - price
        return (price - self.mc) * max(quantity_sold, 0)
    
    def get_learned_policy(self):
        policy = {}
        for state, actions in self.q_table.items():
            best_action = max(actions, key=actions.get)
            policy[state] = best_action
        return policy

def simulate_bertrand_rl_no_effect(max_iterations=250000, price_floor=10, price_cap=55, start_price_A=10, start_price_B=10):
    firm_A = Firm_no_effect(mc=10, price_floor=price_floor, price_cap=price_cap)
    firm_B = Firm_no_effect(mc=20, price_floor=price_floor, price_cap=price_cap)

    # Set starting prices
    price_A = start_price_A if start_price_A is not None else np.random.uniform(price_floor, price_cap)
    price_B = start_price_B if start_price_B is not None else np.random.uniform(price_floor, price_cap)

    price_history_A = [start_price_A]
    price_history_B = [start_price_B]
    reward = []
    convergence = []
    tolerance = 3
    check_interval = 100000
    stable_count = 0
    min_check_iterations = 500000
    Final_Policy = []
    actions = []
    for i in range(max_iterations):
        '''
        if (i + 1) % 250000 == 0:
            print(f"Iteration {i + 1}")
        '''
        state_A = firm_A.get_state(price_A, price_B)
        state_B = firm_B.get_state(price_B, price_A)

        action_A = firm_A.get_action(state_A)
        action_B = firm_B.get_action(state_B)

        if price_history_A[i-1] < price_history_B[i-1]:
            price_A = min(price_B, 55)
            actions.append(2)
        else:
            if action_A == 0:
                price_A = max(price_A - 1, price_floor)
            elif action_A == 2:
                price_A = min(price_A + 1, price_cap)
            actions.append(action_A)
                
                
        if price_history_A[i-1] > price_history_B[i-1]:
            price_B = min(price_history_A[i-1], 55)
            actions.append(2)
        else:
            if action_B == 0:
                price_B = max(price_B - 1, price_floor)
            elif action_B == 2:
                price_B = min(price_B + 1, price_cap)
            actions.append(action_B)

        if price_A > price_B:  
            profit_A = 0
            profit_B = firm_B.calculate_profit(price_B)
            
        if price_A < price_B:
            profit_A = firm_A.calculate_profit(price_A)
            profit_B = 0
        if price_A == price_B:
            profit_A = firm_A.calculate_profit(price_A)
            profit_B = firm_B.calculate_profit(price_B)
        
        firm_A.last_price = price_B
        firm_B.last_price = price_A

        firm_A.update_q_value(state_A, action_A, profit_A, firm_A.get_state(price_A, price_B))
        firm_B.update_q_value(state_B, action_B, profit_B, firm_B.get_state(price_B, price_A))

        price_history_A.append(price_A)
        price_history_B.append(price_B)
        reward.append(profit_A)
        reward.append(profit_B)
        
        if i == max_iterations - 1:
            Final_Policy.append(firm_B.get_learned_policy())
            Final_Policy.append(firm_A.get_learned_policy())
    return price_history_A, price_history_B, actions, Final_Policy

In [6]:
start_price_A = 10
start_price_B = 10
with open('convergence_no_effect.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    for t in range(0, 100):
        if t < 50:
            price_history_A, price_history_B, reward, time = simulate_bertrand_rl(start_price_A=start_price_A, start_price_B=start_price_B)
            writer.writerow([price_history_B[-1]])
        else:
            price_history_A, price_history_B, reward, time = simulate_bertrand_rl_no_effect(start_price_A=start_price_A, start_price_B=start_price_B)
            writer.writerow([price_history_B[-1]])
            
        if t % 10 == 0:
            print(t+1)

1
11
21
31
41
51
61
71
81
91


In [13]:
import pandas as pd
from scipy.stats import ttest_ind

# Load the data
data = pd.read_csv('convergence_no_effect.csv')

# Split the data into two halves
first_half = data.iloc[:50].values.flatten()
second_half = data.iloc[50:].values.flatten()

# Perform a two-sample t-test
t_stat, p_value = ttest_ind(first_half, second_half)

# Calculate additional statistics
mean_first = first_half.mean()
std_first = first_half.std()
n_first = len(first_half)

mean_second = second_half.mean()
std_second = second_half.std()
n_second = len(second_half)

# Print the results
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")

if p_value < 0.05:
    print("The difference between the two halves is statistically significant.")
else:
    print("No statistically significant difference between the two halves.")

# Create a LaTeX table
latex_table = f"""
\\begin{{table}}[ht]
\\centering
\\begin{{tabular}}{{lcc}}
\\hline
 & With Price Effect & Without Price Effect \\\\
\\hline
Number of Observations & {n_first} & {n_second} \\\\
Mean & {mean_first:.4f} & {mean_second:.4f} \\\\
Standard Deviation & {std_first:.4f} & {std_second:.4f} \\\\
\\hline
\\multicolumn{{3}}{{l}}{{T-statistic: {t_stat:.4f}}} \\\\
\\multicolumn{{3}}{{l}}{{P-value: {p_value:.4f}}} \\\\
\\hline
\\end{{tabular}}
\\caption{{T-test Analysis of With Price Effect vs. Without Price Effect}}
\\label{{tab:t_test_price_effect}}
\\end{{table}}
"""

print(latex_table)


T-statistic: 0.3037
P-value: 0.7620
No statistically significant difference between the two halves.

\begin{table}[ht]
\centering
\begin{tabular}{lcc}
\hline
 & With Price Effect & Without Price Effect \\
\hline
Number of Observations & 50 & 49 \\
Mean & 51.8000 & 51.5714 \\
Standard Deviation & 3.4756 & 3.9279 \\
\hline
\multicolumn{3}{l}{T-statistic: 0.3037} \\
\multicolumn{3}{l}{P-value: 0.7620} \\
\hline
\end{tabular}
\caption{T-test Analysis of With Price Effect vs. Without Price Effect}
\label{tab:t_test_price_effect}
\end{table}

