In [98]:
import numpy as np
import numpy as np
import random

In [99]:
# Payoffs
payoff_matrix = {
    ('C', 'C'): (3, 3),
    ('C', 'D'): (0, 5),
    ('D', 'C'): (5, 0),
    ('D', 'D'): (1, 1)
}

def random_strategy(history, player):
    """Always picks C or D at random."""
    return random.choice(['C','D'])

def tit_for_tat(history, player):
    """
    Cooperate on first move; thereafter mirror opponent's last move.
    history is list of tuples [(p1_move, p2_move), ...].
    player is 1 or 2.
    """
    if not history:
        return 'C'
    last = history[-1]
    last_move = ""
    if player == 1:
        last_move = last[1]
    elif player == 2:
        last_move = last[0]
    return "D" if last_move == "D" else "C"

# --- Always Cooperate ---
def always_cooperate(history, player):
    return 'C'

# --- Always Defect ---
def always_defect(history, player):
    return 'D'

# --- Grim Trigger ---
def grim_trigger(history, player):
    if not history:
        return 'C'
    opp_defected = any(
        (h[1]=='D' if player==1 else h[0]=='D')
        for h in history
    )
    return 'D' if opp_defected else 'C'


def tit_for_two_tats(history, player):
    if len(history) < 2:
        return 'C'
    last_two = history[-2:]
    opp_moves = [(h[1] if player==1 else h[0]) for h in last_two]
    return 'D' if all(m=='D' for m in opp_moves) else 'C'


def pavlov(history, player):
    """
    If last round got high payoff (3 or 5), repeat your last action; otherwise switch.
    """
    if not history:
        return 'C'
    last = history[-1]
    payoff_p1, payoff_p2 = payoff_matrix[(last[0], last[1])]
    payoff = payoff_p1 if player==1 else payoff_p2
    last_action = last[0] if player==1 else last[1]
    if payoff >= 3:
        return last_action
    return 'C' if last_action=='D' else 'D'


def generous_tit_for_tat(history, player, generosity=0.3):
    """
    Like tit-for-tat, but with a chance to forgive (cooperate) even if opponent defected.
    generosity = probability of forgiving.
    """
    if not history:
        return 'C'
    opp_last = history[-1][1] if player==1 else history[-1][0]
    if opp_last == 'C':
        return 'C'
    return 'C' if random.random() < generosity else 'D'


def soft_majority(history, player):
    """
    Cooperate if opponent has cooperated more often than defected so far; else defect.
    """
    if not history:
        return 'C'
    opp_moves = [h[1] if player==1 else h[0] for h in history]
    coop = opp_moves.count('C')
    defect = opp_moves.count('D')
    return 'C' if coop >= defect else 'D'

# --- 1. Alternator ---
def alternator(history, player):
    """
    Cooperate on odd‐numbered rounds, defect on even‐numbered rounds.
    """
    round_num = len(history) + 1
    return 'C' if round_num % 2 == 1 else 'D'


# --- 2. Suspicious Tit‑for‑Tat ---
def suspicious_tit_for_tat(history, player):
    """
    Defect on the first move, then play Tit‑for‑Tat.
    """
    if not history:
        return 'D'
    
    last = history[-1][1] if player == 1 else history[-1][0]
    return last


# --- 3. Tester ---
def tester(history, player):
    """
    Round1: Defect  
    Round2: Cooperate  
    Thereafter: Tit‑for‑Tat
    """
    if len(history) == 0:
        return 'D'
    if len(history) == 1:
        return 'C'
    last = history[-1][1] if player == 1 else history[-1][0]
    return last


# --- 4. Limited Retaliation ---
def limited_retaliation(history, player, m=2):
    """
    Cooperate until opponent defects; then defect for m rounds, then forgive.
    """
    if not history:
        return 'C'
    # extract opponent’s moves
    opp = [h[1] if player == 1 else h[0] for h in history]
    # find index of last defection
    defections = [i for i, move in enumerate(opp) if move == 'D']
    if not defections:
        return 'C'
    last_def = defections[-1]
    # soon after that defection, we punish for m rounds
    current = len(history)
    if current <= last_def + m:
        return 'D'
    return 'C'


# --- 5. Gradual (simplified) ---
def gradual(history, player):
    """
    For each opponent defection, punish with two defections, then two cooperations.
    This simplified version uses total # of opponent defections to enter a 4‐round cycle.
    """
    # count opponent defections
    opp = [h[1] if player == 1 else h[0] for h in history]
    d_count = opp.count('D')
    # cycle through: D, D, C, C
    phase = d_count % 4
    return 'D' if phase in (0, 1) else 'C'


def simulate_game(rounds, strat1, strat2):
    history = []
    for r in range(1, rounds+1):
        a1 = strat1(history, 1)
        a2 = strat2(history, 2)
        p1, p2 = payoff_matrix[(a1,a2)]
        history.append([r,a1, a2,p1,p2])

    total1 = sum(h[3] for h in history)
    total2 = sum(h[4] for h in history)
    
    if total1 > total2:
        winner = 1
    elif total2 > total1:
        winner = 2
    else:
        winner = 0

    return history, {'p1_total': total1, 'p2_total': total2}, winner

In [100]:
STRATEGIES = {
    'Tit-for-Tat': tit_for_tat,
    'Tit-for-2-Tat': tit_for_two_tats,
    "Suspicious-Tit-for-Tat": suspicious_tit_for_tat,
    'Grim Trigger': grim_trigger,
    'Pavlov': pavlov,
    'Always Defect': always_defect,
    'Always Cooperate': always_cooperate,
    'Generous TFT': generous_tit_for_tat,
    'Soft Majority': soft_majority,
    'Random Strategy': random_strategy,
    'Alternator': alternator,
    'Gradual': gradual,
    "Limited Retaliation": limited_retaliation,
    "Tester": tester,
}

def generate_strategy_dataset(strategies, seq_len=100, games_per_strat=500):
    pair2id = {('C','C'):0, ('C','D'):1, ('D','C'):2, ('D','D'):3}
    X, y = [], []
    rowid = 0
    for label, strat in enumerate(strategies.values()):
        for _ in range(games_per_strat):
            history = []
            # simulate seq_len rounds
            for _ in range(seq_len):
                a1 = strat(history, 1)
                a2 = random_strategy(history, 2)
                p1, p2 = payoff_matrix[(a1,a2)]
                history.append((a1,a2))
                rowid += 1
            # encode the last seq_len pairs
            tokens = [pair2id[p] for p in history]
            X.append(tokens)
            y.append(list(STRATEGIES.keys())[label])
    return np.array(X), np.array(y)

X, y = generate_strategy_dataset(STRATEGIES, seq_len=100, games_per_strat=5000)
print("X shape:", X.shape, "y shape:", y.shape)

X shape: (70000, 100) y shape: (70000,)


In [101]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)

# Train
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred,
      target_names=list(STRATEGIES.keys())))

                        precision    recall  f1-score   support

           Tit-for-Tat       1.00      1.00      1.00      1000
         Tit-for-2-Tat       0.93      1.00      0.96      1000
Suspicious-Tit-for-Tat       1.00      1.00      1.00      1000
          Grim Trigger       0.80      0.74      0.77      1000
                Pavlov       0.94      1.00      0.97      1000
         Always Defect       0.96      1.00      0.98      1000
      Always Cooperate       0.96      1.00      0.98      1000
          Generous TFT       0.90      1.00      0.95      1000
         Soft Majority       1.00      0.74      0.85      1000
       Random Strategy       1.00      0.83      0.91      1000
            Alternator       0.90      0.55      0.68      1000
               Gradual       0.66      0.94      0.78      1000
   Limited Retaliation       0.84      1.00      0.91      1000
                Tester       0.88      0.83      0.86      1000

              accuracy                

In [106]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Dense, Dropout
)
from tensorflow.keras import Model
from sklearn.preprocessing import LabelEncoder

num_strats = len(STRATEGIES)
seq_len = X.shape[1]
vocab_size = 4  # four possible (p1,p2) pairs

# build model
inp = Input(shape=(seq_len,), dtype="int32")
x = Embedding(vocab_size, 16, input_length=seq_len)(inp)
x = LSTM(128, return_sequences=False)(x)
x = Dropout(0.2)(x)
out = Dense(num_strats, activation="softmax")(x)
model = Model(inp, out)

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

le = LabelEncoder()
le.fit(y_train)

y_train_cat = le.transform(y_train)

# train
history = model.fit(
    X_train, y_train_cat,
    validation_split=0.1,
    epochs=10, batch_size=128
)


y_test_cat = le.transform(y_test)
# evaluate
loss, acc = model.evaluate(X_test, y_test_cat, verbose=0)
y_pred_cat = model.predict(X_test).argmax(axis=-1)
print(f"LSTM test accuracy: {acc:.3f}")

Epoch 1/10




[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 83ms/step - accuracy: 0.2860 - loss: 1.8988 - val_accuracy: 0.5252 - val_loss: 1.0189
Epoch 2/10
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 79ms/step - accuracy: 0.5488 - loss: 0.9646 - val_accuracy: 0.6630 - val_loss: 0.6787
Epoch 3/10
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 78ms/step - accuracy: 0.6917 - loss: 0.6324 - val_accuracy: 0.7459 - val_loss: 0.4867
Epoch 4/10
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 79ms/step - accuracy: 0.7939 - loss: 0.4193 - val_accuracy: 0.8416 - val_loss: 0.3161
Epoch 5/10
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 78ms/step - accuracy: 0.8589 - loss: 0.3124 - val_accuracy: 0.9027 - val_loss: 0.2188
Epoch 6/10
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 79ms/step - accuracy: 0.8899 - loss: 0.2464 - val_accuracy: 0.9125 - val_loss: 0.1717
Epoch 7/10
[1m394/394[0m 

In [107]:
y_test_stat = le.inverse_transform(y_test_cat)
y_test_pred_stat = le.inverse_transform(y_pred_cat)
print(classification_report(y_test_pred_stat, y_test_stat, target_names=list(STRATEGIES.keys())))

                        precision    recall  f1-score   support

           Tit-for-Tat       1.00      1.00      1.00      1000
         Tit-for-2-Tat       1.00      0.94      0.97      1061
Suspicious-Tit-for-Tat       1.00      1.00      1.00      1000
          Grim Trigger       1.00      1.00      1.00      1002
                Pavlov       1.00      1.00      1.00      1000
         Always Defect       0.97      0.98      0.97       982
      Always Cooperate       1.00      1.00      1.00      1001
          Generous TFT       1.00      1.00      1.00      1000
         Soft Majority       1.00      1.00      1.00       997
       Random Strategy       0.92      0.96      0.94       959
            Alternator       0.98      0.68      0.80      1450
               Gradual       0.53      0.96      0.68       550
   Limited Retaliation       1.00      1.00      1.00       999
                Tester       1.00      1.00      1.00       999

              accuracy                