In [55]:
import numpy as np
import numpy as np
import random

In [56]:
# Payoffs
payoff_matrix = {
    ('C', 'C'): (3, 3),
    ('C', 'D'): (0, 5),
    ('D', 'C'): (5, 0),
    ('D', 'D'): (1, 1)
}

def random_strategy(history, player):
    """Always picks C or D at random."""
    return random.choice(['C','D'])

def tit_for_tat(history, player):
    """
    Cooperate on first move; thereafter mirror opponent's last move.
    history is list of tuples [(p1_move, p2_move), ...].
    player is 1 or 2.
    """
    if not history:
        return 'C'
    last = history[-1]
    last_move = ""
    if player == 1:
        last_move = last[1]
    elif player == 2:
        last_move = last[0]
    return "D" if last_move == "D" else "C"

# --- Always Cooperate ---
def always_cooperate(history, player):
    return 'C'

# --- Always Defect ---
def always_defect(history, player):
    return 'D'

# --- Grim Trigger ---
def grim_trigger(history, player):
    if not history:
        return 'C'
    opp_defected = any(
        (h[1]=='D' if player==1 else h[0]=='D')
        for h in history
    )
    return 'D' if opp_defected else 'C'


def tit_for_two_tats(history, player):
    if len(history) < 2:
        return 'C'
    last_two = history[-2:]
    opp_moves = [(h[1] if player==1 else h[0]) for h in last_two]
    return 'D' if all(m=='D' for m in opp_moves) else 'C'


def pavlov(history, player):
    """
    If last round got high payoff (3 or 5), repeat your last action; otherwise switch.
    """
    if not history:
        return 'C'
    last = history[-1]
    payoff_p1, payoff_p2 = payoff_matrix[(last[0], last[1])]
    payoff = payoff_p1 if player==1 else payoff_p2
    last_action = last[0] if player==1 else last[1]
    if payoff >= 3:
        return last_action
    return 'C' if last_action=='D' else 'D'


def generous_tit_for_tat(history, player, generosity=0.3):
    """
    Like tit-for-tat, but with a chance to forgive (cooperate) even if opponent defected.
    generosity = probability of forgiving.
    """
    if not history:
        return 'C'
    opp_last = history[-1][1] if player==1 else history[-1][0]
    if opp_last == 'C':
        return 'C'
    return 'C' if random.random() < generosity else 'D'


def soft_majority(history, player):
    """
    Cooperate if opponent has cooperated more often than defected so far; else defect.
    """
    if not history:
        return 'C'
    opp_moves = [h[1] if player==1 else h[0] for h in history]
    coop = opp_moves.count('C')
    defect = opp_moves.count('D')
    return 'C' if coop >= defect else 'D'


def simulate_game(rounds, strat1, strat2):
    history = []
    for r in range(1, rounds+1):
        a1 = strat1(history, 1)
        a2 = strat2(history, 2)
        p1, p2 = payoff_matrix[(a1,a2)]
        history.append([r,a1, a2,p1,p2])

    total1 = sum(h[3] for h in history)
    total2 = sum(h[4] for h in history)
    
    if total1 > total2:
        winner = 1
    elif total2 > total1:
        winner = 2
    else:
        winner = 0

    return history, {'p1_total': total1, 'p2_total': total2}, winner

In [57]:
STRATEGIES = {
    'Tit-for-Tat': tit_for_tat,
    'Tit-for-2-Tat': tit_for_two_tats,
    'Grim Trigger': grim_trigger,
    'Pavlov': pavlov,
    'Always Defect': always_defect,
    'Always Cooperate': always_cooperate,
    'Generous TFT': generous_tit_for_tat,
    'Soft Majority': soft_majority,
    'Random Strategy': random_strategy
}

def generate_strategy_dataset(strategies, seq_len=100, games_per_strat=500):
    pair2id = {('C','C'):0, ('C','D'):1, ('D','C'):2, ('D','D'):3}
    X, y = [], []
    rowid = 0
    for label, strat in enumerate(strategies.values()):
        for _ in range(games_per_strat):
            history = []
            # simulate seq_len rounds
            for _ in range(seq_len):
                a1 = strat(history, 1)
                a2 = random_strategy(history, 2)
                p1, p2 = payoff_matrix[(a1,a2)]
                history.append((a1,a2))
                rowid += 1
            # encode the last seq_len pairs
            tokens = [pair2id[p] for p in history]
            X.append(tokens)
            y.append(list(STRATEGIES.keys())[label])
    return np.array(X), np.array(y)

X, y = generate_strategy_dataset(STRATEGIES, seq_len=150, games_per_strat=3000)
print("X shape:", X.shape, "y shape:", y.shape)

X shape: (27000, 150) y shape: (27000,)


In [58]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)

# Train
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred,
      target_names=list(STRATEGIES.keys())))

                  precision    recall  f1-score   support

     Tit-for-Tat       0.93      1.00      0.97       600
   Tit-for-2-Tat       1.00      1.00      1.00       600
    Grim Trigger       0.84      0.70      0.76       600
          Pavlov       0.94      1.00      0.97       600
   Always Defect       0.89      0.99      0.94       600
Always Cooperate       0.99      0.83      0.91       600
    Generous TFT       1.00      0.83      0.91       600
   Soft Majority       0.81      0.99      0.89       600
 Random Strategy       0.87      0.89      0.88       600

        accuracy                           0.92      5400
       macro avg       0.92      0.92      0.91      5400
    weighted avg       0.92      0.92      0.91      5400



In [59]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Dense, Dropout
)
from tensorflow.keras import Model
from sklearn.preprocessing import LabelEncoder

num_strats = len(STRATEGIES)
seq_len = X.shape[1]
vocab_size = 4  # four possible (p1,p2) pairs

# build model
inp = Input(shape=(seq_len,), dtype="int32")
x = Embedding(vocab_size, 16, input_length=seq_len)(inp)
x = LSTM(32, return_sequences=False)(x)
x = Dropout(0.2)(x)
out = Dense(num_strats, activation="softmax")(x)
model = Model(inp, out)

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

le = LabelEncoder()
le.fit(y_train)

y_train_cat = le.transform(y_train)

# train
history = model.fit(
    X_train, y_train_cat,
    validation_split=0.1,
    epochs=15, batch_size=64
)


y_test_cat = le.transform(y_test)
# evaluate
loss, acc = model.evaluate(X_test, y_test_cat, verbose=0)
y_pred_cat = model.predict(X_test).argmax(axis=-1)
print(f"LSTM test accuracy: {acc:.3f}")

Epoch 1/15




[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.3009 - loss: 1.7229 - val_accuracy: 0.4838 - val_loss: 1.0412
Epoch 2/15
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.4757 - loss: 1.0186 - val_accuracy: 0.5454 - val_loss: 0.8770
Epoch 3/15
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5551 - loss: 0.8768 - val_accuracy: 0.5787 - val_loss: 0.8361
Epoch 4/15
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6043 - loss: 0.7966 - val_accuracy: 0.6852 - val_loss: 0.6425
Epoch 5/15
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6685 - loss: 0.6415 - val_accuracy: 0.6745 - val_loss: 0.6202
Epoch 6/15
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6759 - loss: 0.6166 - val_accuracy: 0.6991 - val_loss: 0.6231
Epoch 7/15
[1m304/304[0m [32m━

In [61]:
y_test_stat = le.inverse_transform(y_test_cat)
y_test_pred_stat = le.inverse_transform(y_pred_cat)
print(classification_report(y_test_pred_stat, y_test_stat, target_names=list(STRATEGIES.keys())))

                  precision    recall  f1-score   support

     Tit-for-Tat       0.99      0.85      0.92       704
   Tit-for-2-Tat       1.00      1.00      1.00       600
    Grim Trigger       1.00      1.00      1.00       601
          Pavlov       0.99      0.96      0.97       618
   Always Defect       1.00      0.99      1.00       603
Always Cooperate       0.99      0.99      0.99       604
    Generous TFT       0.76      0.98      0.86       467
   Soft Majority       1.00      1.00      1.00       603
 Random Strategy       1.00      1.00      1.00       600

        accuracy                           0.97      5400
       macro avg       0.97      0.97      0.97      5400
    weighted avg       0.98      0.97      0.97      5400

