In [227]:
import datetime
import itertools
import wbdata
import numpy as np
import pandas as pd
import joblib

policy = joblib.load("policy.pkl")
pair2id = {('C','C'):0, ('C','D'):1, ('D','C'):2, ('D','D'):3}
PAYOFFS = {
    ('C','C'):(3,3), ('C','D'):(0,5),
    ('D','C'):(5,0), ('D','D'):(1,1)
}

In [228]:
# --- 1. Fetch the raw tariff data again ---

countries = ["USA","CHN","DEU","BRA","IND", "JPN", "SWE", "FRA", "ITA"]
indicator = {"TM.TAX.MRCH.WM.AR.ZS": "tariff"}
date_range = (datetime.datetime(1800,1,1), datetime.datetime(2020,12,31))

raw = wbdata.get_dataframe(
    indicator,
    country=countries,
    date=date_range,
    parse_dates=True
).reset_index()

# ensure we have the columns we expect
print("Raw columns:", raw.columns.tolist())

Raw columns: ['country', 'date', 'tariff']


In [229]:
# --- 2. Discretize tariff → action (‘C’/’D’) ---

# 2a) drop any NaNs in tariff
raw = raw.dropna(subset=['tariff'])
# 2b) compute each country’s median tariff
medians = raw.groupby('country')['tariff'].median().to_dict()

# 2c) map to C/D
def to_action(row):
    return 'C' if row['tariff'] <= medians[row['country']] else 'D'

raw['action'] = raw.apply(to_action, axis=1)

print("After labeling, columns:", raw.columns.tolist())
print(raw[['country','date','tariff','action']].head())

After labeling, columns: ['country', 'date', 'tariff', 'action']
  country       date  tariff action
0  Brazil 2020-01-01    8.41      C
1  Brazil 2019-01-01    7.97      C
2  Brazil 2018-01-01    7.95      C
3  Brazil 2017-01-01    8.59      C
4  Brazil 2016-01-01    8.01      C


In [230]:
# --- 3. Pivot to get year×country action table ---
raw['year'] = raw['date'].dt.year

action_df = raw.pivot_table(
    index='year',
    columns='country',
    values='action',
    aggfunc='first'
)

print("action_df shape:", action_df.shape)
print(action_df.head())

action_df shape: (33, 9)
country Brazil China France Germany India Italy Japan Sweden United States
year                                                                      
1988       NaN   NaN    NaN     NaN   NaN   NaN     D      C           NaN
1989         D   NaN    NaN     NaN   NaN   NaN     D      D             D
1990         D   NaN    NaN     NaN     D   NaN     C    NaN             D
1991         D   NaN    NaN     NaN   NaN   NaN     D    NaN             D
1992         D     D    NaN     NaN     D   NaN     D    NaN             D


In [231]:
# --- 4. Load your policy and simulate ---

def build_features(history, seq_len):
    """
    history: list of (model_move, opp_move) tuples
    returns: 1×(2*seq_len) feature vector, padded with [0,0]
    """
    feats = []
    last = history[-seq_len:]
    pad = seq_len - len(last)
    
    # pad with zeros for missing rounds
    feats.extend([0,0] * pad)
    
    # then actual rounds
    for m, o in last:
        feats.append(int(m == 'D'))
        feats.append(int(o == 'D'))
        
    return np.array(feats).reshape(1, -1)

seq_len = 30

results = []
years     = sorted(action_df.index)
countries = list(action_df.columns)

for country in countries:
    history = []
    score_model = 0
    score_country = 0

    for year in years:
        opp = action_df.loc[year, country]
        if pd.isna(opp): continue

        X_feat = build_features(history, seq_len)  # shape (1, n_feats)
        pC, pD = policy.predict_proba(X_feat)[0]       # [P(C), P(D)]
        model_action = 'C' if pD > pC else 'D'

        pm, po = PAYOFFS[(model_action, opp)]
        score_model  += pm
        score_country+= po
        history.append((model_action, opp))


    results.append({
        'country': country,
        'model_score': score_model,
        'country_score': score_country,
        'score_diff': score_model - score_country
    })

res_df = pd.DataFrame(results).sort_values('model_score', ascending=False)
print(res_df.to_string(index=False))

      country  model_score  country_score  score_diff
        Japan           89             34          55
United States           88             28          60
       Brazil           86             31          55
        China           78             13          65
        India           77             12          65
       Sweden           71             11          60
       France           65             10          55
      Germany           65             10          55
        Italy           65             10          55


In [232]:
import datetime
import wbdata        # pip install wbdata
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# --- 1. Fetch raw tariff data (2000–2020) for a few countries ---
countries = ["USA","CHN","DEU","BRA","IND"]
indicator = {"TM.TAX.MRCH.WM.AR.ZS": "tariff"}

raw = wbdata.get_dataframe(
    indicator,
    country=countries,
    date=(datetime.datetime(1900,1,1), datetime.datetime(2020,12,31)),
    parse_dates=True
).reset_index()

# Drop rows with missing tariff
raw = raw.dropna(subset=['tariff'])

# --- 2. Label each row as 'C' or 'D' based on country median ---
medians = raw.groupby('country')['tariff'].median().to_dict()
raw['action'] = raw.apply(
    lambda row: 'C' if row['tariff'] <= medians[row['country']] else 'D',
    axis=1
)

# --- 3. Build next‑move forecasting dataset ---
def make_real_move_data(df, seq_len=5):
    """
    For each country, slide a window of length seq_len over its action series
    and predict the next action.
    Returns X (N, 2*seq_len) and y (N,).
    """
    X, y = [], []
    # ensure sorted by date
    df = df.sort_values(['country', 'date'])
    for country, group in df.groupby('country'):
        actions = group['action'].tolist()
        for i in range(len(actions) - seq_len):
            window = actions[i : i+seq_len]
            nxt    = actions[i+seq_len]
            feats = []
            # encode each round as two binary features: (you_defected, opp_defected)
            # here "you" == country, "opp" == same country since forecasting its own move
            for a in window:
                bit = int(a == 'D')
                feats.extend([bit, bit])
            X.append(feats)
            y.append(int(nxt == 'D'))
    return np.array(X), np.array(y)

# Choose sequence length
SEQ_LEN = 3
X_real, y_real = make_real_move_data(raw, seq_len=SEQ_LEN)

print("Dataset shapes:", X_real.shape, y_real.shape)

# --- 4. Train/test split ---
Xtr, Xte, ytr, yte = train_test_split(
    X_real, y_real, test_size=0.2, random_state=42, stratify=y_real
)

# --- 5. Train XGBoost ---
clf_real = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    n_estimators=1000,
    max_depth=3,
    learning_rate=0.1
)
clf_real.fit(Xtr, ytr)

# --- 6. Evaluate next‑move accuracy ---
y_pred = clf_real.predict(Xte)
acc = accuracy_score(yte, y_pred)
print(f"Next‑move forecast accuracy: {acc:.3f}")

Dataset shapes: (120, 6) (120,)


Parameters: { "use_label_encoder" } are not used.



Next‑move forecast accuracy: 0.917


In [233]:
seq_len = SEQ_LEN

results = []
years     = sorted(action_df.index)
countries = list(action_df.columns)

for country in countries:
    history = []
    score_model = 0
    score_country = 0

    for year in years:
        opp = action_df.loc[year, country]
        if pd.isna(opp): continue

        X_feat = build_features(history, seq_len)  # shape (1, n_feats)
        pC, pD = clf_real.predict_proba(X_feat)[0]       # [P(C), P(D)]
        model_action = 'C' if pD > pC else 'D'

        pm, po = PAYOFFS[(model_action, opp)]
        score_model  += pm
        score_country+= po
        history.append((model_action, opp))


    results.append({
        'country': country,
        'model_score': score_model,
        'country_score': score_country,
        'score_diff': score_model - score_country
    })

res_df = pd.DataFrame(results).sort_values('model_score', ascending=False)
print(res_df.to_string(index=False))

      country  model_score  country_score  score_diff
        Japan           89             44          45
United States           84             39          45
       Brazil           83             43          40
        China           69             34          35
        India           67             32          35
       Sweden           63             28          35
       France           57             27          30
      Germany           57             27          30
        Italy           57             27          30
