In [42]:
!pip install requests pandas matplotlib seaborn scikit-learn numpy

Defaulting to user installation because normal site-packages is not writeable


In [43]:
import requests
import datetime as dt
import itertools
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from order_book import order_book_features
from order_book import fetch_sell_orders


In [None]:
ITEMS   = ["T4_BAG", "T5_BAG", "T4_CAPE", "T5_CAPE", "T4_HEAD_PLATE_SET1",
           "T5_HEAD_PLATE_SET1", "T4_2H_AXE", "T5_2H_AXE", "T4_2H_BOW", "T5_2H_BOW"]
CITIES  = ["Caerleon", "Bridgewatch", "Lymhurst", "Fort Sterling", "Martlock"]
URL     = "https://west.albion-online-data.com/api/v2/stats/history/{}.json"

today   = dt.date.today()
start   = today - dt.timedelta(days=30)

def fetch_history(item):
    params = {
        "date": start.isoformat(),
        "end_date": today.isoformat(),
        "locations": ",".join(CITIES),
        "time-scale": 24,                 # daily granularity
        "qualities": 1                    # normal quality only
    }
    return requests.get(URL.format(item), params=params).json()

raw = {item: fetch_history(item) for item in ITEMS}

print(raw)

{'T4_BAG': [{'location': 'Bridgewatch', 'item_id': 'T4_BAG', 'quality': 1, 'data': [{'item_count': 45, 'avg_price': 2775, 'timestamp': '2025-07-19T04:00:00'}, {'item_count': 62, 'avg_price': 2872, 'timestamp': '2025-07-19T05:00:00'}, {'item_count': 21, 'avg_price': 2848, 'timestamp': '2025-07-19T06:00:00'}, {'item_count': 28, 'avg_price': 2825, 'timestamp': '2025-07-19T07:00:00'}, {'item_count': 10, 'avg_price': 2844, 'timestamp': '2025-07-19T08:00:00'}, {'item_count': 13, 'avg_price': 2729, 'timestamp': '2025-07-19T09:00:00'}, {'item_count': 4, 'avg_price': 3010, 'timestamp': '2025-07-19T10:00:00'}, {'item_count': 16, 'avg_price': 2765, 'timestamp': '2025-07-19T11:00:00'}, {'item_count': 32, 'avg_price': 2755, 'timestamp': '2025-07-19T12:00:00'}, {'item_count': 34, 'avg_price': 2846, 'timestamp': '2025-07-19T13:00:00'}, {'item_count': 43, 'avg_price': 2705, 'timestamp': '2025-07-19T14:00:00'}, {'item_count': 35, 'avg_price': 2851, 'timestamp': '2025-07-19T15:00:00'}, {'item_count': 22

In [45]:
records = []
for item, data in raw.items():
    for entry in data:
        for daily_data in entry["data"]:
            records.append({
                "item": item,
                "city": entry["location"],
                "timestamp": pd.to_datetime(daily_data["timestamp"]),
                "price": daily_data["avg_price"]
            })
df = pd.DataFrame(records).sort_values(["item", "city", "timestamp"])
df["log_price"] = np.log(df["price"])

display(df.head())

Unnamed: 0,item,city,timestamp,price,log_price
1941,T4_2H_AXE,Bridgewatch,2025-07-18 20:00:00,3007,8.008698
1942,T4_2H_AXE,Bridgewatch,2025-07-18 21:00:00,5073,8.531688
1943,T4_2H_AXE,Bridgewatch,2025-07-18 22:00:00,5983,8.696677
1944,T4_2H_AXE,Bridgewatch,2025-07-18 23:00:00,5984,8.696845
1945,T4_2H_AXE,Bridgewatch,2025-07-19 00:00:00,2345,7.760041


In [46]:
def rolling_z(series, window=7):
    roll = series.rolling(window, min_periods=3)
    return (series - roll.mean()) / roll.std()

df["z"] = df.groupby(["item", "city"])["log_price"].transform(rolling_z)

display(df.head())

Unnamed: 0,item,city,timestamp,price,log_price,z
1941,T4_2H_AXE,Bridgewatch,2025-07-18 20:00:00,3007,8.008698,
1942,T4_2H_AXE,Bridgewatch,2025-07-18 21:00:00,5073,8.531688,
1943,T4_2H_AXE,Bridgewatch,2025-07-18 22:00:00,5983,8.696677,0.791593
1944,T4_2H_AXE,Bridgewatch,2025-07-18 23:00:00,5984,8.696845,0.654613
1945,T4_2H_AXE,Bridgewatch,2025-07-19 00:00:00,2345,7.760041,-1.347927


In [47]:
df["tier"] = df["item"].str.extract(r"T(\d)")[0].astype(int)
df["slot"] = df["item"].str.split("_").str[2]

peer_median = df.groupby(["timestamp", "tier", "slot"])["log_price"].transform("median")
df["peer_dev"] = df["log_price"] - peer_median

In [48]:
df["abs_z"]        = df["z"].abs()
df["abs_peer_dev"] = df["peer_dev"].abs()

X = df[["abs_z", "abs_peer_dev"]].fillna(0)

iso = IsolationForest(contamination=0.01, random_state=42)
df["manipulated"] = iso.fit_predict(X) == -1

In [49]:
print(df["manipulated"].value_counts())
# True  -> number of predictions = 1
# False -> everything else

manipulated
False    2325
True       24
Name: count, dtype: int64


In [50]:
display(df.loc[df['manipulated']==True])

Unnamed: 0,item,city,timestamp,price,log_price,z,tier,slot,peer_dev,abs_z,abs_peer_dev,manipulated
1959,T4_2H_AXE,Caerleon,2025-07-23 02:00:00,34193,10.439776,,4,AXE,1.048111,,1.048111,True
1969,T4_2H_AXE,Caerleon,2025-07-23 18:00:00,34200,10.439981,0.373645,4,AXE,1.08103,0.373645,1.08103,True
1972,T4_2H_AXE,Caerleon,2025-07-23 22:00:00,18713,9.836974,-0.111804,4,AXE,0.897137,0.111804,0.897137,True
2012,T4_2H_AXE,Fort Sterling,2025-07-23 02:00:00,4203,8.343554,-0.196991,4,AXE,-1.048111,0.196991,1.048111,True
2021,T4_2H_AXE,Fort Sterling,2025-07-23 18:00:00,3936,8.27792,-1.884345,4,AXE,-1.08103,1.884345,1.08103,True
2025,T4_2H_AXE,Fort Sterling,2025-07-23 22:00:00,3111,8.042699,-1.213764,4,AXE,-0.897137,1.213764,0.897137,True
1592,T4_HEAD_PLATE_SET1,Caerleon,2025-07-20 18:00:00,2245,7.716461,,4,PLATE,0.782064,,0.782064,True
1593,T4_HEAD_PLATE_SET1,Caerleon,2025-07-20 19:00:00,3324,8.108924,0.57735,4,PLATE,1.08161,0.57735,1.08161,True
1595,T4_HEAD_PLATE_SET1,Caerleon,2025-07-20 21:00:00,3333,8.111628,0.458738,4,PLATE,1.060639,0.458738,1.060639,True
1596,T4_HEAD_PLATE_SET1,Caerleon,2025-07-20 22:00:00,87,4.465908,-2.029429,4,PLATE,-2.347536,2.029429,2.347536,True


In [51]:
# Show the peer median for the same days
peer = (df[(df["tier"]==4)&(df["slot"]=="AXE")]
          .groupby("timestamp")["log_price"]
          .median())

# Compare
suspicious = df[df["manipulated"]]
for ts in suspicious["timestamp"].unique():
    print(ts, "peer median:", peer[ts], "axe price:",
          suspicious.loc[suspicious["timestamp"]==ts, "log_price"].values)

2025-07-23 02:00:00 peer median: 9.391665029371882 axe price: [10.43977622  8.34355384  6.52941884]
2025-07-23 18:00:00 peer median: 9.35895059061142 axe price: [10.43998092  8.27792026  6.57228254]
2025-07-23 22:00:00 peer median: 8.939836622687713 axe price: [9.83697375 8.0426995  6.58340922]
2025-07-20 18:00:00 peer median: 8.61122983334262 axe price: [7.7164608]
2025-07-20 19:00:00 peer median: 8.332308352219117 axe price: [8.10892416]


KeyError: Timestamp('2025-07-20 21:00:00')

In [None]:
ORDERS_URL = "https://west.albion-online-data.com/api/v2/stats/orders/{}.json"

# Example: today's Caerleon T4_2H_AXE sell orders
sell_df = fetch_sell_orders("T4_2H_AXE", "Caerleon")

In [None]:
ob_feat = order_book_features(sell_df)

In [None]:
# Build a small cache so we don’t hammer the API
order_cache = {}

def get_ob_features(item, city, date):
    key = (item, city, date.date())
    if key in order_cache:
        return order_cache[key]
    sells = fetch_sell_orders(item, city)
    feats = order_book_features(sells)
    order_cache[key] = feats
    return feats

# vectorised assignment
df = df.sort_values(["item", "city", "timestamp"])
new_cols = df.apply(
    lambda r: pd.Series(get_ob_features(r["item"], r["city"], r["timestamp"])),
    axis=1
)
df = pd.concat([df, new_cols], axis=1)

In [None]:
X = df[["abs_z", "abs_peer_dev",
        "wall_thickness", "wall_concentration"]].fillna(0)

iso = IsolationForest(contamination=0.01, random_state=42)
df["manipulated_v2"] = iso.fit_predict(X) == -1

In [None]:
# How many new flags?
print(df["manipulated_v2"].value_counts())

# Inspect
display(df[df["manipulated_v2"]]
        [["item","city","timestamp","price",
          "wall_thickness","wall_concentration"]])

manipulated_v2
False    1212
True       13
Name: count, dtype: int64


Unnamed: 0,item,city,timestamp,price,wall_thickness,wall_concentration
873,T4_2H_AXE,Caerleon,2025-06-24,43693,0,0
874,T4_2H_AXE,Caerleon,2025-06-25,43693,0,0
875,T4_2H_AXE,Caerleon,2025-06-26,43691,0,0
879,T4_2H_AXE,Caerleon,2025-06-30,23353,0,0
881,T4_2H_AXE,Caerleon,2025-07-02,16509,0,0
882,T4_2H_AXE,Caerleon,2025-07-03,22286,0,0
884,T4_2H_AXE,Caerleon,2025-07-05,16176,0,0
886,T4_2H_AXE,Caerleon,2025-07-07,26999,0,0
893,T4_2H_AXE,Caerleon,2025-07-14,34999,0,0
895,T4_2H_AXE,Caerleon,2025-07-16,31465,0,0
