# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib
import requests
import sqlite3

# Extracting Item IDs from the Database

In [None]:
# get items ids from db
db_path = 'prod.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

cursor.execute("SELECT item_id FROM items")
item_ids = cursor.fetchall()

item_ids = [item[0] for item in item_ids]
df_item_ids = pd.DataFrame(item_ids, columns=['item_id'])

df_item_ids.to_csv('item_ids.csv', index=False)

conn.close()

print("Item IDs have been saved to 'item_ids.csv'")

Item IDs have been saved to 'item_ids.csv'


# Clustering

In [2]:
import pandas as pd

item_df = pd.read_csv("item_ids.csv")
item_ids = item_df["item_id"].tolist()

In [3]:
import requests
import pandas as pd
import numpy as np

def get_price_series(item_id):
    url = f"https://api.weirdgloop.org/exchange/history/rs/all?id={item_id}"
    try:
        response = requests.get(url)
        data = response.json()
        if str(item_id) in data:
            df = pd.DataFrame(data[str(item_id)])
            df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
            df = df.sort_values("timestamp").reset_index(drop=True)
            return df["price"].values
        else:
            return None
    except:
        return None

In [4]:
def preprocess_series(series, length=100):
    series = series[-length:]
    if len(series) < length:
        pad = [series[-1]] * (length - len(series))
        series = np.concatenate([pad, series])
    return series

In [None]:
import pandas as pd
import requests
import csv
import os
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

item_ids_df = pd.read_csv("item_ids.csv")
item_ids = item_ids_df["item_id"].tolist()

output_file = "price_features.csv"
failed_items = []

if not os.path.exists(output_file):
    with open(output_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["item_id"] + [f"diff_{i}" for i in range(50)])

scaler = StandardScaler()

for i, item_id in tqdm(enumerate(item_ids), total=len(item_ids)):
    try:
        response = requests.get(f"https://api.weirdgloop.org/exchange/history/rs/all?id={item_id}")

        if response.status_code != 200:
            failed_items.append(item_id)
            continue

        data = response.json()

        if str(item_id) not in data or not data[str(item_id)]:
            failed_items.append(item_id)
            continue

        df = pd.DataFrame(data[str(item_id)])
        df["price_diff"] = df["price"].diff()
        features = df["price_diff"].dropna().tail(50).values

        if len(features) == 50:
            normalized_features = scaler.fit_transform(features.reshape(-1, 1)).flatten()

            with open(output_file, "a", newline="") as f:
                writer = csv.writer(f)
                writer.writerow([item_id] + list(normalized_features))
        else:
            failed_items.append(item_id)

    except Exception as e:
        failed_items.append(item_id)

pd.DataFrame(failed_items, columns=["failed_item_id"]).to_csv("failed_items.csv", index=False)

In [6]:
X_df = pd.DataFrame(X)

X_df.to_csv("item_features.csv", index=False)

In [None]:
from sklearn.cluster import KMeans

n_clusters = 10
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
labels = kmeans.fit_predict(X)

cluster_map = pd.DataFrame({"item_id": valid_ids, "cluster": labels})
cluster_map.to_csv("item_cluster_map.csv", index=False)

In [None]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

inertias = []
K_range = range(2, 20)

for k in K_range:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    inertias.append(kmeans.inertia_)

plt.figure(figsize=(10, 5))
plt.plot(K_range, inertias, marker='o')
plt.xlabel("Number of Clusters (k)")
plt.ylabel("Inertia")
plt.title("Elbow Method for Optimal k")
plt.grid(True)
plt.show()

# Training and Testing the Model (One Item)

In [None]:
# call api
url = "https://api.weirdgloop.org/exchange/history/rs/all?id=2"

response = requests.get(url)
data = response.json()

item_id = "2"
if item_id in data:
    df = pd.DataFrame(data[item_id])  # convert to df
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")  # Convert timestamp from milliseconds
    print(df.head())
else:
    print(f"Item ID {item_id} not found in response.")

  id  price  volume  timestamp
0  2    188     NaN 2008-05-21
1  2    186     NaN 2008-05-22
2  2    186     NaN 2008-05-23
3  2    184     NaN 2008-05-24
4  2    184     NaN 2008-05-25


In [None]:
# create features
df["prev_price"] = df["price"].shift(1)
X = df[["prev_price"]]  # features
y = df["price"]  # target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 8.019659037963061


In [None]:
# save model
joblib.dump(model, "runescape_model.pkl")

['runescape_model.pkl']

In [None]:
# load model
model = joblib.load("runescape_model.pkl")

In [None]:
# test prediction
latest_price = df["prev_price"].iloc[-1]

next_input = pd.DataFrame([[latest_price]], columns=["prev_price"])

predicted_price = model.predict(next_input)

print(f"Predicted Next Price: {predicted_price[0]}")

Predicted Next Price: 1519.801047619048
