In [1]:
import os

import numpy as np
import pandas as pd
import requests
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# NOTE(review): an API key is committed in source. The literal is kept only as
# a fallback so existing runs keep working; prefer exporting COINGECKO_API_KEY
# and rotating/removing the hard-coded value.
API_KEY = os.environ.get("COINGECKO_API_KEY", "CG-x84hs33zGnSaQrfM7s8yJygs")

# Step 1: Fetch 30 days of BTC/USD market data (no "interval" parameter is sent)
url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart"
params = {
    "vs_currency": "usd",
    "days": "30",                  # 2–90 days auto-returns hourly data
    "x_cg_demo_api_key": API_KEY,
}

# The timeout keeps the script from hanging forever on a stalled connection.
# Transport failures and non-JSON bodies are reported the same way as the
# missing-"prices" case below (SystemExit) instead of a raw traceback.
try:
    response = requests.get(url, params=params, timeout=30)
    data = response.json()
except (requests.exceptions.RequestException, ValueError) as exc:
    raise SystemExit(f"Request to CoinGecko failed: {exc}") from exc

# Check for errors: anything that is not a JSON object containing a "prices"
# key is treated as an API error and printed for inspection.
if not isinstance(data, dict) or "prices" not in data:
    print("API error:", data)
    raise SystemExit("No 'prices' returned — but NOT because of interval. Run again in 30 sec.")

# Step 2: Build DataFrame
# Each entry of data["prices"] is loaded as a (timestamp, price) row; the
# timestamp column is interpreted as milliseconds since the epoch.
price_rows = data["prices"]
df = pd.DataFrame(price_rows, columns=["timestamp", "price"])
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")

# Step 3: Feature engineering
price = df["price"]
df["pct_change"] = price.pct_change()  # step-over-step relative change

# Trailing moving averages of the price over 5 and 15 rows.
for ma_column, window in (("ma_5", 5), ("ma_15", 15)):
    df[ma_column] = price.rolling(window).mean()

# Predict 5 time-steps ahead (5 hours, assuming hourly rows)
horizon = 5
df["future_price"] = price.shift(-horizon)
# label = 1 when the price `horizon` rows later is strictly higher, else 0
df["label"] = df["future_price"].gt(price).astype(int)

# Drop the warm-up rows (NaN moving averages / pct_change) and the final rows
# with no future price, then renumber so positions match index labels.
df = df.dropna().reset_index(drop=True)

# Step 4: Prepare data
features = ["pct_change", "ma_5", "ma_15"]
X, y = df[features], df["label"]

# shuffle=False preserves row order, so the held-out 20% is the tail of df.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Step 5: Train model
# Fixed random_state makes the forest reproducible for the same input data.
model = RandomForestClassifier(random_state=42, n_estimators=150)
model.fit(X_train, y_train)

# Step 6: Evaluate
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", report_text)

# Step 7: Show 5 wrong predictions
# Positions (within the test split) of the first five mismatched predictions.
mismatch = y_pred != y_test
wrong_idx = np.where(mismatch)[0][:5]

# Test rows follow the training rows in df, so a test position is converted
# to a df position by adding the training-set size.
train_rows = len(X_train)

print("\n----- 5 Misclassified Samples -----\n")
for i in wrong_idx:
    global_index = train_rows + i
    row = df.iloc[global_index]
    print("Index:", global_index)
    print("Timestamp:", row["timestamp"])
    print("True:", y_test.iloc[i], "| Pred:", y_pred[i])
    print(row[features])
    print()


Accuracy: 0.5460992907801419

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.11      0.20        72
           1       0.52      1.00      0.68        69

    accuracy                           0.55       141
   macro avg       0.76      0.56      0.44       141
weighted avg       0.76      0.55      0.44       141


----- 5 Misclassified Samples -----

Index: 562
Timestamp: 2025-11-20 02:01:30.328000
True: 0 | Pred: 1
pct_change        0.007412
ma_5            91330.0281
ma_15         90571.993566
Name: 562, dtype: object

Index: 563
Timestamp: 2025-11-20 03:01:25.312000
True: 0 | Pred: 1
pct_change        0.000537
ma_5          91723.398972
ma_15         90650.772489
Name: 563, dtype: object

Index: 564
Timestamp: 2025-11-20 04:00:18.745000
True: 0 | Pred: 1
pct_change       -0.001583
ma_5          92121.950273
ma_15         90698.754534
Name: 564, dtype: object

Index: 565
Timestamp: 2025-11-20 05:01:43.021000
True: 0 | 