In [1]:
import csv
from collections import deque
import statistics

In [3]:
def read_series(path, key):
    """Load a single CSV column as a list of floats.

    The file is parsed with ``csv.DictReader``, so its first row is taken
    as the header naming the columns.

    Args:
        path: Location of the CSV file to open.
        key: Header name of the column to extract.

    Returns:
        The ``key`` value of every data row, converted with ``float``, in
        file order. A file with a header but no data rows gives ``[]``.

    Raises:
        KeyError: If a row has no ``key`` column.
        ValueError: If a cell cannot be parsed by ``float``.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, newline='') as handle:
        return [float(record[key]) for record in csv.DictReader(handle)]

def rolling_stats(series, window):
    """Compute the trailing rolling mean and sample standard deviation.

    For each position the statistics cover the last ``window`` values up to
    and including the current one. Positions where fewer than ``window``
    values have been seen get ``None`` in both outputs.

    Args:
        series: Iterable of numbers, in order.
        window: Number of most recent values per window; must be >= 1.

    Returns:
        A ``(means, stds)`` pair of lists, each with one entry per element
        of ``series``. With ``window == 1`` the sample standard deviation
        is undefined (it needs two points), so every ``stds`` entry is
        ``None`` while ``means`` simply echoes the input values.

    Raises:
        ValueError: If ``window`` is less than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")

    means = []
    stds = []
    buf = deque(maxlen=window)  # drops the oldest value automatically
    # statistics.stdev raises on fewer than two points, so never call it
    # for a one-element window.
    has_spread = window >= 2
    for x in series:
        buf.append(x)
        if len(buf) < window:
            # Warm-up: window not full yet.
            means.append(None)
            stds.append(None)
        else:
            # NOTE: recomputed from scratch each step (O(window) per point);
            # kept on purpose so results match statistics.mean/stdev exactly.
            means.append(statistics.mean(buf))
            stds.append(statistics.stdev(buf) if has_spread else None)
    return means, stds

def detect_anomalies(series, rolling_mu, rolling_sigma, z_thresh=3.0):
    """Flag points whose rolling z-score is larger in magnitude than a cutoff.

    The three inputs are walked in lockstep with ``zip``, so iteration stops
    at the shortest of them.

    Args:
        series: Observed values.
        rolling_mu: Rolling mean per position, or ``None`` where unavailable.
        rolling_sigma: Rolling standard deviation per position, or ``None``
            where unavailable.
        z_thresh: A point is flagged only when ``abs(z)`` is strictly
            greater than this value.

    Returns:
        A list of ``(index, value, z_score)`` tuples in input order.
    """
    flagged = []
    aligned = zip(series, rolling_mu, rolling_sigma)
    for position, (value, mean, std) in enumerate(aligned):
        # A z-score needs both statistics and a non-zero spread.
        usable = mean is not None and std is not None and std != 0
        if usable:
            score = (value - mean) / std
            if abs(score) > z_thresh:
                flagged.append((position, value, score))
    return flagged

if __name__ == "__main__":
    # Run settings: input file and column, rolling-window length, z cutoff.
    path, col = "/content/gemini_BTCUSD_2020_1min.csv", "Close"
    window, z_thresh = 20, 3.0

    # Pipeline: load the column, build the rolling baseline, flag outliers.
    series = read_series(path, col)
    mu, sigma = rolling_stats(series, window)
    anomalies = detect_anomalies(series, mu, sigma, z_thresh=z_thresh)

    # Report: one summary line, then one line per flagged row.
    print(f"Found {len(anomalies)} anomalies in column '{col}':")
    for idx, val, z in anomalies:
        print(f"  Row {idx:4d}: value={val:.2f}, z_score={z:.2f}")

Found 79 anomalies in column 'Close':
  Row  355: value=55868.23, z_score=3.34
  Row  412: value=54949.35, z_score=-3.12
  Row  468: value=54775.16, z_score=-3.09
  Row 1118: value=56549.52, z_score=-3.33
  Row 2131: value=54271.05, z_score=-3.34
  Row 2163: value=53322.43, z_score=-3.13
  Row 2401: value=56572.05, z_score=3.03
  Row 2480: value=55000.00, z_score=-3.49
  Row 2852: value=59400.00, z_score=-3.15
  Row 3357: value=60073.04, z_score=-3.24
  Row 3504: value=61010.93, z_score=-3.21
  Row 3505: value=60949.78, z_score=-3.06
  Row 3567: value=60749.28, z_score=-3.08
  Row 4036: value=62160.00, z_score=-3.01
  Row 4166: value=62219.56, z_score=-3.16
  Row 4385: value=61756.49, z_score=3.15
  Row 5067: value=60410.72, z_score=-3.29
  Row 5305: value=61475.03, z_score=-3.19
  Row 5366: value=61171.90, z_score=-3.40
  Row 5487: value=62533.17, z_score=3.27
  Row 6081: value=62928.75, z_score=3.19
  Row 6886: value=63002.64, z_score=3.18
  Row 7314: value=62616.02, z_score=-3.14
  

* **read_series:** I pull out one numeric column (e.g. “Close” prices) and turn it into a Python list of floats.

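* **rolling_stats:** I keep a sliding window (deque) of the last N values (say, 20), including the current one, and for each point I record the mean and sample standard deviation of that window. The first N−1 points, where the window is not yet full, get `None` for both.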

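* **detect_anomalies:** I go through each data point, compute its z-score (how many sigmas away from the rolling mean), and if its absolute value exceeds the threshold (3 by default), I flag it. Points that have no rolling statistics yet, or whose rolling sigma is zero, are skipped.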