# In [6]:
import subprocess
import os
import glob
import json
from awpy import Demo
from awpy.stats import adr, kast, rating, calculate_trades
import gc
import pandas as pd

# Number of rounds in one regulation half; passed as ``half_rounds`` to the
# demo-processing calls further down in this file.
HALF_ROUNDS = 12

def estimate_tickrate_rounds_median(rounds_raw: pd.DataFrame, freeze_time_seconds: int = 15) -> float | None:
    if rounds_raw is None or rounds_raw.empty:
        return None

    start_col = None
    for c in ["start_tick", "start", "startTick", "freeze_start_tick"]:
        if c in rounds_raw.columns:
            start_col = c
            break

    freeze_end_col = None
    for c in ["freeze_end_tick", "freeze_end", "freezeEndTick", "freezeEnd"]:
        if c in rounds_raw.columns:
            freeze_end_col = c
            break

    if not start_col or not freeze_end_col or freeze_time_seconds <= 0:
        return None

    rr = rounds_raw[[start_col, freeze_end_col]].dropna().copy()
    if rr.empty:
        return None

    rr["freeze_ticks"] = rr[freeze_end_col].astype(float) - rr[start_col].astype(float)
    rr = rr[rr["freeze_ticks"] > 0]
    if rr.empty:
        return None

    rr["tickrate_est"] = rr["freeze_ticks"] / float(freeze_time_seconds)
    rr = rr[(rr["tickrate_est"] >= 30) & (rr["tickrate_est"] <= 256)]
    if rr.empty:
        return None

    if len(rr) >= 5:
        rr = rr.iloc[1:].copy()
        if rr.empty:
            return None

    return float(rr["tickrate_est"].median())


def estimate_tickrate_demoparser2(demo_path: str) -> float | None:
    try:
        from demoparser2 import DemoParser
        parser = DemoParser(str(demo_path))
        df = parser.parse_ticks(["tick", "game_time"])
        if df is None or df.empty:
            return None

        d = df[["tick", "game_time"]].dropna().copy().sort_values("tick")
        d["dtick"] = d["tick"].diff()
        d["dtime"] = d["game_time"].diff()
        d = d[(d["dtick"] > 0) & (d["dtime"] > 0) & (d["dtime"] < 1.0)]
        if d.empty:
            return None

        tickrate = float((d["dtick"] / d["dtime"]).median())
        return tickrate if 16 <= tickrate <= 256 else None
    except Exception:
        return None


def add_team_winner(rounds_df, meta_info, half_rounds: int):
    """Add a ``team_winner`` column that names the team which won each round.

    Args:
        rounds_df: Rounds table (all demo parts already concatenated) with
            ``winner`` and ``round_num`` columns. It is modified in place.
        meta_info: Per-map metadata with ``team1`` / ``team2`` entries, each
            holding ``name`` and ``side`` (the side the team started on).
        half_rounds: Number of rounds in one regulation half.

    Returns:
        The same ``rounds_df`` object with the new column. Rows whose
        ``winner`` is neither ``"ct"`` nor ``"t"`` keep that value unchanged.
    """
    first, second = meta_info["team1"], meta_info["team2"]
    if (first.get("side") or "").upper() == "CT":
        ct_first, t_first = first["name"], second["name"]
    else:
        ct_first, t_first = second["name"], first["name"]

    regulation_rounds = 2 * half_rounds

    def _ct_starter_plays_ct(round_num):
        # True when the team that started on CT is on the CT side this round.
        if round_num <= half_rounds:
            return True  # first half
        if round_num <= regulation_rounds:
            return False  # second half
        # Overtime is split into blocks of 3 rounds with sides alternating per block.
        # NOTE(review): confirm this alternation matches the overtime side rules
        # of the events being parsed (especially from the second overtime on).
        ot_block = (round_num - regulation_rounds - 1) // 3
        return ot_block % 2 != 0

    def map_winner(row):
        side = row["winner"]
        if side not in ("ct", "t"):
            return side
        ct_starter_won = (side == "ct") == _ct_starter_plays_ct(int(row["round_num"]))
        return ct_first if ct_starter_won else t_first

    rounds_df["team_winner"] = rounds_df.apply(map_winner, axis=1)
    return rounds_df


# -------------------------------
# NEW: helpers for gluing demo parts and multi-map merges
# -------------------------------

# Column names treated as tick values in addition to any name containing "tick".
_TICKLIKE_EXTRA = {
    "tick", "start", "freeze_end", "end", "official_end",
    "start_tick", "freeze_end_tick", "end_tick", "official_end_tick",
    "bomb_plant", "plant_tick", "defuse_tick", "explode_tick",
}


def _is_ticklike_col(col: str) -> bool:
    """Return True when a column name denotes a tick value.

    The name is compared case-insensitively: it qualifies when it contains
    ``"tick"`` or is listed in ``_TICKLIKE_EXTRA``.
    """
    lowered = str(col).lower()
    if "tick" in lowered:
        return True
    return lowered in _TICKLIKE_EXTRA

def _apply_offsets(df: pd.DataFrame, round_offset: int = 0, tick_offset: int = 0) -> pd.DataFrame:
    """Shift ``round_num`` and every tick-like column of a table.

    Used so that the parts (p1/p2, ...) of one map's demo can be concatenated
    into a single consistent timeline.

    Args:
        df: Table to shift; ``None`` or an empty table is returned as is.
        round_offset: Value added to ``round_num`` (non-numeric round numbers
            count as 0 before the shift).
        tick_offset: Value added to every column accepted by
            ``_is_ticklike_col`` that holds at least one numeric value.

    Returns:
        A shifted copy of ``df``; the input table is not modified.
    """
    if df is None or df.empty:
        return df

    shifted = df.copy()

    if round_offset and "round_num" in shifted.columns:
        round_numbers = pd.to_numeric(shifted["round_num"], errors="coerce").fillna(0).astype(int)
        shifted["round_num"] = round_numbers + int(round_offset)

    if tick_offset:
        for name in filter(_is_ticklike_col, shifted.columns):
            numeric = pd.to_numeric(shifted[name], errors="coerce")
            # Columns without a single numeric value are left untouched.
            if numeric.notna().any():
                shifted[name] = numeric + int(tick_offset)

    return shifted

def _estimate_part_max_tick(data: dict) -> int:
    """Return the largest tick value found in the tables of one demo part.

    The result is used as the tick offset for the next part of the same map.
    Only the known table keys are inspected; within each non-empty table the
    ``tick`` column and the round-boundary columns are considered.

    Args:
        data: Mapping of table name to table for a single demo part.

    Returns:
        The maximum numeric value found, or 0 when nothing usable exists.
    """
    table_keys = (
        "ticks", "grenades", "rounds", "rounds_raw", "kills", "damages",
        "shots", "bomb", "smokes", "infernos", "footsteps",
    )
    tick_columns = (
        "tick",
        "official_end", "end", "freeze_end", "start",
        "official_end_tick", "end_tick", "freeze_end_tick", "start_tick",
    )

    def _numeric_max(frame, column):
        # Largest numeric value of a column, or None when it has none.
        values = pd.to_numeric(frame[column], errors="coerce").dropna()
        return None if values.empty else int(values.max())

    peaks = []
    for key in table_keys:
        frame = data.get(key)
        # Objects without an ``empty`` attribute are treated as empty.
        if frame is None or getattr(frame, "empty", True):
            continue
        for column in tick_columns:
            if column not in frame.columns:
                continue
            peak = _numeric_max(frame, column)
            if peak is not None:
                peaks.append(peak)

    return max(peaks, default=0)


class CSVtor:
    def __init__(self, path_to_zips, metadata_path, extract_to=None):
        self.zips = path_to_zips
        self.metadata = metadata_path  # путь к JSON с метаданными
        self.extract = extract_to

    def del_zip(self, rar_name_or_path: str):
        if not os.path.isabs(rar_name_or_path):
            rar_path = os.path.join(self.zips, rar_name_or_path)
        else:
            rar_path = rar_name_or_path

        if os.path.exists(rar_path):
            try:
                os.remove(rar_path)
                print(f"Удалён архив: {rar_path}")
            except OSError as e:
                print(f"Не удалось удалить архив {rar_path}: {e}")
        else:
            print(f"Архив для удаления не найден: {rar_path}")

    def del_demo(self, dem_name_or_path: str):
        if not os.path.isabs(dem_name_or_path):
            dem_path = os.path.join(self.extract, dem_name_or_path)
        else:
            dem_path = dem_name_or_path

        if os.path.exists(dem_path):
            try:
                os.remove(dem_path)
                print(f"Удалён DEM-файл: {dem_path}")
            except OSError as e:
                print(f"Не удалось удалить DEM-файл {dem_path}: {e}")
        else:
            print(f"DEM-файл для удаления не найден: {dem_path}")

    def _extract_with_winrar(self):
        with open(self.metadata, "r", encoding="utf-8") as f:
            demos_metadata = json.load(f)

        rar_to_metadata = {}
        for match_id, info in demos_metadata.items():
            demo_file = info.get("demo_file")
            if demo_file:
                rar_to_metadata[demo_file] = (match_id, info)

        if self.extract is None:
            self.extract = os.path.join(self.zips, "extracted")
        os.makedirs(self.extract, exist_ok=True)

        winrar_paths = [
            r"C:\Program Files\WinRAR\WinRAR.exe",
            r"C:\Program Files (x86)\WinRAR\WinRAR.exe",
            r"C:\Program Files\WinRAR\Rar.exe",
            r"C:\Program Files (x86)\WinRAR\Rar.exe",
        ]

        winrar = None
        for path in winrar_paths:
            if os.path.exists(path):
                winrar = path
                print(f"Найден WinRAR: {path}")
                break

        if not winrar:
            try:
                import winreg
                key = winreg.OpenKey(
                    winreg.HKEY_LOCAL_MACHINE,
                    r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\WinRAR.exe",
                )
                winrar = winreg.QueryValue(key, None)
                winreg.CloseKey(key)
                print(f"Найден WinRAR через реестр: {winrar}")
            except Exception:
                print("WinRAR не найден. Установите WinRAR: https://www.win-rar.com/")
                return {}

        rar_files = [f for f in os.listdir(self.zips) if f.lower().endswith(".rar")]

        successful = 0
        dem_to_map_sides = {}

        for rar_file in rar_files:
            full_path = os.path.join(self.zips, rar_file)
            dem_before = set(glob.glob(os.path.join(self.extract, "*.dem")))

            try:
                print(f"\n--- Распаковка {rar_file} ---")

                cmd = [winrar, "x", "-y", "-o+", "-ibck", full_path, self.extract]
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)

                if result.returncode == 0:
                    print("Успешно распакован через WinRAR!")
                    successful += 1
                else:
                    print(f"Ошибка WinRAR (код {result.returncode})")
                    if result.stdout:
                        print(f"Вывод: {result.stdout}")
                    if result.stderr:
                        print(f"Ошибки: {result.stderr}")
                    continue

                dem_after = set(glob.glob(os.path.join(self.extract, "*.dem")))
                new_dems = sorted(dem_after - dem_before)

                if not new_dems:
                    print("Новых .dem файлов не найдено для этого архива")
                    continue

                meta_entry = rar_to_metadata.get(rar_file)
                if not meta_entry:
                    print(f"Нет записи в demos_metadata для архива: {rar_file}")
                    continue

                match_id, meta = meta_entry
                maps_info = meta.get("maps", {})
                if not maps_info:
                    print(f"В метаданных нет ключа 'maps' для архива: {rar_file}")
                    continue

                for dem_path in new_dems:
                    dem_name = os.path.basename(dem_path)
                    base_name = os.path.splitext(dem_name)[0]
                    tokens = base_name.split("-")

                    map_index = None
                    for t in tokens:
                        if t.startswith("m") and t[1:].isdigit():
                            map_index = int(t[1:])
                            break

                    if map_index is None:
                        print(f"Не удалось определить номер карты по имени {dem_name}, пропускаю.")
                        continue

                    map_key = f"map_{map_index}"
                    map_meta = maps_info.get(map_key)

                    if not isinstance(map_meta, dict):
                        print(f"В метаданных матча {match_id} нет нормальной записи для {map_key}, "
                              f"ставлю пустой meta для {dem_name}.")
                        map_meta = {}

                    full_meta = dict(map_meta)
                    full_meta["rar_file"] = rar_file
                    full_meta["match_id"] = match_id

                    dem_to_map_sides[dem_name] = full_meta

            except subprocess.TimeoutExpired:
                print("Таймаут распаковки")
            except Exception as e:
                print(f"Ошибка: {e}")

        print(f"\n--- ИТОГО ---")
        print(f"Успешно распаковано: {successful}/{len(rar_files)}")
        print(f"Всего .dem с сопоставлёнными сторонами: {len(dem_to_map_sides)}")

        return dem_to_map_sides

    def _mark_old_demo_and_cleanup(self, dem_name, info, dem_path):
        match_id = info.get("match_id")
        rar_file = info.get("rar_file")
        print(f"[OLD DEMO] {dem_name}, match_id={match_id}, rar={rar_file}")

        metas = None
        try:
            with open(self.metadata, "r", encoding="utf-8") as f:
                metas = json.load(f)
        except Exception as e:
            print(f"Не удалось прочитать {self.metadata} для обновления: {e}")

        if isinstance(metas, dict):
            updated = False

            if match_id and match_id in metas:
                metas[match_id]["maps"] = {"map_1": -1}
                updated = True
            else:
                for mid, meta in metas.items():
                    if meta.get("demo_file") == rar_file:
                        meta["maps"] = {"map_1": -1}
                        updated = True
                        break

            if updated:
                try:
                    with open(self.metadata, "w", encoding="utf-8") as f:
                        json.dump(metas, f, ensure_ascii=False, indent=2)
                    print("Метаданные обновлены: maps = {'map_1': -1}")
                except Exception as e:
                    print(f"Не удалось сохранить обновлённый {self.metadata}: {e}")
            else:
                print("Не удалось найти запись в метаданных, чтобы пометить демку как старую")

        self.del_demo(dem_path)
        if rar_file:
            self.del_zip(rar_file)

    def _dem_to_csv_list(self, dem_meta, half_rounds: int = 12):
        """Parse each demo and yield its tables one demo at a time.

        This is a generator. For every ``dem_name`` in ``dem_meta`` that
        exists under ``self.extract`` the demo is parsed, a small
        ``<prefix>_meta.json`` with tick-rate estimates is written to the
        module-level ``OUT_JSON_DIR``, and ``(dem_name, data)`` is yielded.
        When parsing raises, the demo is handed to
        ``_mark_old_demo_and_cleanup`` and skipped.

        The clean-up after the ``yield`` (closing the demo, garbage
        collection, deleting the ``.dem`` file) only runs once the consumer
        asks for the next item.

        Args:
            dem_meta: Mapping ``{dem file name: metadata dict}``.
            half_rounds: Not used inside this method.

        Yields:
            tuple: ``(dem_name, data)`` where ``data`` maps table names to
            pandas tables plus ``dem_name``, ``meta``, ``header`` and
            ``meta_path``.
        """
        for dem_name, info in dem_meta.items():
            dem_path = os.path.join(self.extract, dem_name)
            if not os.path.exists(dem_path):
                print(f"DEM-файл не найден: {dem_path}, пропускаю")
                continue

            print(f"Парсим {dem_name}...")

            try:
                demo = Demo(dem_path)
                demo.parse(player_props=[
                    "X", "Y", "Z",
                    "health",
                    "armor_value",
                    "has_helmet",
                    "has_defuser",
                    "inventory",
                    "balance"
                ])
            except Exception as e:
                # Any parse failure is treated as "demo is too old to parse".
                print(f"Не удалось распарсить {dem_name}: {e}")
                print("Считаем демку старой: maps -> {'map_1': -1}, удаляем демку и архив.")
                self._mark_old_demo_and_cleanup(dem_name, info, dem_path)
                continue

            # Both ``rounds_df`` and ``rounds_raw`` below come from ``demo.rounds``.
            rounds_df = demo.rounds.to_pandas().copy()

            header = demo.header
            rounds_raw = demo.rounds.to_pandas()
            grenades = demo.grenades.to_pandas()
            damages = demo.damages.to_pandas()
            bomb = demo.bomb.to_pandas()
            smokes = demo.smokes.to_pandas()
            infernos = demo.infernos.to_pandas()
            shots = demo.shots.to_pandas()
            ticks = demo.ticks.to_pandas()
            rating_df = rating(demo).to_pandas()
            kast_df = kast(demo).to_pandas()
            adr_df = adr(demo).to_pandas()
            trades_df = calculate_trades(demo).to_pandas()
            kills = demo.kills.to_pandas()
            footsteps = demo.footsteps.to_pandas()

            # Tick-rate estimates plus the timing constants they were computed with.
            meta_json = {
                "tickrate_rounds_median": estimate_tickrate_rounds_median(rounds_raw, freeze_time_seconds=15),
                "tickrate_demoparser2": estimate_tickrate_demoparser2(str(dem_path)),
                "freeze_time_seconds": 15,
                "round_time_seconds": 115,
            }

            # OUT_JSON_DIR is a module-level name assigned elsewhere in this file.
            os.makedirs(OUT_JSON_DIR, exist_ok=True)

            prefix = os.path.splitext(dem_name)[0]
            meta_path = os.path.join(OUT_JSON_DIR, f"{prefix}_meta.json")

            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump(meta_json, f, ensure_ascii=False, indent=2)

            print("Сохранён META JSON:", meta_path)

            data = {
                "dem_name": dem_name,
                "meta": info,
                "rounds": rounds_df,
                "header": header,
                "rounds_raw": rounds_raw,
                "grenades": grenades,
                "damages": damages,
                "bomb": bomb,
                "smokes": smokes,
                "infernos": infernos,
                "shots": shots,
                "ticks": ticks,
                "rating": rating_df,
                "kast": kast_df,
                "adr": adr_df,
                "trades": trades_df,
                "kills": kills,
                "footsteps": footsteps,
                "meta_path": meta_path,
            }

            yield dem_name, data

            # Best-effort close; failures (including a missing ``close``) are ignored.
            try:
                demo.close()
            except Exception:
                pass

            del demo
            gc.collect()
            self.del_demo(dem_path)

    def dem_to_csv(self, half_rounds: int = 12):
        dem_meta = self._extract_with_winrar()
        if not dem_meta:
            return

        rar_to_all_dems = {}
        for dem_name, info in dem_meta.items():
            rar_file = info.get("rar_file")
            if not rar_file:
                continue
            rar_to_all_dems.setdefault(rar_file, set()).add(dem_name)

        rar_to_processed_dems = {rar: set() for rar in rar_to_all_dems}

        for dem_name, data in self._dem_to_csv_list(dem_meta, half_rounds=half_rounds):
            info = data["meta"]
            rar_file = info.get("rar_file")

            if rar_file in rar_to_processed_dems:
                rar_to_processed_dems[rar_file].add(dem_name)

                if rar_to_processed_dems[rar_file] >= rar_to_all_dems[rar_file]:
                    self.del_zip(rar_file)

            yield dem_name, data


# In [7]:
import json, os, time
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

class HLTV_scrapper:
    def __init__(self, cookies_file="hltv_cf_cookies.json",
                 base="https://www.hltv.org/",
                 download_dir="C:\\Users\\saidk\\scripts\\demos"):
        """Start a visible Chrome session configured for automatic downloads.

        Args:
            cookies_file: Path of the JSON file used to persist cookies.
            base: Base URL of the site.
            download_dir: Directory Chrome saves downloads into.
        """
        self.cookies = cookies_file
        self.base = base

        chrome_options = uc.ChromeOptions()
        for flag in ("--window-size=1400,900", "--disable-blink-features=AutomationControlled"):
            chrome_options.add_argument(flag)

        # Download preferences: save straight into ``download_dir`` without a prompt.
        chrome_options.add_experimental_option(
            "prefs",
            {
                "download.default_directory": download_dir,
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "safebrowsing.enabled": True,
            },
        )

        self.driver = uc.Chrome(options=chrome_options, use_subprocess=True, headless=False)
        self.driver.set_page_load_timeout(30)

    def _save_cookies(self):
        with open(self.cookies, "w", encoding="utf-8") as f:
            json.dump(self.driver.get_cookies(), f, indent=2, ensure_ascii=False)

    def _load_cookies(self):
        if not os.path.exists(self.cookies):
            return False
        with open(self.cookies, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        self.driver.get(self.base)
        for c in cookies:
            c.pop("sameSite", None)
            try:
                self.driver.add_cookie(c)
            except Exception:
                pass
        return True

    def _wait_cloudflare_pass(self, max_wait=90):
        """Wait until the page title is no longer ``"just a moment..."``.

        Args:
            max_wait: Timeout in seconds handed to each ``WebDriverWait`` condition.

        Returns:
            bool: ``True`` once the title changed and a ``body`` element is
            present (followed by a one-second pause); ``False`` when waiting
            raised, e.g. on timeout.
        """
        waiter = WebDriverWait(self.driver, max_wait)
        try:
            waiter.until(lambda drv: drv.title.strip().lower() != "just a moment...")
            waiter.until(EC.presence_of_element_located((By.CSS_SELECTOR, "body")))
            time.sleep(1.0)
        except Exception:
            return False
        return True

    def endpData(self, target):
        try:
            had = self._load_cookies()
            try:
                response = self.driver.get(target)
            except TimeoutException:
                print(f"[WARN] Таймаут при загрузке {target}, останавливаем загрузку вкладки")
                try:
                    self.driver.execute_script("window.stop();")
                except Exception as e:
                    print(f"[WARN] window.stop() failed: {e}")
                response = None

            if not had:
                print("Нет сохранённых куки. Кликните вручную 'Verify you are human' (Cloudflare).")
                ok = self._wait_cloudflare_pass(max_wait=120)
                if not ok:
                    raise RuntimeError("Cloudflare не пропустил в отведённое время.")
                self._save_cookies()
                print("Куки Cloudflare сохранены.")

            print("Title:", self.driver.title)
            return response

        except Exception as e:
            print("Ошибка в endpData:", e)

    def find_elem(self, elem_struct):
        """Return all elements of the current page matching a CSS selector.

        Args:
            elem_struct: CSS selector string.
        """
        matches = self.driver.find_elements(By.CSS_SELECTOR, elem_struct)
        return matches

    def quit(self):
        self.driver.quit()


# In [8]:
import json, os, time, re
from urllib.parse import urlencode, urljoin

class HLTVDemo:
    def __init__(
        self,
        cookies_file="hltv_cf_cookies.json",
        base="https://www.hltv.org/",
        download_dir="C:\\Users\\saidk\\scripts\\demos",
        downloaded_file="downloaded_matches.json",
        metadata_file="demos_metadata.json",
    ):
        """Create the scraper and load the persisted download state.

        Args:
            cookies_file: Cookie file passed on to ``HLTV_scrapper``.
            base: Base URL passed on to ``HLTV_scrapper``.
            download_dir: Directory the browser downloads into.
            downloaded_file: JSON file listing already downloaded match ids.
            metadata_file: JSON file with per-match metadata.
        """
        self.scrapper = HLTV_scrapper(cookies_file, base, download_dir)
        self.download_dir = download_dir

        # Each path must be set before its loader runs: the loaders read it.
        self.downloaded_file = downloaded_file
        self.downloaded_ids = self._load_downloaded_ids()

        self.metadata_file = metadata_file
        self.metadata = self._load_metadata()

    def _load_metadata(self):
        if not os.path.exists(self.metadata_file):
            return {}
        try:
            with open(self.metadata_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            print(f"Cannot read {self.metadata_file}: {e}")
            return {}

    def _save_metadata(self):
        try:
            with open(self.metadata_file, "w", encoding="utf-8") as f:
                json.dump(self.metadata, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"Cannot save {self.metadata_file}: {e}")

    def _extract_match_id_generic(self, url: str) -> str:
        print("URL:", url)
        if "mapstatsid/" in url:
            try:
                part = url.split("mapstatsid/", 1)[1]
                return part.split("/", 1)[0]
            except Exception:
                return ""
        if "/matches/" in url:
            try:
                part = url.split("/matches/", 1)[1]
                return part.split("/", 1)[0]
            except Exception:
                return ""
        m = re.search(r"(\d+)", url)
        return m.group(1) if m else ""

    def _get_stat_matches_page(self, params):
        url = "https://www.hltv.org/results"
        final_url = f"{url}?{urlencode(params)}"
        return self.scrapper.endpData(final_url)

    def _get_matches(self):
        elem_struct = 'a[href^="/matches/"]'
        links = self.scrapper.find_elem(elem_struct)
        return links

    def _wait_for_download(self, before_files=None, timeout=3600):
        if before_files is None:
            before_files = set(os.listdir(self.download_dir))

        before_files = {f for f in before_files if not f.endswith(".tmp")}
        print("Before:", before_files)
        time.sleep(2)
        seconds = 0
        while seconds < timeout:
            files = set(os.listdir(self.download_dir))

            part_files = [f for f in files if f.endswith('.crdownload') or f.endswith('.part')]
            if part_files:
                time.sleep(1)
                seconds += 1
                continue

            files = {f for f in files if f.lower().endswith(".rar")}
            print("After:", files)
            new_files = files - before_files
            print("New:", new_files)
            if new_files:
                return list(new_files)[0]

            time.sleep(1)
            seconds += 1

        raise TimeoutError("Файл не был скачан в заданное время")

    def _get_demo(self, match_link):
        driver = self.scrapper.driver

        self.scrapper.endpData(match_link)

        download_button = "a[data-demo-link]"
        demo_elems = self.scrapper.find_elem(download_button)

        if not demo_elems:
            print(f"Для матча {match_link} нет демки (data-demo-link).")
            return 0, None

        demo_path = demo_elems[0].get_attribute("data-demo-link")
        if not demo_path:
            print(f"data-demo-link пустой для матча {match_link}.")
            return 0, None

        print("Путь к демоверсии:", demo_path)
        demo_url = urljoin("https://www.hltv.org", demo_path)

        print(f"Demo({demo_url}) download has been started...")
        before_files = set(os.listdir(self.download_dir))

        try:
            driver.get(demo_url)
        except TimeoutException:
            print(f"Таймаут при загрузке демки: {demo_url}, останавливаем загрузку страницы.")
            try:
                driver.execute_script("window.stop();")
            except Exception as e:
                print(f"Не удалось остановить загрузку вкладки: {e}")

        try:
            demo_filename = self._wait_for_download(before_files=before_files)
        except TimeoutError as e:
            print(f"{e}")
            return 0, None

        print("Demo download completed!")
        return 1, demo_filename

    def _load_downloaded_ids(self):
        if not os.path.exists(self.downloaded_file):
            return set()
        try:
            with open(self.downloaded_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            return set(str(x) for x in data)
        except Exception as e:
            print(f"Cannot read {self.downloaded_file}: {e}")
            return set()

    def _save_downloaded_ids(self):
        try:
            with open(self.downloaded_file, "w", encoding="utf-8") as f:
                json.dump(sorted(self.downloaded_ids), f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"Cannot save {self.downloaded_file}: {e}")

    def get_match_teams_and_sides(self, match_url: str):
        self.scrapper.endpData(match_url)

        rows = self.scrapper.find_elem("div.round-history-team-row")
        if len(rows) < 2:
            print("Нашлось меньше двух round-history-team-row")
            return None

        rows = rows[:2]
        teams = []

        for row in rows:
            team_img = row.find_element(By.CSS_SELECTOR, "img.round-history-team")
            team_name = team_img.get_attribute("alt") or team_img.get_attribute("title") or ""

            outcome_imgs = row.find_elements(By.CSS_SELECTOR, "img.round-history-outcome")
            outcomes = []
            for img in outcome_imgs[:2]:
                src = img.get_attribute("src") or ""
                filename = src.split("/")[-1]
                base = filename.split(".")[0]
                outcomes.append(base)

            teams.append({"name": team_name, "outcomes": outcomes})

        def classify_side(outcomes):
            has_t = any(o in ("t_win", "bomb_exploded") for o in outcomes)
            has_ct = any(o in ("ct_win", "bomb_defused") for o in outcomes)
            if has_t and not has_ct:
                return "T"
            if has_ct and not has_t:
                return "CT"
            return None

        side1 = classify_side(teams[0]["outcomes"])
        side2 = classify_side(teams[1]["outcomes"])

        if side1 and not side2:
            side2 = "CT" if side1 == "T" else "T"
        elif side2 and not side1:
            side1 = "CT" if side2 == "T" else "T"

        return {
            "team1": {"name": teams[0]["name"], "side": side1},
            "team2": {"name": teams[1]["name"], "side": side2},
        }

    def _get_matches_json(self, json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        url_links = [bunch.get("url") for bunch in data]
        return url_links

    def download_one_demo(self, source="file", json_path=None, offset=0):
        if source == "site" and json_path is None:
            self._get_stat_matches_page({"offset": offset})
            links = self._get_matches()
            hrefs = [link.get_attribute("href") for link in links]
        else:
            hrefs = self._get_matches_json(json_path)

        filtered = []
        for href in hrefs:
            match_id = self._extract_match_id_generic(href)
            if not match_id:
                print(f"Cannot find match_id from {href}, download by default")
                filtered.append((href, match_id))
                continue
            if match_id in self.downloaded_ids:
                print(f"Match {match_id} already saved, skip it")
                continue
            filtered.append((href, match_id))

        if not filtered:
            print("Нет новых матчей для скачивания")
            return False, None, None

        href, match_id = filtered[0]

        ok, demo_filename = self._get_demo(href)
        if not ok:
            print(f"Cannot download demo for {href}")
            return False, match_id, None

        self.scrapper.endpData(href)
        list_of_maps = self.scrapper.find_elem('a[href^="/stats/matches/"]')
        map_hrefs = [link.get_attribute("href") for link in list_of_maps]
        map_team_meta = dict()
        for i, map_href in enumerate(map_hrefs):
            print("MAP HREF:", map_href)
            map_team_meta[f"map_{i+1}"] = self.get_match_teams_and_sides(map_href)

        if match_id:
            self.metadata[match_id] = {
                "url": href,
                "demo_file": demo_filename,
                "maps": map_team_meta,
            }
            self.downloaded_ids.add(match_id)
            self._save_downloaded_ids()
            self._save_metadata()
            print(f"Match {match_id} added to download list")
        else:
            print("Downloaded demo, but match_id is empty, skipping metadata id")

        return True, match_id, demo_filename


# In [9]:
import os
import re
import json
import time
from collections import defaultdict

def designation_from_header_meta(meta: dict) -> dict:
    """Map each side to the team number (1 or 2) that plays it.

    The sides are read from ``meta["team1"]["side"]`` and
    ``meta["team2"]["side"]`` (case-insensitive). The two sides are always
    assigned to different teams: when only one team's side is known, the
    other team gets the remaining side.

    Args:
        meta: Map metadata; may be ``None`` or lack the team entries.

    Returns:
        dict: ``{"ct": 1 | 2, "t": 1 | 2}``; ``{"ct": 1, "t": 2}`` when no
        side information is available.
    """
    t1 = (meta or {}).get("team1") or {}
    t2 = (meta or {}).get("team2") or {}
    s1 = str(t1.get("side", "")).upper()
    s2 = str(t2.get("side", "")).upper()

    # An explicit CT marker wins; otherwise infer CT from the other team's T.
    if s1 == "CT":
        ct_team = 1
    elif s2 == "CT":
        ct_team = 2
    elif s1 == "T":
        ct_team = 2
    elif s2 == "T":
        ct_team = 1
    else:
        ct_team = 1  # default when nothing is known

    return {"ct": ct_team, "t": 3 - ct_team}


def pick_any_meta_tick(recs: list) -> dict:
    """Return the first non-empty ``meta_tick`` dict found in ``recs``.

    Args:
        recs: Records (dicts); may be ``None`` or empty.

    Returns:
        The first ``meta_tick`` value that is a non-empty dict, else ``{}``.
    """
    candidates = (rec.get("meta_tick") for rec in (recs or []))
    return next((mt for mt in candidates if isinstance(mt, dict) and mt), {})

def load_meta_tickrates(meta_json_path: str) -> dict:
    """Load a per-demo meta JSON file.

    Args:
        meta_json_path: Path to the JSON file; may be empty or ``None``.

    Returns:
        The parsed object when it is a dict; ``{}`` when the path is empty,
        the file is missing, the content is not a dict, or reading fails
        (a warning is printed in that last case).
    """
    if not meta_json_path or not os.path.exists(meta_json_path):
        return {}

    try:
        with open(meta_json_path, "r", encoding="utf-8") as fh:
            loaded = json.load(fh) or {}
    except Exception as e:
        print(f"[WARN] cannot read meta_json {meta_json_path}: {e}")
        return {}

    if isinstance(loaded, dict):
        return loaded
    return {}


def infer_game_num(meta_info: dict, map_key: str) -> int:
    """Determine the game (map) number of a map within its match.

    The number parsed from ``map_key`` by ``infer_game_num_from_map_key``
    takes precedence. Otherwise the first usable value among the
    ``game_num``, ``map_num``, ``map_number`` and ``gameNumber`` fields of
    ``meta_info`` is used.

    Args:
        meta_info: Map metadata; may be ``None``.
        map_key: Key identifying the map.

    Returns:
        int: The game number, 1 when nothing could be determined.
    """
    from_key = infer_game_num_from_map_key(map_key)
    if from_key is not None:
        return int(from_key)

    info = meta_info or {}
    for field in ("game_num", "map_num", "map_number", "gameNumber"):
        raw = info.get(field)
        if raw is None:
            continue
        try:
            return int(raw)
        except Exception:
            # Not convertible to int: try the next field.
            continue

    return 1


def normalize_game_nums_inplace(recs: list) -> None:
    """Renumber ``game_num`` as 1..N when all records share the same value.

    If several maps carry the same ``game_num`` (typically all equal to 1),
    they are renumbered by their position in ``recs``. A missing or falsy
    ``game_num`` counts as 1. The records are modified in place.

    Args:
        recs: Records (dicts); may be ``None`` or empty.
    """
    if not recs:
        return

    distinct = {int(rec.get("game_num", 1) or 1) for rec in recs}
    if len(distinct) != 1 or len(recs) <= 1:
        return

    for position, rec in enumerate(recs, start=1):
        rec["game_num"] = position


def infer_game_num_from_map_key(map_key: str):
    """Parse the map number from an ``mN`` token of a map key.

    A token counts when it is delimited by ``-`` / ``_`` or the ends of the
    key (case-insensitive). When several tokens qualify the last one is
    used, so that a leading team slug that itself looks like ``mN``
    (e.g. ``m80-vs-...-m2-nuke``) is not mistaken for the map marker.

    Args:
        map_key: Key identifying the map; may be empty or ``None``.

    Returns:
        int | None: The parsed number, or ``None`` when no token is found.
    """
    if not map_key:
        return None
    # Lookarounds (instead of consuming the delimiters) let adjacent tokens
    # such as "m1-m2" both be found.
    tokens = re.findall(r"(?<![^-_])m(\d+)(?![^-_])", map_key.lower())
    return int(tokens[-1]) if tokens else None

def infer_match_format_from_rar(rar_file: str):
    """Infer a best-of-N match format from an archive file name.

    Two patterns are tried in order on the lower-cased name: a delimited
    ``bo<N>`` token, then the phrase ``best of <N>``. A pattern only counts
    when its number lies in 1..9; otherwise the next pattern is tried.

    Args:
        rar_file: Archive file name; may be empty or ``None``.

    Returns:
        ``{"type": "bestOf", "count": N}`` on success, otherwise ``None``.
    """
    if not rar_file:
        return None

    lowered = rar_file.lower()
    patterns = (
        r"(?:^|[-_])bo(\d+)(?:[-_.]|$)",
        r"best\s*of\s*(\d+)",
    )
    for pattern in patterns:
        hit = re.search(pattern, lowered)
        if not hit:
            continue
        count = int(hit.group(1))
        if 1 <= count <= 9:
            return {"type": "bestOf", "count": count}
    return None

# --- Input locations -------------------------------------------------------
download_dir = r"E:\Demos"
metadata_path = r"C:\Users\saidk\scripts\CS2_datasets\demos_metadata.json"
extract_dir = os.path.join(download_dir, "extracted")

# --- Output locations ------------------------------------------------------
csv_dir = r"E:\Demos\csv"
parquet_dir = r"E:\Demos\parquet"
headers_dir = r"E:\Demos\headers"
match_csv_dir = os.path.join(csv_dir, "match_merged")
match_parquet_dir = os.path.join(parquet_dir, "match_merged")
OUT_JSON_DIR = headers_dir  # JSON output shares the headers directory

# Create every output directory up front; existing directories are kept.
for _out_dir in (
    match_csv_dir,
    match_parquet_dir,
    csv_dir,
    parquet_dir,
    headers_dir,
    OUT_JSON_DIR,
):
    os.makedirs(_out_dir, exist_ok=True)

# Data-frame keys taken from each parsed demo, in processing order.
df_keys_to_save = [
    "rounds", "rounds_raw", "grenades", "damages", "bomb",
    "smokes", "infernos", "shots", "ticks", "rating",
    "kast", "adr", "trades", "kills", "footsteps",
]
# Keys written as Parquet; every other key is written as CSV.
parquet_keys = {"ticks", "grenades"}

# Downloader and demo-to-dataframe converter, both configured from the paths above.
dem = HLTVDemo(download_dir=download_dir, metadata_file=metadata_path)
csvtor = CSVtor(path_to_zips=download_dir, metadata_path=metadata_path, extract_to=extract_dir)

# ---------------------------------------------------------------------------
# Main pipeline: download demos one by one, glue multi-part maps together,
# save per-map and per-match datasets, and prepare the inputs (paths, clock
# configs, external meta) for the Rimble JSON builder.
# ---------------------------------------------------------------------------

# `re`, `time` and `defaultdict` are used below but are not part of the
# cell's top import block, so they are imported here to keep the cell
# self-contained.
import re
import time
from collections import defaultdict


def _coalesce(value, default):
    """Return ``value`` unless it is ``None``; otherwise return ``default``."""
    return default if value is None else value


target_count = 500
downloaded = 0

try:
    while downloaded < target_count:
        ok, match_id, demo_filename = dem.download_one_demo(source="file", json_path="hltv_matches_large.json")
        if not ok:
            print("Ничего нового не скачано (или ошибка). Выходим из цикла.")
            break

        downloaded += 1

        # map_key -> {"parts": [(part_index, data), ...]}
        groups = {}

        for dem_name, data in csvtor.dem_to_csv(half_rounds=HALF_ROUNDS):
            base_name = os.path.splitext(dem_name)[0]
            tokens = base_name.split("-")

            # A trailing "-p<N>" token marks part N of a map; everything
            # before it is the map key. Names without it are a single part.
            part_index = 1
            if tokens and re.match(r"^p\d+$", tokens[-1], re.IGNORECASE):
                part_index = int(tokens[-1][1:])
                map_key = "-".join(tokens[:-1])
            else:
                map_key = base_name

            groups.setdefault(map_key, {"parts": []})["parts"].append((part_index, data))

        # PASS 1: glue parts per map + save per-map CSV/parquet/header + collect records
        match_records = defaultdict(list)  # (match_id, rar_file) -> [record]

        for map_key, group in groups.items():
            # Process the parts in part-number order.
            parts = sorted(group["parts"], key=lambda x: x[0])
            # The first part (or the whole map when it was not split)
            # supplies the header and meta information for the map.
            first_data = parts[0][1]
            meta_info = first_data.get("meta") or {}
            game_num = infer_game_num(meta_info, map_key)

            match_id_local = meta_info.get("match_id")
            rar_file = meta_info.get("rar_file")  # archive the demo was extracted from

            file_base = f"{map_key}_{match_id_local}" if match_id_local is not None else map_key

            # header json
            header_obj = first_data.get("header")
            # Resolve the map name once so a missing header degrades to None
            # instead of raising AttributeError further down.
            map_name = header_obj.get("map_name") if header_obj is not None else None
            if header_obj is not None:
                header_payload = {
                    "match_id": match_id_local,
                    "map": map_name,
                    "map_key": map_key,
                    "header": header_obj,
                    "meta": meta_info,
                }
                out_header_json = os.path.join(headers_dir, f"{file_base}_header.json")
                with open(out_header_json, "w", encoding="utf-8") as f:
                    json.dump(header_payload, f, ensure_ascii=False, indent=2)
                print(f"Сохранён HEADER JSON: {out_header_json}")
            else:
                out_header_json = None
                print(f"[WARN] header отсутствует в данных для {file_base}")

            # meta_tickrate json
            meta_path = first_data.get("meta_path")
            meta_tick = {}
            if meta_path and os.path.exists(meta_path):
                # load_meta_tickrates warns and returns {} for unreadable or
                # non-dict JSON, so one bad file cannot abort the whole run.
                meta_tick = load_meta_tickrates(meta_path)
            else:
                print(f"[WARN] meta_path не найден/не передан: {meta_path}")

            merged_chunks = {k: [] for k in df_keys_to_save}

            r_off, t_off = 0, 0
            for _, part_data in parts:
                # 1) shift this part's frames by the offsets accumulated so far
                for key in df_keys_to_save:
                    df = part_data.get(key)
                    if df is None or df.empty:
                        continue
                    merged_chunks[key].append(_apply_offsets(df, round_offset=r_off, tick_offset=t_off))

                # 2) advance the offsets for the next part
                rdf = part_data.get("rounds")
                if rdf is not None and not rdf.empty and "round_num" in rdf.columns:
                    mx = pd.to_numeric(rdf["round_num"], errors="coerce").dropna()
                    if not mx.empty:
                        r_off += int(mx.max())

                # NOTE(review): open question from the original author — ticks
                # in later parts of a map may not restart from zero, in which
                # case this tick shift would be wrong. Confirm before relying on it.
                mx_tick = _estimate_part_max_tick(part_data)
                if mx_tick is not None and mx_tick > 0:
                    t_off += int(mx_tick) + 1

            # 3) assemble the glued frame for every key
            merged_dfs = {}
            for key, dfs in merged_chunks.items():
                if not dfs:
                    continue
                mdf = pd.concat(dfs, ignore_index=True)

                if key == "rounds":
                    mdf = add_team_winner(mdf, meta_info=meta_info, half_rounds=HALF_ROUNDS)

                mdf["game_num"] = int(game_num)
                mdf["match_id"] = match_id_local
                mdf["map"] = map_name
                merged_dfs[key] = mdf

            saved_paths = {}
            for key, df in merged_dfs.items():
                if key in parquet_keys:
                    outp = os.path.join(parquet_dir, f"{file_base}_{key}.parquet")
                    df.to_parquet(outp, index=False)
                    saved_paths[key] = outp
                    print(f"Сохранён Parquet: {outp}")
                else:
                    outp = os.path.join(csv_dir, f"{file_base}_{key}.csv")
                    df.to_csv(outp, index=False)
                    saved_paths[key] = outp
                    print(f"Сохранён CSV: {outp}")

            # The map winner is the last non-null team_winner in round order.
            map_winner = None
            if "rounds" in merged_dfs and not merged_dfs["rounds"].empty and "team_winner" in merged_dfs["rounds"].columns:
                rr = merged_dfs["rounds"].sort_values("round_num")
                tw = rr["team_winner"].dropna()
                if not tw.empty:
                    map_winner = str(tw.iloc[-1])

            match_records[(str(match_id_local), str(rar_file))].append({
                "map_key": map_key,  # demo file name of this map
                "map_name": map_name,  # map name from the header (None without header)
                "file_base": file_base,  # map_key + match_id
                "game_num": int(game_num),  # ordinal number of the map
                "header_obj": header_obj,  # header taken from the demo
                "meta_tick": meta_tick,  # tick data for the map
                "rar_file": rar_file,  # archive file name
                "meta_info": meta_info,  # meta information for the map
                "saved_paths": saved_paths,  # paths of the saved CSV/parquet files
                "map_winner": map_winner,  # winner of the map
                "merged_dfs": merged_dfs,  # all glued frames of the map
            })

        # PASS 2: build JSONs in map order, patch metadata + teams.games_won, and save merged-by-match dfs
        from convert_demo_to_json_UPDATED_v4 import DataPaths, ClockConfig
        from RimbleLiveBuilder_UPDATED_v4 import RimbleLiveBuilder, LiveBuildConfig

        for match_key, recs in match_records.items():
            # 1) order the maps by game_num.
            # NOTE(review): normalize_game_nums_inplace is not applied here;
            # maps sharing a game_num overwrite each other in the dicts below.
            recs = sorted(recs, key=lambda r: int(r.get("game_num")))
            match_id_local = match_key[0]

            match_header_all_path = os.path.join(headers_dir, f"{match_id_local}_header_all.json")

            headers_by_g = {}

            for r in recs:
                g = int(r.get("game_num"))  # map number
                mi = r.get("meta_info") or {}  # team names and starting sides
                hdr = r.get("header_obj")
                if hdr is None:
                    continue

                # legacy header JSON layout
                header_payload = {
                    "match_id": str(mi.get("match_id") or match_id_local),
                    "map": str(r.get("map_name") or ""),  # e.g. "Inferno" (for convenience)
                    "map_key": str(r.get("map_key") or ""),
                    "header": hdr,
                    "meta": {
                        "team1": (mi.get("team1") or {}),
                        "team2": (mi.get("team2") or {}),
                        "rar_file": mi.get("rar_file"),
                    }
                }

                headers_by_g[g] = header_payload

            with open(match_header_all_path, "w", encoding="utf-8") as f:
                json.dump({"match_id": str(match_id_local), "by_game_num": headers_by_g}, f, ensure_ascii=False, indent=2)

            print(f"[HDR ALL] saved: {match_header_all_path}")

            # per-match meta
            match_meta_path = os.path.join(headers_dir, f"{match_id_local}_meta.json")
            meta_by_game = {}
            for r in recs:
                g = str(int(r["game_num"]))
                mt = r.get("meta_tick") or {}
                meta_by_game[g] = {
                    "tickrate_demoparser2": mt.get("tickrate_demoparser2"),
                    "tickrate_rounds_median": mt.get("tickrate_rounds_median"),
                    "freeze_time_seconds": mt.get("freeze_time_seconds", 15),
                    "round_time_seconds": mt.get("round_time_seconds", 115),
                    "bomb_time_seconds": mt.get("bomb_time_seconds", 40),
                }

            with open(match_meta_path, "w", encoding="utf-8") as f:
                json.dump({"match_id": str(match_id_local), "meta_by_game_num": meta_by_game}, f, ensure_ascii=False, indent=2)

            print(f"[META] saved: {match_meta_path}")

            print("RECS (normalized):", [(r["file_base"], r["game_num"]) for r in recs])

            # 2) match-merged datasets, built from memory (no re-reading from disk)
            for key in df_keys_to_save:
                frames = []
                for r in recs:
                    mdfs = r.get("merged_dfs") or {}
                    df = mdfs.get(key)
                    if df is None or df.empty:
                        continue

                    # make sure game_num is present (just in case)
                    if "game_num" not in df.columns:
                        df = df.copy()
                        df["game_num"] = int(r.get("game_num"))

                    frames.append(df)

                if not frames:
                    continue

                big = pd.concat(frames, ignore_index=True)

                big["game_num"] = pd.to_numeric(big["game_num"], errors="coerce").fillna(1).astype(int)

                outp = os.path.join(
                    match_parquet_dir if key in parquet_keys else match_csv_dir,
                    f"{match_key[0]}_{key}.{'parquet' if key in parquet_keys else 'csv'}"
                )

                if key in parquet_keys:
                    big.to_parquet(outp, index=False)
                else:
                    big.to_csv(outp, index=False)

                print(f"[MATCH MERGED] saved: {outp}")

            paths = DataPaths(
                ticks=os.path.join(match_parquet_dir, f"{match_id_local}_ticks.parquet"),
                grenades=os.path.join(match_parquet_dir, f"{match_id_local}_grenades.parquet"),

                rounds=os.path.join(match_csv_dir, f"{match_id_local}_rounds.csv"),
                kills=os.path.join(match_csv_dir, f"{match_id_local}_kills.csv"),
                damages=os.path.join(match_csv_dir, f"{match_id_local}_damages.csv"),
                shots=os.path.join(match_csv_dir, f"{match_id_local}_shots.csv"),
                bomb=os.path.join(match_csv_dir, f"{match_id_local}_bomb.csv"),
                smokes=os.path.join(match_csv_dir, f"{match_id_local}_smokes.csv"),
                infernos=os.path.join(match_csv_dir, f"{match_id_local}_infernos.csv"),
                footsteps=os.path.join(match_csv_dir, f"{match_id_local}_footsteps.csv"),

                # Holds team names, their starting sides and the map names.
                header_json=match_header_all_path,
            )

            # NOTE(review): DataPaths is built without a meta_json argument, so
            # what paths.meta_json holds depends on the DataPaths default —
            # confirm. The per-game tick rates used below come from meta_by_game.
            meta_tick = load_meta_tickrates(paths.meta_json)

            tickrate_dp2 = meta_tick.get("tickrate_demoparser2")
            tickrate_rounds = meta_tick.get("tickrate_rounds_median")
            tickrate_final = tickrate_dp2 or tickrate_rounds or 128

            # The keys always exist in meta_by_game but may hold None, so
            # dict.get defaults never apply; _coalesce keeps int()/float()
            # from receiving None.
            clock_cfg = [
                ClockConfig(
                    freeze_time_seconds=int(_coalesce(cfg.get("freeze_time_seconds"), 15)),
                    round_time_seconds=int(_coalesce(cfg.get("round_time_seconds"), 115)),
                    bomb_time_seconds=int(_coalesce(cfg.get("bomb_time_seconds"), 40)),
                    tickrate_demoparser2=int(_coalesce(cfg.get("tickrate_demoparser2"), 64)),
                    tickrate_rounds_median=float(_coalesce(cfg.get("tickrate_rounds_median"), 85.33)),
                )
                for cfg in meta_by_game.values()
            ]

            print(
                f"  ClockConfig:"
                f"(dp2={clock_cfg[0].tickrate_demoparser2}, rounds={clock_cfg[0].tickrate_rounds_median}), "
                f"freeze={clock_cfg[0].freeze_time_seconds}, round={clock_cfg[0].round_time_seconds}"
            )

            # NOTE(review): meta_info0 and match_format were referenced without
            # ever being assigned. They are taken here from the first map's
            # meta info and from the archive name respectively — confirm that
            # this matches the intended sources.
            meta_info0 = recs[0].get("meta_info") or {}
            match_format = infer_match_format_from_rar(match_key[1])

            external_meta = {
                    "matchid": str(match_id_local),
                    "match_status": meta_info0.get("match_status", "LIVE"),
                    "match_format": match_format,
                    # game number of the last map (previously read from a
                    # leaked loop variable holding the same value)
                    "game_num": int(recs[-1]["game_num"]),
                    "updated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                }

finally:
    # Always shut the scraper's browser session down, even after an error.
    dem.scrapper.quit()


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



URL: https://www.hltv.org/matches/2356688/heroic-vs-fnatic-pinnacle-cup-championship-2022
URL: https://www.hltv.org/matches/2356748/sprout-vs-eternal-fire-republeague-season-3
URL: https://www.hltv.org/matches/2356700/ex-mad-lions-vs-ecstatic-elisa-invitational-spring-2022
URL: https://www.hltv.org/matches/2356671/movistar-riders-vs-tyloo-global-esports-tour-dubai-2022
URL: https://www.hltv.org/matches/2356687/astralis-vs-imperial-pinnacle-cup-championship-2022
URL: https://www.hltv.org/matches/2356686/fnatic-vs-big-pinnacle-cup-championship-2022
URL: https://www.hltv.org/matches/2356699/big-academy-vs-777-elisa-invitational-spring-2022
URL: https://www.hltv.org/matches/2356670/complexity-vs-og-global-esports-tour-dubai-2022
URL: https://www.hltv.org/matches/2356747/eg-black-vs-axolotls-esl-challenger-league-season-41-north-america
URL: https://www.hltv.org/matches/2356715/bhop-vs-bad-news-bears-esl-challenger-league-season-41-north-america
URL: https://www.hltv.org/matches/2356714/gai

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=143.0.7499.170)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0xa61213
	0xa61254
	0x84e6dd
	0x82ca8d
	0x8c1ffb
	0x8dd44c
	0x8bb2e6
	0x88d321
	0x88e1d4
	0xcb5254
	0xcb080b
	0xccd0ea
	0xa7b118
	0xa8311d
	0xa69518
	0xa696d9
	0xa53a68
	0x76a6fcc9
	0x773082ae
	0x7730827e


In [2]:
# test_rimble_store.py
# Run: python test_rimble_store.py

from RimbleMatchDataStore import RimbleMatchDataStore, ClockConfig, DataPaths

import os
import json
import time


def load_meta_tickrates(meta_json_path: str) -> dict:
    """Read a meta JSON file and return its top-level object as a dict.

    Args:
        meta_json_path: Path to the JSON file; may be empty or ``None``.

    Returns:
        The parsed dict, or ``{}`` when the path is falsy or missing, when
        reading/parsing fails (a warning is printed), or when the parsed
        value is not a dict.
    """
    path_usable = bool(meta_json_path) and os.path.exists(meta_json_path)
    if not path_usable:
        return {}

    try:
        with open(meta_json_path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle) or {}
    except Exception as e:
        print(f"[WARN] cannot read meta_json {meta_json_path}: {e}")
        return {}

    return parsed if isinstance(parsed, dict) else {}


def _safe_filename(s: str) -> str:
    """Replace every character that is not alphanumeric, ``-``, ``_`` or ``.`` with ``_``."""
    kept_punctuation = ("-", "_", ".")
    cleaned = []
    for ch in s:
        if ch.isalnum() or ch in kept_punctuation:
            cleaned.append(ch)
        else:
            cleaned.append("_")
    return "".join(cleaned)



# --- paths ---
# Locations of the match-merged datasets and header/meta JSON files for one
# fixed match id; everything below is derived from these.
parquet_dir = r"E:\Demos\parquet"
match_parquet_dir = os.path.join(parquet_dir, "match_merged")

match_id_local = 2378228

csv_dir = r"E:\Demos\csv"
match_csv_dir = os.path.join(csv_dir, "match_merged")

headers_dir = r"E:\Demos\headers"
match_meta_path = os.path.join(headers_dir, f"{match_id_local}_meta.json")
match_header_all_path = os.path.join(headers_dir, f"{match_id_local}_header_all.json")

# ticks and grenades are read from Parquet, the remaining datasets from CSV.
paths = DataPaths(
    ticks=os.path.join(match_parquet_dir, f"{match_id_local}_ticks.parquet"),
    grenades=os.path.join(match_parquet_dir, f"{match_id_local}_grenades.parquet"),

    rounds=os.path.join(match_csv_dir, f"{match_id_local}_rounds.csv"),
    kills=os.path.join(match_csv_dir, f"{match_id_local}_kills.csv"),
    damages=os.path.join(match_csv_dir, f"{match_id_local}_damages.csv"),
    shots=os.path.join(match_csv_dir, f"{match_id_local}_shots.csv"),
    bomb=os.path.join(match_csv_dir, f"{match_id_local}_bomb.csv"),
    smokes=os.path.join(match_csv_dir, f"{match_id_local}_smokes.csv"),
    infernos=os.path.join(match_csv_dir, f"{match_id_local}_infernos.csv"),
    footsteps=os.path.join(match_csv_dir, f"{match_id_local}_footsteps.csv"),

    header_json=match_header_all_path,
)

# --- meta parse (not required by the store, but kept as in your version) ---
# Builds one ClockConfig per game, ordered by numeric game number; the list
# is not passed to the store constructed below.
meta_obj = load_meta_tickrates(match_meta_path)
meta_by_game_num = meta_obj.get("meta_by_game_num") or {}
clock_cfg_list = []
if isinstance(meta_by_game_num, dict) and meta_by_game_num:
    for g in sorted(meta_by_game_num.keys(), key=lambda x: int(x)):
        m = meta_by_game_num[g] or {}
        clock_cfg_list.append(ClockConfig(
            freeze_time_seconds=int(m.get("freeze_time_seconds", 15)),
            round_time_seconds=int(m.get("round_time_seconds", 115)),
            bomb_time_seconds=int(m.get("bomb_time_seconds", 40)),
            # missing tick rates stay None instead of being cast
            tickrate_demoparser2=float(m.get("tickrate_demoparser2")) if m.get("tickrate_demoparser2") is not None else None,
            tickrate_rounds_median=float(m.get("tickrate_rounds_median")) if m.get("tickrate_rounds_median") is not None else None,
        ))
else:
    # no per-game meta available: fall back to a single default config
    clock_cfg_list = [ClockConfig()]

# Match-level metadata handed to the store; values are fixed for this test.
external_meta = {
    "matchid": str(match_id_local),
    "match_status": "LIVE",
    "match_format": {"type": "bestOf", "count": 3},
    "updated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),

    # optional fields — enable if available:
    # "league": "Rushzone CS2 December 2025",
    # "started_at": "2025-12-22 09:00:01.533",
    # "match_duration_seconds": 6523.844,
}

store = RimbleMatchDataStore(
    paths=paths,
    match_meta_path=match_meta_path,
    external_meta=external_meta,
)

print("[TEST] OK: store created")
print("[TEST] game_nums:", store.game_nums)

# Quick look at the sizes of the frames exposed by the store.
print("\n[TEST] --- shapes ---")
print("ticks    :", store.df_ticks.shape, "cols:", len(store.df_ticks.columns))
print("rounds   :", store.df_rounds.shape, "cols:", len(store.df_rounds.columns))
print("bomb     :", store.df_bomb.shape, "cols:", len(store.df_bomb.columns))

# -----------------------------
# Build & validate per map
# -----------------------------
# For every game number: build the JSON object via the store, check the
# metadata, clock, teams and results sections, and save the object to out_dir.
out_dir = r"E:\Demos\rimble_out"
os.makedirs(out_dir, exist_ok=True)
print(f"\n[TEST] Saving JSON files to: {out_dir}")

for g in store.game_nums:
    obj = store.build_rimble_json_for_map_last_tick(g)

    print(f"\n[TEST] === game_num={g} ===")
    print("[TEST] timeframe:", obj["timeframe"], "round_num:", obj["round_num"], "current_map:", obj["current_map"])

    # --- metadata cumulative checks ---
    md = obj.get("metadata", [])
    print("[TEST] metadata_len:", len(md))
    assert isinstance(md, list)
    # expected length = number of games up to this game_num (assumes game_nums = [1..N])
    assert len(md) == g, f"metadata must be cumulative: expected len={g}, got len={len(md)}"

    # previous games should be finished True
    for prev in md[:-1]:
        assert prev.get("finished") is True, "previous game metadata must be finished=True"

    # current game should be finished False
    assert md[-1].get("game_number") == g
    assert md[-1].get("finished") is False

    # clock sanity
    cur_clk = md[-1].get("clock", {})
    print("[TEST] clock.currentSeconds:", cur_clk.get("currentSeconds"), "ticking:", cur_clk.get("ticking"))
    assert "currentSeconds" in cur_clk and "ticking" in cur_clk

    # --- teams checks ---
    # every team entry must expose the full set of keys; players must be empty
    teams = obj.get("teams", [])
    assert isinstance(teams, list) and len(teams) == 2, "teams must be list of 2"
    for t in teams:
        for k in [
            "designation", "name", "id", "side",
            "current_score", "games_won", "loss_streak",
            "picked_maps", "banned_maps", "remaining_available_maps", "map_side_picked",
            "results", "players",
        ]:
            assert k in t, f"team missing key: {k}"
        assert isinstance(t["results"], list), "results must be list"
        assert isinstance(t["players"], list) and len(t["players"]) == 0

    print("[TEST] team1:", teams[0]["name"], "| side:", teams[0]["side"], "| score:", teams[0]["current_score"])
    print("[TEST] team2:", teams[1]["name"], "| side:", teams[1]["side"], "| score:", teams[1]["current_score"])

    # save
    # the file name is built from the object's date/time and sanitised
    date_part = str(obj.get("date", "unknown-date"))
    time_part = str(obj.get("time", "unknown-time")).replace(":", "-")
    fname = _safe_filename(f"cs2_{date_part}_{time_part}_game{g}_base+metadata+teams.json")
    out_path = os.path.join(out_dir, fname)

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)

    print("[TEST] saved:", out_path)
    # --- teams/results checks ---
    teams = obj.get("teams", [])
    assert isinstance(teams, list) and len(teams) == 2

    t1, t2 = teams[0], teams[1]

    # results must be a list and cumulative
    r1 = t1.get("results", [])
    r2 = t2.get("results", [])
    assert isinstance(r1, list) and isinstance(r2, list)
    assert len(r1) == g, f"team1.results must be cumulative: expected {g}, got {len(r1)}"
    assert len(r2) == g, f"team2.results must be cumulative: expected {g}, got {len(r2)}"

    # earlier maps in results must have is_final=True, the current one is_final=False
    for prev in r1[:-1]:
        assert prev.get("is_final") is True, "previous game result must be final"
    assert r1[-1].get("is_final") is False, "current game result must be not final"

    print("[TEST] games_won:", t1.get("games_won"), "-", t2.get("games_won"))
    print("[TEST] results(last):", r1[-1])

print("\n[TEST] DONE")



[TEST] OK: store created
[TEST] game_nums: [1, 2]

[TEST] --- shapes ---
ticks    : (3394070, 18) cols: 18
rounds   : (43, 13) cols: 13
bomb     : (254, 12) cols: 12

[TEST] Saving JSON files to: E:\Demos\rimble_out

[TEST] === game_num=1 ===
[TEST] timeframe: 189521 round_num: 24 current_map: de_train
[TEST] metadata_len: 1
[TEST] clock.currentSeconds: 7.984375 ticking: True
[TEST] team1: Virtus.pro | side: terrorists | score: 11
[TEST] team2: Vitality | side: counter-terrorists | score: 13
[TEST] saved: E:\Demos\rimble_out\cs2_2026-01-08_17-21-23_game1_base_metadata_teams.json
[TEST] games_won: 0 - 0
[TEST] results(last): {'type': 'gameResult', 'game_number': 1, 'map': 'train', 'score': {'team1': 11, 'team2': 13}, 'is_final': False, 'winner_designation': None}

[TEST] === game_num=2 ===
[TEST] timeframe: 184084 round_num: 19 current_map: de_inferno
[TEST] metadata_len: 2
[TEST] clock.currentSeconds: 62.046875 ticking: True
[TEST] team1: Virtus.pro | side: terrorists | score: 6
[TEST]

In [6]:
# List the column names of store.df_rounds.
store.df_rounds.columns.tolist()

['round_num',
 'start',
 'freeze_end',
 'end',
 'official_end',
 'winner',
 'reason',
 'bomb_plant',
 'bomb_site',
 'team_winner',
 'game_num',
 'match_id',
 'map']

In [7]:
# List the column names of store.df_bomb.
store.df_bomb.columns.tolist()

['tick',
 'event',
 'X',
 'Y',
 'Z',
 'steamid',
 'name',
 'bombsite',
 'round_num',
 'game_num',
 'match_id',
 'map']

In [8]:

# Build a Jsoner from the data paths, the external meta and the clock configs.
# NOTE(review): the test cell above builds `clock_cfg_list`; `clock_cfg` is
# only assigned in the download/merge cell — confirm which list is meant here.
js = Jsoner(paths, external_meta, clock_cfg_list=clock_cfg)

In [3]:
# Preview the first rows of js.dem_ticks.
js.dem_ticks.head()

Unnamed: 0,inventory,balance,health,has_defuser,has_helmet,place,armor,side,X,Y,...,game_num,match_id,map,x,y,z,economy,current_health,alive,team_key
0,"[Butterfly Knife, Glock-18]",800,100,False,False,TSpawn,0,t,-2033.0,1362.233154,...,1,2378228,de_train,-2033.0,1362.233154,-171.96875,800,100,True,t
1,"[Karambit, USP-S]",800,100,False,False,CTSpawn,0,ct,1462.0,-1226.0,...,1,2378228,de_train,1462.0,-1226.0,-327.945557,800,100,True,ct
2,"[Karambit, USP-S]",800,100,False,False,CTSpawn,0,ct,1552.0,-1232.0,...,1,2378228,de_train,1552.0,-1232.0,-327.759521,800,100,True,ct
3,"[Butterfly Knife, Glock-18]",800,100,False,False,TSpawn,0,t,-1916.0,1456.233154,...,1,2378228,de_train,-1916.0,1456.233154,-171.96875,800,100,True,t
4,"[M9 Bayonet, P2000]",800,100,False,False,CTSpawn,0,ct,1600.0,-1440.0,...,1,2378228,de_train,1600.0,-1440.0,-327.963867,800,100,True,ct
