In [43]:
import os
import time
from datetime import datetime, timedelta
from itertools import islice

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import requests

# ────────────────────────────────────────────────────────────────────────────────
# Config
# ────────────────────────────────────────────────────────────────────────────────
COINGECKO_BASE = "https://api.coingecko.com/api/v3"
DATA_DIR = "./data"      # minute‑klines
os.makedirs(DATA_DIR, exist_ok=True)

pd.options.display.float_format = "{:,}".format  # ditch scientific notation

# ────────────────────────────────────────────────────────────────────────────────
# Helper utils
# ────────────────────────────────────────────────────────────────────────────────

def _chunks(iterable, n):
    it = iter(iterable)
    while chunk := list(islice(it, n)):
        yield chunk

# ────────────────────────────────────────────────────────────────────────────────
# Binance (pairs + klines)
# ────────────────────────────────────────────────────────────────────────────────

def get_binance_us_symbols():
    """Return every USDT trading pair listed on Binance US (e.g. BTCUSDT)."""
    r = requests.get("https://api.binance.us/api/v3/exchangeInfo", timeout=10)
    r.raise_for_status()
    symbols = r.json()["symbols"]
    return [s["symbol"] for s in symbols if s["quoteAsset"] == "USDT" and s["status"] == "TRADING"]


def fetch_and_save_klines(pairs, supply_map, interval="1m", days=14):
    """Download klines and compute minute‑level market_cap = close * circulating_supply.

    Args:
        pairs (list[str]): Binance pairs like BTCUSDT
        supply_map (dict): {SYMBOL: circulating_supply}
        interval (str): Binance kline interval
        days (int): look‑back window

    Returns:
        dict: {PAIR: parquet_path_written}
    """
    base_url = "https://api.binance.us"
    out = {}

    for pair in pairs:
        sym = pair.replace("USDT", "").upper()
        supply = supply_map.get(sym)
        if supply is None or pd.isna(supply):
            print(f"⚠️  No circulating_supply for {sym}; market_cap will be NA")
        print(f"⏳  Fetching klines {pair}…")
        end_ms = int(time.time() * 1000)
        start_ms = end_ms - days * 24 * 60 * 60 * 1000
        rows = []
        while start_ms < end_ms:
            try:
                resp = requests.get(
                    f"{base_url}/api/v3/klines",
                    params={
                        "symbol": pair,
                        "interval": interval,
                        "startTime": start_ms,
                        "endTime": end_ms,
                        "limit": 1000,
                    },
                    timeout=10,
                )
                resp.raise_for_status()
                batch = resp.json()
                if not batch:
                    break
                rows.extend(batch)
                start_ms = batch[-1][0] + 1
                time.sleep(0.05)
            except Exception as e:
                print(f"❌  Error klines {pair}: {e}")
                break

        if not rows:
            print(f"⚠️  No klines for {pair}")
            continue

        df = pd.DataFrame(
            rows,
            columns=[
                "open_time", "open", "high", "low", "close", "volume",
                "close_time", "qav", "num_trades", "taker_base",
                "taker_quote", "ignore",
            ],
        )
        df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
        df.set_index("open_time", inplace=True)
        df = df[["open", "high", "low", "close", "volume"]].astype(float)

        # compute minute‑level market cap via supply * close_price
        if supply is not None and not pd.isna(supply):
            df["market_cap"] = df["close"] * supply
        else:
            df["market_cap"] = pd.NA

        fname = os.path.abspath(os.path.join(DATA_DIR, f"{pair.lower()}_{interval}_last{days}d.parquet"))
        pq.write_table(pa.Table.from_pandas(df), fname)
        out[pair.upper()] = fname
        print(f"✅  {fname}  ({len(df):,} rows | market_cap computed)")
    return out

# ────────────────────────────────────────────────────────────────────────────────
# CoinGecko (symbol map, current market data)
# ────────────────────────────────────────────────────────────────────────────────

_SYMBOL_TO_ID = None

def _load_symbol_map():
    global _SYMBOL_TO_ID
    if _SYMBOL_TO_ID is None:
        print("🔄  Loading CoinGecko symbol list…")
        resp = requests.get(f"{COINGECKO_BASE}/coins/list", timeout=30)
        resp.raise_for_status()
        _SYMBOL_TO_ID = {c["symbol"].upper(): c["id"] for c in resp.json()}
    return _SYMBOL_TO_ID


def get_coingecko_ids(symbols):
    mapping = _load_symbol_map()
    return {s: mapping.get(s) for s in symbols}


def fetch_current_market_data(ids):
    """Return list of market data dicts from /coins/markets for given coin IDs."""
    results = []
    for chunk in _chunks(list(ids), 250):
        r = requests.get(
            f"{COINGECKO_BASE}/coins/markets",
            params={"vs_currency": "usd", "ids": ",".join(chunk), "price_change_percentage": "24h"},
            timeout=20,
        )
        if r.status_code == 429:
            print("⏳  Markets rate‑limited — sleep 60 s…")
            time.sleep(60)
            r = requests.get(
                f"{COINGECKO_BASE}/coins/markets",
                params={"vs_currency": "usd", "ids": ",".join(chunk), "price_change_percentage": "24h"},
                timeout=20,
            )
        r.raise_for_status()
        results.extend(r.json())
    return results

# ────────────────────────────────────────────────────────────────────────────────
# Master DataFrame
# ────────────────────────────────────────────────────────────────────────────────

def build_master_dataframe(pairs, market_data, kline_paths):
    md_df = pd.DataFrame(market_data)
    md_df["symbol"] = md_df["symbol"].str.upper()

    base_syms = [p.replace("USDT", "") for p in pairs]
    df = pd.DataFrame({"binance_pair": pairs, "base_symbol": base_syms}).merge(
        md_df, how="left", left_on="base_symbol", right_on="symbol"
    )

    df["klines_parquet"] = df["binance_pair"].map(kline_paths)

    keep = [
        "binance_pair", "name", "symbol", "current_price", "market_cap", "klines_parquet",
        "circulating_supply", "total_supply", "max_supply",
    ]
    return df[keep].sort_values("market_cap", ascending=False)

# ────────────────────────────────────────────────────────────────────────────────
# Main
# ────────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    DAYS = 14

    # 1 Binance pairs
    pairs = get_binance_us_symbols()

    # 2 CoinGecko IDs & current market data (includes circulating_supply)
    base_syms = [p.replace("USDT", "") for p in pairs]
    symbol_to_id = get_coingecko_ids(base_syms)
    market_data = fetch_current_market_data([cid for cid in symbol_to_id.values() if cid])

    # build supply map for quick lookup
    supply_map = {d["symbol"].upper(): d.get("circulating_supply") for d in market_data}

    # 3 Minute klines with computed market_cap column
    kline_paths = fetch_and_save_klines(pairs, supply_map, days=DAYS)

    # 4 Merge summary
    master_df = build_master_dataframe(pairs, market_data, kline_paths)
    master_path = "binance_coingecko_market_caps.parquet"
    master_df.to_parquet(master_path, index=False)

    print(master_df.head())
    print(f"📝  Master saved → {master_path}")


🔄  Loading CoinGecko symbol list…
⚠️  No circulating_supply for BTC; market_cap will be NA
⏳  Fetching klines BTCUSDT…
✅  C:\Users\saber\OneDrive\Documents\GitHub\CryptoBot4Dummies\data\btcusdt_1m_last14d.parquet  (20,160 rows | market_cap computed)
⚠️  No circulating_supply for ETH; market_cap will be NA
⏳  Fetching klines ETHUSDT…
✅  C:\Users\saber\OneDrive\Documents\GitHub\CryptoBot4Dummies\data\ethusdt_1m_last14d.parquet  (20,160 rows | market_cap computed)
⚠️  No circulating_supply for XRP; market_cap will be NA
⏳  Fetching klines XRPUSDT…
✅  C:\Users\saber\OneDrive\Documents\GitHub\CryptoBot4Dummies\data\xrpusdt_1m_last14d.parquet  (20,160 rows | market_cap computed)
⏳  Fetching klines BCHUSDT…
✅  C:\Users\saber\OneDrive\Documents\GitHub\CryptoBot4Dummies\data\bchusdt_1m_last14d.parquet  (20,160 rows | market_cap computed)
⏳  Fetching klines LTCUSDT…
✅  C:\Users\saber\OneDrive\Documents\GitHub\CryptoBot4Dummies\data\ltcusdt_1m_last14d.parquet  (20,160 rows | market_cap computed)


In [44]:
master_df

Unnamed: 0,binance_pair,name,symbol,current_price,market_cap,klines_parquet,circulating_supply,total_supply,max_supply
6,ADAUSDT,Cardano,ADA,0.736082,26539374705.0,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,36056732233.05023,45000000000.0,45000000000.0
143,SUIUSDT,Sui,SUI,3.77,12577022838.0,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,3338327017.911665,10000000000.0,10000000000.0
9,XLMUSDT,Stellar,XLM,0.284064,8823095980.0,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,31069300526.76352,50001786892.81785,50001786892.81785
133,HBARUSDT,Hedera,HBAR,0.18914,7993775795.0,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,42239760473.8114,50000000000.0,50000000000.0
3,BCHUSDT,Bitcoin Cash,BCH,388.9,7723815067.0,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,19870490.52165078,19870809.27165078,21000000.0
...,...,...,...,...,...,...,...,...,...
155,TRUMPUSDT,,,,,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,,,
156,NEIROUSDT,,,,,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,,,
159,DUSDT,,,,,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,,,
162,SUSDT,,,,,C:\Users\saber\OneDrive\Documents\GitHub\Crypt...,,,


In [30]:
from datetime import datetime, timedelta
import pandas as pd
from pathlib import Path
import requests
import time
import random

# --- Configuration ---
INITIAL_BALANCE = 1000.0
SUPPLY = 1_000_000_000          # assumed token supply
MARKETCAP_THRESHOLD = 100_000.0 # USD market cap trigger
INVEST_FRACTION = 0.10          # invest 10% of current balance per trade
HOLD_DURATION_MIN = 1           # hold period in minutes
FEE_RATE = 0.002                # 0.2% trading fee per execution (buy & sell)
BASE_URL = "https://frontend-api-v3.pump.fun"
PAGE_SIZE = 100                 # pagination size for live mode

# --- CSV Logging Setup ---
log_path = Path("data/trades")
log_path.mkdir(parents=True, exist_ok=True)
log_file = log_path / "all_trades.csv"
if not log_file.exists():
    headers = ["timestamp", "token", "action", "price", "amount_invested",
               "amount_returned", "gain_loss_usd", "gain_loss_pct"]
    pd.DataFrame(columns=headers).to_csv(log_file, index=False)

def log_trade(timestamp, token, action, price, amount_invested, amount_returned, gain_loss_usd, gain_loss_pct):
    entry = {
        "timestamp": [timestamp.strftime("%Y-%m-%d %H:%M:%S")],
        "token": [token],
        "action": [action],
        "price": [round(price, 10)],
        "amount_invested": [round(amount_invested, 2) if amount_invested else 0],
        "amount_returned": [round(amount_returned, 2) if amount_returned else 0],
        "gain_loss_usd": [round(gain_loss_usd, 2) if gain_loss_usd else 0],
        "gain_loss_pct": [round(gain_loss_pct, 2) if gain_loss_pct else 0]
    }
    pd.DataFrame(entry).to_csv(log_file, mode='a', header=False, index=False)

class Trade:
    def __init__(self, token, buy_price, buy_time, amount_invested):
        self.token = token
        self.buy_price = buy_price
        self.buy_time = buy_time
        self.amount_invested = amount_invested
        # Calculate buy fee and net tokens bought
        self.fee_buy = self.amount_invested * FEE_RATE
        net_invest = self.amount_invested - self.fee_buy
        self.tokens_bought = net_invest / self.buy_price

    def sell(self, sell_price, sell_time):
        # Calculate gross proceeds and sell fee
        gross_return = self.tokens_bought * sell_price
        fee_sell = gross_return * FEE_RATE
        net_return = gross_return - fee_sell
        # Profit after fees = net_return - original amount_invested
        profit_loss_usd = net_return - self.amount_invested
        profit_loss_pct = (profit_loss_usd / self.amount_invested * 100) if self.amount_invested else 0
        return {
            "token": self.token,
            "buy_price": self.buy_price,
            "sell_price": sell_price,
            "amount_invested": self.amount_invested,
            "amount_returned": net_return,
            "gain_loss_usd": profit_loss_usd,
            "gain_loss_pct": profit_loss_pct
        }

class PaperTradingSimulator:
    """
    Paper Trading Simulator in mock and live modes.
    - mock: simulate tokens with random walk
    - live: fetch real token USD market caps with fees factored

    Usage:
      sim = PaperTradingSimulator(INITIAL_BALANCE, mode="live")
      sim.run(total_minutes=1)
    """
    def __init__(self, initial_balance, mode="live", tokens=None):
        self.balance = initial_balance
        self.active_trade = None
        self.mode = mode
        if mode == "mock":
            if not tokens:
                raise ValueError("In mock mode, you must provide a tokens list.")
            self.tokens = tokens
            init_price = (MARKETCAP_THRESHOLD * 0.9) / SUPPLY
            self.prices = {t: init_price for t in tokens}

    def get_current_prices(self):
        if self.mode == "mock":
            for t in self.tokens:
                drift = 0.000000001
                noise = random.uniform(-0.000000002, 0.000000005)
                self.prices[t] = max(self.prices[t] + drift + noise, 0)
            return self.prices.copy()

        prices = {}
        offset = 0
        total_fetched = 0
        while True:
            try:
                resp = requests.get(
                    f"{BASE_URL}/coins",
                    params={"offset": offset, "limit": PAGE_SIZE}
                )
                resp.raise_for_status()
                coins = resp.json()
            except Exception as e:
                print(f"Error fetching /coins page at offset {offset}: {e}")
                break
            if not isinstance(coins, list) or not coins:
                break
            total_fetched += len(coins)
            # print(f"Fetched {len(coins)} coins from offset {offset}")
            for coin in coins:
                mint = coin.get("mint")
                usd_mc = coin.get("usd_market_cap") or coin.get("market_cap")
                if mint and usd_mc is not None:
                    try:
                        usd_mc = float(usd_mc)
                        prices[mint] = usd_mc / SUPPLY
                    except:
                        continue
            offset += len(coins)
        print(f"Total live tokens fetched: {total_fetched}, usable prices: {len(prices)}")
        return prices

    def simulate_minute(self, current_time, prices):
        print(f"[{current_time.strftime('%H:%M:%S')}] Scanning {len(prices)} tokens")

        # SELL logic
        if self.active_trade:
            t = self.active_trade
            if current_time >= t.buy_time + timedelta(minutes=HOLD_DURATION_MIN):
                res = t.sell(prices.get(t.token, t.buy_price), current_time)
                self.balance += res["amount_returned"]
                log_trade(current_time, t.token, "sell",
                          res["sell_price"], 0,
                          res["amount_returned"],
                          res["gain_loss_usd"], res["gain_loss_pct"])
                sign = "+" if res["gain_loss_pct"] >= 0 else ""
                print(f"BOUGHT {t.token} at ${t.buy_price:.6f} | "
                      f"SOLD at ${res['sell_price']:.6f} | "
                      f"{sign}{res['gain_loss_pct']:.2f}% | P/L "
                      f"{sign}${abs(res['gain_loss_usd']):.2f}")
                self.active_trade = None

        # BUY logic
        if not self.active_trade:
            for token, price in prices.items():
                if price * SUPPLY >= MARKETCAP_THRESHOLD:
                    invest_amt = self.balance * INVEST_FRACTION
                    if invest_amt <= 0:
                        break
                    fee_buy = invest_amt * FEE_RATE
                    total_cost = invest_amt + fee_buy
                    self.balance -= total_cost
                    self.active_trade = Trade(token, price, current_time, invest_amt)
                    log_trade(current_time, token, "buy",
                              price, invest_amt, 0, 0, 0)
                    print(f"Buying {token} at ${price:.6f} (Invested ${invest_amt:.2f}, Fee ${fee_buy:.2f})")
                    break

    def run(self, total_minutes=None):
        current_time = datetime.now()
        count = 0
        while total_minutes is None or count < total_minutes:
            prices = self.get_current_prices()
            self.simulate_minute(current_time, prices)
            current_time += timedelta(minutes=1)
            count += 1
            time.sleep(60)

# Usage example:
# sim = PaperTradingSimulator(INITIAL_BALANCE, mode="live")
# sim.run(total_minutes=1)


In [31]:

# Example usage:
# sim = PaperTradingSimulator(INITIAL_BALANCE)
# sim.run(total_minutes=60)

if __name__ == '__main__':
    sim = PaperTradingSimulator(INITIAL_BALANCE, mode="live")
    sim.run(total_minutes=5)


Total live tokens fetched: 1050, usable prices: 1049
[00:00:19] Scanning 1049 tokens
Buying BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at $0.000172 (Invested $100.00, Fee $0.20)
Total live tokens fetched: 1050, usable prices: 1049
[00:01:19] Scanning 1049 tokens
BOUGHT BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at $0.000172 | SOLD at $0.000158 | -8.52% | P/L $8.52
Buying BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at $0.000158 (Invested $99.13, Fee $0.20)
Total live tokens fetched: 1050, usable prices: 1049
[00:02:19] Scanning 1049 tokens
BOUGHT BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at $0.000158 | SOLD at $0.000168 | +5.59% | P/L +$5.54
Buying BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at $0.000168 (Invested $99.66, Fee $0.20)
Total live tokens fetched: 1050, usable prices: 1048
[00:03:19] Scanning 1048 tokens
BOUGHT BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at $0.000168 | SOLD at $0.000188 | +11.68% | P/L +$11.64
Buying BttJJaXbWsyCX3SFwtSHGQrsMnjrrkXtJQw5K15zpump at 

In [5]:
# drop any row that still has NA market caps
mcap = mcap.dropna(how='any')
price = price.reindex(mcap.index)          # keep the two in sync


In [None]:
mcap