In [2]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
from typing import List, Dict, Optional

import hashlib
import json
import os

API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6IjdlMjBjNDk0LWU0MjAtNGFmOC05MzM2LTkxNTBjNDU3MmJjZCIsIm9yZ0lkIjoiNDY4ODE1IiwidXNlcklkIjoiNDgyMjkyIiwidHlwZUlkIjoiMDk3MTE4YTItNWVkOC00Yjc2LTg5YWItMjM5NDgzNDVjYzNiIiwidHlwZSI6IlBST0pFQ1QiLCJpYXQiOjE3NTY4NDM1NjIsImV4cCI6NDkxMjYwMzU2Mn0.yLn2ojeo6b4qJA9IYnSJlel5gZVlJChuZbhqkUBSLeo"

class FixedMoralisAnalyzer:
    def __init__(self, api_key: str, cache_file: str = "moralis_cache.json"):
        self.api_key = api_key
        self.base_url = "https://deep-index.moralis.io/api/v2"
        self.headers = {
            "Accept": "application/json",
            "X-API-Key": api_key
        }
        self.chains = {
            'eth': '0x1',
            'bsc': '0x38', 
            'polygon': '0x89',
            'arbitrum': '0xa4b1',
            'optimism': '0xa',
            'base': '0x2105'
        }

        # Initialize cache
        self.cache_file = cache_file
        if os.path.exists(cache_file):
            with open(cache_file, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}
        else:
            self.cache = {}

    def _make_request(self, endpoint: str, params: dict) -> dict:
        """Helper with caching logic"""
        key = hashlib.sha256((endpoint + json.dumps(params, sort_keys=True)).encode()).hexdigest()

        if key in self.cache:
            # ✅ Cached response
            return self.cache[key]

        # ❌ Not cached → API call
        url = f"{self.base_url}{endpoint}"
        response = requests.get(url, headers=self.headers, params=params)

        if response.status_code == 200:
            data = response.json()
            # Save to cache
            self.cache[key] = data
            with open(self.cache_file, "w") as f:
                json.dump(self.cache, f)
            return data
        else:
            print(f"API Error {response.status_code}: {response.text[:200]}")
            return {}


        
    def test_connection(self) -> bool:
        """Test if API key works with native balance endpoint"""
        try:
            url = f"{self.base_url}/0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045/balance"
            params = {"chain": "0x1"}  # Use hex chain ID
            
            response = requests.get(url, headers=self.headers, params=params)
            
            if response.status_code == 200:
                print("✅ Moralis API connection successful!")
                return True
            else:
                print(f"❌ API Error: {response.status_code}")
                print(f"Response: {response.text}")
                return False
                
        except Exception as e:
            print(f"❌ Connection error: {e}")
            return False
    
    def get_native_transactions(self, wallet: str, chain: str = '0x1', limit: int = 100) -> List[Dict]:
        endpoint = f"/{wallet}"
        params = {"chain": chain, "limit": limit}
        data = self._make_request(endpoint, params)
        return data.get('result', [])

    def get_erc20_transfers(self, wallet: str, chain: str = '0x1', limit: int = 100) -> List[Dict]:
        endpoint = f"/{wallet}/erc20/transfers"
        params = {"chain": chain, "limit": limit}
        data = self._make_request(endpoint, params)
        return data.get('result', [])

    def analyze_single_wallet(self, wallet: str) -> Optional[Dict]:
        """Analyze a single wallet across all chains"""
        print(f"\nAnalyzing wallet: {wallet}")
        
        wallet_stats = {
            'wallet': wallet,
            'total_native_txs': 0,
            'total_erc20_transfers': 0,
            'active_chains': 0,
            'chain_details': {}
        }
        
        for chain_name, chain_id in self.chains.items():
            print(f"  Checking {chain_name}...")
            
            try:
                # Get native transactions
                native_txs = self.get_native_transactions(wallet, chain_id, limit=50)
                
                # Get ERC20 transfers  
                erc20_transfers = self.get_erc20_transfers(wallet, chain_id, limit=50)
                
                chain_native_count = len(native_txs)
                chain_erc20_count = len(erc20_transfers)
                
                if chain_native_count > 0 or chain_erc20_count > 0:
                    wallet_stats['active_chains'] += 1
                    wallet_stats['total_native_txs'] += chain_native_count
                    wallet_stats['total_erc20_transfers'] += chain_erc20_count
                    
                    wallet_stats['chain_details'][chain_name] = {
                        'native_txs': chain_native_count,
                        'erc20_transfers': chain_erc20_count
                    }
                    
                    print(f"    {chain_name}: {chain_native_count} native, {chain_erc20_count} ERC20")
                
                # Rate limiting
                time.sleep(0.3)
                
            except Exception as e:
                print(f"    Error with {chain_name}: {e}")
                continue
        
        # Check qualification criteria (simplified for testing)
        total_activity = wallet_stats['total_native_txs'] + wallet_stats['total_erc20_transfers']
        
        if total_activity >= 10 and wallet_stats['active_chains'] >= 2:
            print(f"  ✅ QUALIFIED: {total_activity} total activities, {wallet_stats['active_chains']} chains")
            return wallet_stats
        else:
            print(f"  ❌ Not qualified: {total_activity} activities, {wallet_stats['active_chains']} chains")
            return None
    
    def get_detailed_data_for_wallet(self, wallet: str, max_per_chain: int = 100) -> List[Dict]:
        """Get detailed transaction data for a qualified wallet"""
        all_transactions = []
        
        for chain_name, chain_id in self.chains.items():
            try:
                print(f"    Getting {chain_name} data...")
                
                # Get native transactions
                native_txs = self.get_native_transactions(wallet, chain_id, limit=max_per_chain)
                
                for tx in native_txs:
                    processed = {
                        'tx_hash': tx.get('hash'),
                        'block_time': tx.get('block_timestamp'),
                        'wallet': wallet,
                        'blockchain': chain_name,
                        'from_address': tx.get('from_address'),
                        'to_address': tx.get('to_address'),
                        'value_wei': tx.get('value', '0'),
                        'value_native': float(tx.get('value', '0')) / 1e18 if tx.get('value') else 0,
                        'gas_used': tx.get('gas_used'),
                        'gas_price': tx.get('gas_price'),
                        'action': 'native_transfer',
                        'transaction_type': 'deposit' if tx.get('to_address', '').lower() == wallet.lower() else 'withdrawal'
                    }
                    all_transactions.append(processed)
                
                # Get ERC20 transfers
                erc20_transfers = self.get_erc20_transfers(wallet, chain_id, limit=max_per_chain)
                
                for transfer in erc20_transfers:
                    processed = {
                        'tx_hash': transfer.get('transaction_hash'),
                        'block_time': transfer.get('block_timestamp'),
                        'wallet': wallet,
                        'blockchain': chain_name,
                        'from_address': transfer.get('from_address'),
                        'to_address': transfer.get('to_address'),
                        'token_address': transfer.get('address'),
                        'token_symbol': transfer.get('token_symbol'),
                        'token_name': transfer.get('token_name'),
                        'value_raw': transfer.get('value', '0'),
                        'decimals': transfer.get('token_decimals', '18'),
                        'action': 'erc20_transfer',
                        'transaction_type': 'deposit' if transfer.get('to_address', '').lower() == wallet.lower() else 'withdrawal'
                    }
                    
                    # Calculate human-readable amount
                    try:
                        decimals = int(transfer.get('token_decimals', '18'))
                        raw_value = float(transfer.get('value', '0'))
                        processed['amount'] = raw_value / (10 ** decimals)
                    except:
                        processed['amount'] = 0
                    
                    all_transactions.append(processed)
                
                time.sleep(0.5)  # Rate limiting
                
            except Exception as e:
                print(f"    Error getting {chain_name} details: {e}")
                continue
        
        return all_transactions

def run_fixed_moralis_analysis(api_key: str, max_wallets: int = 3) -> pd.DataFrame:
    """Run the complete analysis with fixed endpoints"""
    
    analyzer = FixedMoralisAnalyzer(api_key)
    
    # Test connection
    if not analyzer.test_connection():
        print("❌ Connection failed. Check your API key.")
        return pd.DataFrame()
    
    # Test wallets (known active addresses)
    test_wallets = [
        "0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045",  # Vitalik
        "0x28C6c06298d514Db089934071355E5743bf21d60",  # Binance
        "0xF977814e90dA44bFA03b6295A0616a897441aceC",  # Alameda
        "0x3fC91A3afd70395Cd496C647d5a6CC9D4B2b7FAD",  # Uniswap
        "0x1111111254fb6c44bAC0beD2854e76F90643097d",  # 1inch
    ]
    
    print(f"\n🔍 Testing {len(test_wallets)} known active wallets...")
    
    qualified_wallets = []
    
    for i, wallet in enumerate(test_wallets[:max_wallets], 1):
        print(f"\n--- Wallet {i}/{max_wallets} ---")
        result = analyzer.analyze_single_wallet(wallet)
        
        if result:
            qualified_wallets.append(result)
        
        # Rate limiting between wallets
        if i < len(test_wallets):
            time.sleep(2)
    
    if not qualified_wallets:
        print("\n❌ No wallets qualified")
        return pd.DataFrame()
    
    print(f"\n✅ {len(qualified_wallets)} wallets qualified!")
    print("\n📊 Getting detailed transaction data...")
    
    # Get detailed data
    all_detailed_data = []
    
    for wallet_info in qualified_wallets:
        wallet = wallet_info['wallet']
        print(f"\n  Processing {wallet}...")
        
        wallet_transactions = analyzer.get_detailed_data_for_wallet(wallet, max_per_chain=50)
        all_detailed_data.extend(wallet_transactions)
        
        time.sleep(1)  # Rate limiting between wallets
    
    # Convert to DataFrame
    if all_detailed_data:
        df = pd.DataFrame(all_detailed_data)
        df['block_time'] = pd.to_datetime(df['block_time'])
        df = df.sort_values('block_time').reset_index(drop=True)
        
        print(f"\n🎉 SUCCESS! Collected {len(df)} transactions")
        
        # Save results
        df.to_csv('moralis_wallet_data.csv', index=False)
        print("💾 Data saved to 'moralis_wallet_data.csv'")
        
        # Show sample
        print(f"\nSample data:")
        print(df[['tx_hash', 'wallet', 'blockchain', 'action', 'transaction_type']].head())
        
        return df
    else:
        print("❌ No transaction data collected")
        return pd.DataFrame()

# USAGE:
if __name__ == "__main__":
    # Replace with your actual Moralis API key


    if API_KEY == "MORALIS_API_KEY":
        print("⚠️  Please set your actual Moralis API key")
    else:
        result = run_fixed_moralis_analysis(API_KEY, max_wallets=3)
        
        if not result.empty:
            print(f"\n📈 Final Results Summary:")
            print(f"Total transactions: {len(result)}")
            print(f"Unique wallets: {result['wallet'].nunique()}")
            print(f"Blockchains: {result['blockchain'].nunique()}")
            print(f"Date range: {result['block_time'].min()} to {result['block_time'].max()}")
        else:
            print("No data collected")

✅ Moralis API connection successful!

🔍 Testing 5 known active wallets...

--- Wallet 1/3 ---

Analyzing wallet: 0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045
  Checking eth...
    eth: 50 native, 50 ERC20
  Checking bsc...
    bsc: 50 native, 50 ERC20
  Checking polygon...
    polygon: 50 native, 50 ERC20
  Checking arbitrum...
    arbitrum: 50 native, 50 ERC20
  Checking optimism...
    optimism: 50 native, 50 ERC20
  Checking base...
    base: 50 native, 50 ERC20
  ✅ QUALIFIED: 600 total activities, 6 chains

--- Wallet 2/3 ---

Analyzing wallet: 0x28C6c06298d514Db089934071355E5743bf21d60
  Checking eth...
    eth: 50 native, 50 ERC20
  Checking bsc...
    bsc: 50 native, 50 ERC20
  Checking polygon...
    polygon: 17 native, 50 ERC20
  Checking arbitrum...
    arbitrum: 13 native, 50 ERC20
  Checking optimism...
    optimism: 4 native, 50 ERC20
  Checking base...
    base: 50 native, 50 ERC20
  ✅ QUALIFIED: 484 total activities, 6 chains

--- Wallet 3/3 ---

Analyzing wallet: 0xF97781

In [10]:
import requests
import pandas as pd
import time
import os
import json
from datetime import datetime
from typing import Dict, List, Optional
from dotenv import load_dotenv

# -------------------------------
# Load environment variables
# -------------------------------
load_dotenv()
MORALIS_API_KEY = os.getenv("MORALIS_API_KEY")
COINGECKO_API_KEY = os.getenv("GECKO_API_KEY")

In [None]:
import os
import json
import time
import requests
import pandas as pd
from datetime import datetime
from typing import Dict, List, Optional
from dotenv import load_dotenv

# -------------------------------
# Cache for prices
# -------------------------------
class PriceCache:
    def __init__(self, filename="price_cache.json"):
        self.filename = filename
        self.cache = {}
        if os.path.exists(filename):
            with open(filename, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}

    def get(self, key: str):
        return self.cache.get(key)

    def set(self, key: str, value):
        self.cache[key] = value
        with open(self.filename, "w") as f:
            json.dump(self.cache, f)

# -------------------------------
# Cache for contract-to-CGID mapping
# -------------------------------
class AddressCache:
    def __init__(self, filename="address_to_cgid.json"):
        self.filename = filename
        self.cache = {}
        if os.path.exists(filename):
            with open(filename, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}

    def get(self, key: str):
        return self.cache.get(key)

    def set(self, key: str, value):
        self.cache[key] = value
        with open(self.filename, "w") as f:
            json.dump(self.cache, f)

# -------------------------------
# Extended Analyzer
# -------------------------------
# -------------------------------
# Extended Analyzer
# -------------------------------
class ExtendedMoralisAnalyzer:
    def __init__(self, api_key: str, use_cache: bool = True, force_refresh: bool = False):
        """
        :param api_key: Moralis API key
        :param use_cache: if True, prefer cache
        :param force_refresh: if True, ignore cache and refresh from API
        """
        self.api_key = api_key
        self.base_url = "https://deep-index.moralis.io/api/v2"
        self.headers = {"Accept": "application/json", "X-API-Key": api_key}
        self.use_cache = use_cache
        self.force_refresh = force_refresh

        self.chains = {
            'eth': '0x1',
            'bsc': '0x38',
            'polygon': '0x89',
            'arbitrum': '0xa4b1',
            'optimism': '0xa',
            'base': '0x2105'
        }

        self.price_cache = PriceCache()
        self.address_cache = AddressCache()
        self.moralis_cache = ExtendedMoralisAnalyzer()

    # -------------------------------
    # Fetch ERC20 Transfers (cached)
    # -------------------------------
    def get_erc20_transfers(self, wallet: str, chain: str, limit: int = 50) -> List[Dict]:
        cache_key = f"{wallet}_{chain}"
        cached = self.moralis_cache.get(wallet, chain)

        if self.use_cache and cached and not self.force_refresh:
            return cached

        try:
            url = f"{self.base_url}/{wallet}/erc20/transfers"
            params = {"chain": chain, "limit": limit}
            response = requests.get(url, headers=self.headers, params=params)
            if response.status_code == 200:
                result = response.json().get('result', [])
                if self.use_cache:
                    self.moralis_cache.set(wallet, chain, result)
                return result
            return []
        except Exception as e:
            print(f"ERC20 transfer error: {e}")
            return []

    # -------------------------------
    # Price Fetcher (cached)
    # -------------------------------
    def get_price_usd(self, symbol: str, timestamp: str, token_address: str = None, blockchain: str = "ethereum") -> Optional[float]:
        if not symbol and not token_address:
            return None

        symbol = (symbol or "").lower()
        date_str = timestamp.split("T")[0]
        cache_key = f"{symbol}_{token_address}_{date_str}"
        cached = self.price_cache.get(cache_key)

        if self.use_cache and cached and not self.force_refresh:
            return cached

        try:
            mapping = {
                "eth": "ethereum",
                "weth": "weth",
                "usdc": "usd-coin",
                "usdt": "tether",
                "bnb": "binancecoin",
                "matic": "polygon"
            }
            cg_id = mapping.get(symbol)

            if not cg_id and token_address:
                cached_cgid = self.address_cache.get(token_address.lower())
                if cached_cgid:
                    cg_id = cached_cgid

            if not cg_id and token_address:
                try:
                    url = f"https://api.coingecko.com/api/v3/coins/{blockchain}/contract/{token_address}"
                    r = requests.get(url)
                    if r.status_code == 200:
                        data = r.json()
                        cg_id = data.get("id")
                        if cg_id and self.use_cache:
                            self.address_cache.set(token_address.lower(), cg_id)
                except Exception as e:
                    print(f"Contract lookup failed for {token_address}: {e}")

            if not cg_id:
                return None

            url = f"https://api.coingecko.com/api/v3/coins/{cg_id}/history"
            params = {"date": datetime.strptime(date_str, "%Y-%m-%d").strftime("%d-%m-%Y")}
            r = requests.get(url, params=params)
            if r.status_code == 200:
                data = r.json()
                price = data.get("market_data", {}).get("current_price", {}).get("usd")
                if price and self.use_cache:
                    self.price_cache.set(cache_key, price)
                return price
        except Exception as e:
            print(f"Price fetch error for {symbol} / {token_address}: {e}")
            return None
    # -------------------------------
    # GAS COSTS (from gas.fees table)
    # -------------------------------
    def get_gas_costs_for_wallet(self, wallet: str, max_per_chain: int = 50) -> pd.DataFrame:
        """
        Fetch gas fees for a given wallet across supported chains.
        Returns DataFrame with tx_hash, gas_used, fee in USD, etc.
        """
        all_gas = []
        for chain_name, chain_id in self.chains.items():
            try:
                url = f"{self.base_url}/{wallet}/transaction"
                params = {"chain": chain_id, "limit": max_per_chain}
                response = requests.get(url, headers=self.headers, params=params)
                if response.status_code != 200:
                    continue

                txs = response.json().get("result", [])
                for tx in txs:
                    try:
                        fee_usd = float(tx.get("gas_price", 0)) * float(tx.get("receipt_gas_used", 0)) / 1e18 * float(tx.get("usd_price", 0))
                        enriched = {
                            "wallet": wallet,
                            "blockchain": chain_name,
                            "tx_hash": tx.get("hash"),
                            "block_time": tx.get("block_timestamp"),
                            "gas_used": tx.get("receipt_gas_used"),
                            "gas_price": tx.get("gas_price"),
                            "gas_fee_usd": fee_usd,
                        }
                        all_gas.append(enriched)
                    except Exception as e:
                        print(f"Gas enrich error: {e}")
                        continue
            except Exception as e:
                print(f"Gas fetch error: {e}")
                continue

        if not all_gas:
            return pd.DataFrame()

        df = pd.DataFrame(all_gas)
        df['block_time'] = pd.to_datetime(df['block_time'])
        return df.sort_values("block_time").reset_index(drop=True)

# -------------------------------
# Runner
# -------------------------------
if __name__ == "__main__":
    load_dotenv()
    API_KEY = os.getenv("MORALIS_API_KEY")

    if not API_KEY:
        raise ValueError("⚠️ Please add MORALIS_API_KEY to your .env file!")

    wallet_address = "0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045"  # replace with user input
    analyzer = ExtendedMoralisAnalyzer(API_KEY)

    df = analyzer.get_detailed_data_for_wallet(wallet_address, max_per_chain=30)

    if df.empty:
        print("No transactions found.")
    else:
        print(df.head(20))
        print("\nSummary USD values:")
        print(df.groupby("transaction_type")["usd_value"].sum())


Fetching ERC20 transfers on eth...
Fetching ERC20 transfers on bsc...
Fetching ERC20 transfers on polygon...
Fetching ERC20 transfers on arbitrum...
Fetching ERC20 transfers on optimism...
Fetching ERC20 transfers on base...
                                        wallet blockchain  \
0   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
1   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
2   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
3   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
4   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
5   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
6   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
7   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
8   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
9   0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
10  0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045    polygon   
11  0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96

### analyzer.py

In [None]:
import os
import json
import time
from dotenv import load_dotenv
import requests
import pandas as pd
from datetime import datetime
from typing import Dict, List, Optional, Tuple

# -------------------------------
# Cache for prices
# -------------------------------
class PriceCache:
    def __init__(self, filename="price_cache.json"):
        self.filename = filename
        self.cache = {}
        if os.path.exists(filename):
            with open(filename, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}

    def get(self, key: str):
        return self.cache.get(key)

    def set(self, key: str, value):
        self.cache[key] = value
        with open(self.filename, "w") as f:
            json.dump(self.cache, f)

# -------------------------------
# Cache for contract-to-CGID mapping
# -------------------------------
class AddressCache:
    def __init__(self, filename="address_to_cgid.json"):
        self.filename = filename
        self.cache = {}
        if os.path.exists(filename):
            with open(filename, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}

    def get(self, key: str):
        return self.cache.get(key)

    def set(self, key: str, value):
        self.cache[key] = value
        with open(self.filename, "w") as f:
            json.dump(self.cache, f)

# -------------------------------
# Extended Analyzer
# -------------------------------
class ExtendedMoralisAnalyzer:
    def __init__(self, api_key: str, use_cache: bool = True, force_refresh: bool = False):
        self.api_key = api_key
        self.base_url = "https://deep-index.moralis.io/api/v2"
        self.headers = {"Accept": "application/json", "X-API-Key": api_key}

        self.chains = {
            'eth': '0x1',
            'bsc': '0x38',
            'polygon': '0x89',
            'arbitrum': '0xa4b1',
            'optimism': '0xa',
            'base': '0x2105'
        }

        self.price_cache = PriceCache()
        self.address_cache = AddressCache()

    # -------------------------------
    # Fetch ERC20 Transfers
    # -------------------------------
    def get_erc20_transfers(self, wallet: str, chain: str, limit: int = 50) -> List[Dict]:
        try:
            url = f"{self.base_url}/{wallet}/erc20/transfers"
            params = {"chain": chain, "limit": limit}
            response = requests.get(url, headers=self.headers, params=params)
            if response.status_code == 200:
                return response.json().get('result', [])
            return []
        except Exception as e:
            print(f"ERC20 transfer error: {e}")
            return []

    # -------------------------------
    # Fetch Tx Gas Cost (in native coin)
    # -------------------------------
    def get_tx_gas_cost(self, tx_hash: str, chain: str) -> Optional[float]:
        try:
            url = f"{self.base_url}/transaction/{tx_hash}"
            params = {"chain": chain}
            r = requests.get(url, headers=self.headers, params=params)
            if r.status_code == 200:
                data = r.json()
                gas_used = int(data.get("receipt_gas_used") or 0)
                gas_price = int(data.get("gas_price") or 0)
                native_spent = gas_used * gas_price / 1e18
                return native_spent
        except Exception as e:
            print(f"Gas fetch failed for {tx_hash}: {e}")
        return None

    # -------------------------------
    # PRICE FETCHER (Coingecko)
    # -------------------------------
    def get_price_usd(self, symbol: str, timestamp: str, token_address: str = None, blockchain: str = "ethereum") -> Optional[float]:
        if not token_address and not symbol:
            return None

        date_str = timestamp.split("T")[0]
        cache_key = f"{token_address or symbol}_{date_str}"
        cached = self.price_cache.get(cache_key)
        if cached:
            return cached

        cg_id = None

        try:
            # Step 1: prefer contract lookup first
            if token_address:
                # Check local address cache
                cached_cgid = self.address_cache.get(token_address.lower())
                if cached_cgid:
                    cg_id = cached_cgid
                else:
                    # Query Coingecko contract API
                    url = f"https://api.coingecko.com/api/v3/coins/{blockchain}/contract/{token_address}"
                    r = requests.get(url)
                    if r.status_code == 200:
                        data = r.json()
                        cg_id = data.get("id")
                        if cg_id:
                            self.address_cache.set(token_address.lower(), cg_id)

            # Step 2: fallback to hardcoded mapping if contract failed
            if not cg_id and symbol:
                mapping = {
                    "eth": "ethereum",
                    "weth": "weth",
                    "usdc": "usd-coin",
                    "usdt": "tether",
                    "bnb": "binancecoin",
                    "matic": "polygon"
                }
                cg_id = mapping.get(symbol.lower())

            if not cg_id:
                return None

            # Step 3: fetch historical price
            url = f"https://api.coingecko.com/api/v3/coins/{cg_id}/history"
            params = {"date": datetime.strptime(date_str, "%Y-%m-%d").strftime("%d-%m-%Y")}
            r = requests.get(url, params=params)
            if r.status_code == 200:
                data = r.json()
                price = data.get("market_data", {}).get("current_price", {}).get("usd")
                if price:
                    self.price_cache.set(cache_key, price)
                    return price
        except Exception as e:
            print(f"Price fetch error for {symbol} / {token_address}: {e}")
            return None

    # -------------------------------
    # Get current prices for unrealized PnL
    # -------------------------------
    def get_current_prices(self, tokens: List[Dict]) -> Dict[str, float]:
        """
        Fetch current USD prices for a list of tokens.
        tokens: List of dicts with keys 'symbol', 'address', 'blockchain'
        Returns: dict mapping token_address -> current_price_usd
        """
        prices = {}
        coingecko_ids = []
        token_map = {}  # cg_id -> token_address
        
        for token in tokens:
            symbol = token.get("symbol", "")
            address = token.get("address", "")
            blockchain = token.get("blockchain", "ethereum")
            
            cache_key = f"current_{address.lower()}"
            cached = self.price_cache.get(cache_key)
            if cached:
                prices[address] = cached
                continue
            
            cg_id = None
            
            # Try to resolve Coingecko ID
            if address:
                cached_cgid = self.address_cache.get(address.lower())
                if cached_cgid:
                    cg_id = cached_cgid
                else:
                    try:
                        url = f"https://api.coingecko.com/api/v3/coins/{blockchain}/contract/{address}"
                        r = requests.get(url)
                        if r.status_code == 200:
                            data = r.json()
                            cg_id = data.get("id")
                            if cg_id:
                                self.address_cache.set(address.lower(), cg_id)
                    except Exception as e:
                        print(f"Error resolving CG ID for {address}: {e}")
            
            # Fallback to symbol mapping
            if not cg_id and symbol:
                mapping = {
                    "eth": "ethereum",
                    "weth": "weth", 
                    "usdc": "usd-coin",
                    "usdt": "tether",
                    "bnb": "binancecoin",
                    "matic": "polygon",
                    "arb": "arbitrum",
                    "op": "optimism"
                }
                cg_id = mapping.get(symbol.lower())
            
            if cg_id:
                coingecko_ids.append(cg_id)
                token_map[cg_id] = address
        
        # Batch fetch current prices
        if coingecko_ids:
            try:
                url = "https://api.coingecko.com/api/v3/simple/price"
                params = {
                    "ids": ",".join(coingecko_ids),
                    "vs_currencies": "usd"
                }
                r = requests.get(url, params=params)
                if r.status_code == 200:
                    data = r.json()
                    for cg_id, price_data in data.items():
                        if "usd" in price_data:
                            token_address = token_map[cg_id]
                            price = price_data["usd"]
                            prices[token_address] = price
                            # Cache current prices briefly
                            cache_key = f"current_{token_address.lower()}"
                            self.price_cache.set(cache_key, price)
            except Exception as e:
                print(f"Error fetching current prices: {e}")
        
        return prices

    # -------------------------------
    # Enrich transfers with USD price + Gas cost
    # -------------------------------
    def get_detailed_data_for_wallet(self, wallet: str, max_per_chain: int = 50, chains: List[str] = None) -> pd.DataFrame:
        all_tx = []
        chains_to_fetch = chains if chains else list(self.chains.keys())
        
        # Known exchange addresses for better transaction classification
        known_exchanges = {
            '0x3f5ce5fbfe3e9af3971dd833d26ba9b5c936f0be',  # Binance
            '0x742d35cc6634c0532925a3b844bc454e4438f44e',  # Bitfinex
            '0x0681d8db095565fe8a346fa0277bffde9c0edbbf',  # BitMEX
            '0x563b377a956c80d77a7c613a9343699ad6123911',  # Poloniex
            # Add more as needed
        }
        
        for chain_name in chains_to_fetch:
            if chain_name not in self.chains:
                continue
                
            chain_id = self.chains[chain_name]
            print(f"Fetching ERC20 transfers on {chain_name}...")
            erc20_txs = self.get_erc20_transfers(wallet, chain=chain_id, limit=max_per_chain)
            
            for tx in erc20_txs:
                try:
                    decimals = int(tx.get("token_decimals") or 18)
                    raw_value = float(tx.get("value") or 0)
                    amount = raw_value / (10 ** decimals)
                    
                    # Skip transactions with zero amount
                    if amount <= 0:
                        continue
                        
                    timestamp = tx.get("block_timestamp")

                    symbol = tx.get("token_symbol", "")
                    token_address = tx.get("address", "")
                    price_usd = self.get_price_usd(symbol, timestamp, token_address, chain_name) or 0
                    usd_value = amount * price_usd

                    # Gas cost (native coin)
                    tx_hash = tx.get("transaction_hash")
                    gas_native = self.get_tx_gas_cost(tx_hash, chain_id) or 0
                    
                    # Enhanced transaction classification
                    from_addr = tx.get("from_address", "").lower()
                    to_addr = tx.get("to_address", "").lower()
                    wallet_lower = wallet.lower()
                    
                    if to_addr == wallet_lower and from_addr == wallet_lower:
                        tx_type = "self_transfer"
                    elif to_addr == wallet_lower:
                        tx_type = "buy" if from_addr in known_exchanges else "incoming"
                    elif from_addr == wallet_lower:
                        tx_type = "sell" if to_addr in known_exchanges else "outgoing"
                    else:
                        tx_type = "external"  # Should not happen for wallet-specific queries

                    enriched = {
                        "wallet": wallet,
                        "blockchain": chain_name,
                        "tx_hash": tx_hash,
                        "block_time": timestamp,
                        "token_symbol": symbol,
                        "token_address": token_address,
                        "amount": amount,
                        "price_usd": price_usd,
                        "usd_value": usd_value,
                        "gas_cost_native": gas_native,
                        "transaction_type": tx_type,
                        "from_address": from_addr,
                        "to_address": to_addr
                    }
                    all_tx.append(enriched)
                except Exception as e:
                    print(f"Error processing tx: {e}")
                    continue

            time.sleep(0.5)  # rate limit

        if not all_tx:
            return pd.DataFrame()

        df = pd.DataFrame(all_tx)
        df['block_time'] = pd.to_datetime(df['block_time'])
        
        # Clean up the index to be sequential without gaps
        df = df.sort_values("block_time").reset_index(drop=True)
        
        return df

# -------------------------------
# PnL CALCULATION
# -------------------------------
def calculate_pnl_improved(df: pd.DataFrame, method: str = "FIFO", analyzer: ExtendedMoralisAnalyzer = None) -> Tuple[float, float, pd.DataFrame]:
    """
    Improved PnL calculation that fetches current prices for unrealized PnL
    """
    # Validate inputs
    if df.empty:
        return 0.0, 0.0, pd.DataFrame(columns=["Token", "Realized PnL (USD)", "Unrealized PnL (USD)", "Current Holdings", "Avg Cost", "Current Price"])
    
    # Ensure proper sorting by time
    df = df.sort_values("block_time").reset_index(drop=True)
    
    positions = {}   # token -> list of lots (FIFO/LIFO)
    avg_costs = {}   # token -> (total_qty, total_cost_basis) for ACB
    realized_pnl = 0.0
    token_realized = {}
    
    print(f"Processing {len(df)} transactions using {method} method...")
    
    for idx, row in df.iterrows():
        token = row.get("token_symbol", "")
        if not token:
            continue
            
        # Handle different quantity column names
        qty = row.get("amount", row.get("token_amount", 0))
        price = row.get("price_usd", 0)
        tx_type = row.get("transaction_type", "")
        
        # Skip invalid transactions
        if qty <= 0 or price <= 0 or pd.isna(price):
            print(f"Skipping invalid transaction: qty={qty}, price={price}, type={tx_type}")
            continue
        
        # Classify transactions into buys/sells
        is_buy = tx_type in ["deposit", "buy", "swap_in", "mint", "receive"]
        is_sell = tx_type in ["withdrawal", "sell", "swap_out", "burn", "send"]
        
        # Skip non-trading transactions
        if not (is_buy or is_sell):
            print(f"Skipping non-trading transaction type: {tx_type}")
            continue
            
        # --- FIFO / LIFO Logic ---
        if method in ["FIFO", "LIFO"]:
            if is_buy:
                # Add to position
                lot = {"qty": float(qty), "cost": float(price)}
                positions.setdefault(token, []).append(lot)
                print(f"Added lot: {qty} {token} @ ${price}")
                
            elif is_sell:
                # Sell from position
                if token not in positions or not positions[token]:
                    print(f"WARNING: Selling {qty} {token} with no position!")
                    # Still record as realized loss (assuming cost basis = 0)
                    pnl_piece = qty * price  # All proceeds are gain
                    realized_pnl += pnl_piece
                    token_realized[token] = token_realized.get(token, 0.0) + pnl_piece
                    continue
                
                remaining_to_sell = float(qty)
                sell_price = float(price)
                
                while remaining_to_sell > 0 and positions[token]:
                    # Get lot based on method
                    lot_idx = 0 if method == "FIFO" else -1
                    lot = positions[token][lot_idx]
                    
                    lot_qty = lot["qty"]
                    lot_cost = lot["cost"]
                    
                    # Determine how much to sell from this lot
                    qty_to_sell = min(remaining_to_sell, lot_qty)
                    
                    # Calculate PnL for this portion
                    proceeds = qty_to_sell * sell_price
                    cost_basis = qty_to_sell * lot_cost
                    pnl_piece = proceeds - cost_basis
                    
                    realized_pnl += pnl_piece
                    token_realized[token] = token_realized.get(token, 0.0) + pnl_piece
                    
                    print(f"Sold {qty_to_sell} {token}: ${proceeds:.2f} proceeds - ${cost_basis:.2f} cost = ${pnl_piece:.2f} PnL")
                    
                    # Update lot and remaining
                    lot["qty"] -= qty_to_sell
                    remaining_to_sell -= qty_to_sell
                    
                    # Remove empty lots
                    if lot["qty"] <= 0:
                        positions[token].pop(lot_idx)
        
        # --- ACB (Average Cost Basis) Logic ---
        elif method == "ACB":
            if is_buy:
                # Update average cost basis
                current_qty, current_total_cost = avg_costs.get(token, (0.0, 0.0))
                new_qty = current_qty + qty
                new_total_cost = current_total_cost + (qty * price)
                avg_costs[token] = (new_qty, new_total_cost)
                print(f"ACB updated for {token}: {new_qty} units, avg cost = ${new_total_cost/new_qty:.4f}")
                
            elif is_sell:
                current_qty, current_total_cost = avg_costs.get(token, (0.0, 0.0))
                
                if current_qty <= 0:
                    print(f"WARNING: Selling {qty} {token} with no ACB position!")
                    # Treat as all gain
                    pnl_piece = qty * price
                    realized_pnl += pnl_piece
                    token_realized[token] = token_realized.get(token, 0.0) + pnl_piece
                    continue
                
                # Calculate average cost
                avg_cost = current_total_cost / current_qty if current_qty > 0 else 0
                
                # Calculate PnL
                qty_to_sell = min(qty, current_qty)  # Can't sell more than we have
                proceeds = qty_to_sell * price
                cost_basis = qty_to_sell * avg_cost
                pnl_piece = proceeds - cost_basis
                
                realized_pnl += pnl_piece
                token_realized[token] = token_realized.get(token, 0.0) + pnl_piece
                
                print(f"ACB sale: {qty_to_sell} {token} @ ${price} vs avg cost ${avg_cost:.4f} = ${pnl_piece:.2f} PnL")
                
                # Update position
                new_qty = max(0, current_qty - qty_to_sell)
                new_total_cost = max(0, current_total_cost - (qty_to_sell * avg_cost))
                avg_costs[token] = (new_qty, new_total_cost)
    
    # --- Calculate Unrealized PnL with CURRENT PRICES ---
    print("\nCalculating unrealized PnL with current market prices...")
    unrealized_pnl = 0.0
    token_unrealized = {}
    token_holdings = {}
    current_prices = {}
    
    # Collect tokens for current price lookup
    tokens_for_current_prices = []
    
    if method in ["FIFO", "LIFO"]:
        for token, lots in positions.items():
            if lots:
                # Get token info from the dataframe
                token_df = df[df["token_symbol"] == token]
                if not token_df.empty:
                    token_row = token_df.iloc[0]
                    tokens_for_current_prices.append({
                        "symbol": token,
                        "address": token_row.get("token_address", ""),
                        "blockchain": token_row.get("blockchain", "ethereum")
                    })
    
    elif method == "ACB":
        for token, (total_qty, _) in avg_costs.items():
            if total_qty > 0:
                token_df = df[df["token_symbol"] == token]
                if not token_df.empty:
                    token_row = token_df.iloc[0]
                    tokens_for_current_prices.append({
                        "symbol": token,
                        "address": token_row.get("token_address", ""),
                        "blockchain": token_row.get("blockchain", "ethereum")
                    })
    
    # Fetch current prices if analyzer is available
    if analyzer and tokens_for_current_prices:
        try:
            current_prices = analyzer.get_current_prices(tokens_for_current_prices)
            print(f"Fetched current prices for {len(current_prices)} tokens")
        except Exception as e:
            print(f"Error fetching current prices: {e}")
            # Fallback to last transaction prices
            for token in set(df["token_symbol"]):
                token_df = df[df["token_symbol"] == token]
                if not token_df.empty:
                    token_addr = token_df.iloc[-1]["token_address"]
                    current_prices[token_addr] = token_df.iloc[-1]["price_usd"]
    else:
        # Fallback: use last transaction prices
        print("Using last transaction prices as current prices")
        for token in set(df["token_symbol"]):
            token_df = df[df["token_symbol"] == token]
            if not token_df.empty:
                token_addr = token_df.iloc[-1]["token_address"]
                current_prices[token_addr] = token_df.iloc[-1]["price_usd"]
    
    # Calculate unrealized PnL for FIFO/LIFO
    if method in ["FIFO", "LIFO"]:
        for token, lots in positions.items():
            if not lots:
                continue
                
            # Get token info for price lookup
            token_df = df[df["token_symbol"] == token]
            if token_df.empty:
                continue
                
            token_addr = token_df.iloc[0]["token_address"]
            current_price = current_prices.get(token_addr, 0)
            
            if current_price <= 0:
                print(f"No current price available for {token}, skipping unrealized PnL")
                continue
            
            # Calculate total quantity and cost basis
            total_qty = sum(lot["qty"] for lot in lots)
            total_cost_basis = sum(lot["qty"] * lot["cost"] for lot in lots)
            
            # Calculate unrealized PnL
            current_value = total_qty * current_price
            unrealized_pnl_token = current_value - total_cost_basis
            
            unrealized_pnl += unrealized_pnl_token
            token_unrealized[token] = unrealized_pnl_token
            token_holdings[token] = {
                "qty": total_qty,
                "avg_cost": total_cost_basis / total_qty if total_qty > 0 else 0,
                "current_price": current_price,
                "current_value": current_value
            }
            
            print(f"{token}: {total_qty:.4f} units @ avg ${total_cost_basis/total_qty:.4f}, current ${current_price:.4f} = ${unrealized_pnl_token:.2f} unrealized")
    
    # Calculate unrealized PnL for ACB
    elif method == "ACB":
        for token, (total_qty, total_cost_basis) in avg_costs.items():
            if total_qty <= 0:
                continue
                
            token_df = df[df["token_symbol"] == token]
            if token_df.empty:
                continue
            
            token_addr = token_df.iloc[0]["token_address"]
            current_price = current_prices.get(token_addr, 0)
            
            if current_price <= 0:
                print(f"No current price available for {token}, skipping unrealized PnL")
                continue
            
            avg_cost = total_cost_basis / total_qty if total_qty > 0 else 0
            
            current_value = total_qty * current_price
            unrealized_pnl_token = current_value - total_cost_basis
            
            unrealized_pnl += unrealized_pnl_token
            token_unrealized[token] = unrealized_pnl_token
            token_holdings[token] = {
                "qty": total_qty,
                "avg_cost": avg_cost,
                "current_price": current_price,
                "current_value": current_value
            }
            
            print(f"{token}: {total_qty:.4f} units @ avg ${avg_cost:.4f}, current ${current_price:.4f} = ${unrealized_pnl_token:.2f} unrealized")
    
    # --- Build detailed breakdown ---
    all_tokens = set(token_realized.keys()).union(token_unrealized.keys())
    token_data = []
    
    for token in all_tokens:
        holdings = token_holdings.get(token, {})
        token_data.append({
            "Token": token,
            "Realized PnL (USD)": round(token_realized.get(token, 0.0), 2),
            "Unrealized PnL (USD)": round(token_unrealized.get(token, 0.0), 2),
            "Current Holdings": round(holdings.get("qty", 0.0), 6),
            "Avg Cost": round(holdings.get("avg_cost", 0.0), 4),
            "Current Price": round(holdings.get("current_price", 0.0), 4),
            "Current Value": round(holdings.get("current_value", 0.0), 2)
        })
    
    breakdown_df = pd.DataFrame(token_data)
    if not breakdown_df.empty:
        breakdown_df = breakdown_df.sort_values("Realized PnL (USD)", ascending=False)
    
    print(f"\nFinal Results:")
    print(f"Realized PnL: ${realized_pnl:.2f}")
    print(f"Unrealized PnL: ${unrealized_pnl:.2f}")
    print(f"Total PnL: ${realized_pnl + unrealized_pnl:.2f}")
    
    return realized_pnl, unrealized_pnl, breakdown_df


# -------------------------------
# PnL VALIDATION FUNCTION
# -------------------------------
def validate_pnl_calculation(df: pd.DataFrame, realized_pnl: float, unrealized_pnl: float, breakdown_df: pd.DataFrame) -> pd.DataFrame:
    """
    Validate PnL calculations for reasonableness and consistency.
    """
    validation_results = []
    
    # Check 1: Total PnL components should sum correctly
    breakdown_realized_sum = breakdown_df["Realized PnL (USD)"].sum() if not breakdown_df.empty else 0
    breakdown_unrealized_sum = breakdown_df["Unrealized PnL (USD)"].sum() if not breakdown_df.empty else 0
    
    validation_results.append({
        "Check": "PnL Components Sum",
        "Expected Realized": realized_pnl,
        "Breakdown Realized": breakdown_realized_sum,
        "Expected Unrealized": unrealized_pnl,
        "Breakdown Unrealized": breakdown_unrealized_sum,
        "Pass": abs(realized_pnl - breakdown_realized_sum) < 0.01 and abs(unrealized_pnl - breakdown_unrealized_sum) < 0.01
    })
    
    # Check 2: No negative holdings
    if not breakdown_df.empty:
        negative_holdings = breakdown_df[breakdown_df["Current Holdings"] < 0]
        validation_results.append({
            "Check": "No Negative Holdings",
            "Negative Count": len(negative_holdings),
            "Pass": len(negative_holdings) == 0
        })
    
    # Check 3: Reasonable price ranges
    if not df.empty:
        price_stats = df["price_usd"].describe()
        validation_results.append({
            "Check": "Price Range Reasonableness",
            "Min Price": price_stats["min"],
            "Max Price": price_stats["max"],
            "Pass": price_stats["min"] >= 0 and price_stats["max"] < 1000000  # Basic sanity check
        })
    
    return pd.DataFrame(validation_results)


# -------------------------------
# Simple token price fetcher (fallback)
# -------------------------------
def get_token_price(token_symbol: str) -> Optional[float]:
    """
    Simple fallback function to get token prices.
    In a real implementation, this would use an API like CoinGecko.
    """
    # This is a simple mock implementation
    price_mapping = {
        "ETH": 3000.0,
        "BTC": 60000.0,
        "USDC": 1.0,
        "USDT": 1.0,
        "DAI": 1.0,
        "WBTC": 60000.0,
        "WETH": 3000.0,
    }
    return price_mapping.get(token_symbol.upper())

### app.py

In [None]:
# app.py
import os
import time
import glob
import pandas as pd
import streamlit as st
from datetime import datetime, timedelta
from dotenv import load_dotenv
from analyzer import ExtendedMoralisAnalyzer, calculate_pnl_improved, validate_pnl_calculation
from price_fetcher import get_token_price

import numpy as np
import pytz
import random
import logging
import traceback


# Sample Data
def generate_sample_data(n_days=7, txs_per_day=7, wallet_address="0xDEADBEEF1234567890ABCDEF1234567890ABCDEF"):
    np.random.seed(42)  # reproducible
    rows = []
    start_date = datetime.today() - timedelta(days=n_days)

    tokens = [
        ("USDC", 1.0),
        ("USDT", 1.0), 
        ("ETH", 2000.0),
        ("ARB", 3.0),
        ("OP", 3.5),
        ("MATIC", 0.7),
    ]
    chains = ["eth", "arbitrum", "optimism", "polygon"]

    tx_types = ["deposit", "withdrawal", "buy", "sell"]

    for d in range(n_days):
        for _ in range(txs_per_day):
            block_time = start_date + timedelta(days=d, hours=np.random.randint(0, 24))
            token, base_price = tokens[np.random.randint(len(tokens))]
            
            # Generate slightly different current vs historical prices for unrealized PnL demo
            historical_price = round(base_price * np.random.uniform(0.85, 1.15), 2)
            
            amount = round(np.random.uniform(10, 1000), 2) if token in ["USDC", "USDT"] else round(np.random.uniform(0.1, 20), 4)

            tx_type = np.random.choice(tx_types)

            # USD value logic
            if tx_type in ["deposit", "buy"]:
                usd_value = amount * historical_price
            elif tx_type in ["withdrawal", "sell"]:
                usd_value = amount * historical_price * np.random.uniform(0.95, 1.05)
            else:
                usd_value = amount * historical_price

            # fake addresses
            from_addr = f"0x{random.randint(10**15, 10**18):x}"
            to_addr = f"0x{random.randint(10**15, 10**18):x}"

            # Simulate some withdrawals going back to your own wallet
            if tx_type == "withdrawal" and random.random() < 0.3:  # 30% chance
                to_addr = wallet_address
                tx_type = "withdrawal_move"

            rows.append({
                "tx_hash": f"0x{random.randint(10**15, 10**18):x}",
                "block_time": block_time,
                "blockchain": np.random.choice(chains),
                "transaction_type": tx_type,
                "amount": amount,               
                "price_usd": historical_price,             
                "usd_value": usd_value,         
                "gas_cost_usd": round(np.random.uniform(1, 20), 2),
                "token_symbol": token,
                "token_address": f"0x{random.randint(10**15, 10**18):x}",
                "from_address": from_addr,
                "to_address": to_addr,
            })

    return pd.DataFrame(rows)

# Mock current prices for sample data (slightly different from historical for demo)
def get_sample_current_prices():
    """Generate mock current prices that differ from historical prices"""
    return {
        # These would be token addresses in real data, using symbols for demo
        "USDC": 1.00,
        "USDT": 0.999,
        "ETH": 2150.0,  # Higher than historical average
        "ARB": 2.85,    # Lower than historical average  
        "OP": 3.75,     # Higher than historical average
        "MATIC": 0.72,  # Slightly higher
    }

# Create ~50 transactions across a week
sample_df = generate_sample_data(n_days=7, txs_per_day=7)

# -------------------------------
# Setup
# -------------------------------
st.set_page_config(page_title="Wallet PnL Explorer", page_icon="💰", layout="wide")
load_dotenv()
API_KEY = os.getenv("MORALIS_API_KEY")
if not API_KEY:
    st.error("⚠️ Please add MORALIS_API_KEY to your .env file!")
    st.stop()

CACHE_DIR = "cache"
os.makedirs(CACHE_DIR, exist_ok=True)

# -------------------------------
# Disk cache helpers (per-wallet + per-chain)
# -------------------------------
def _wallet_dir(wallet: str):
    return os.path.join(CACHE_DIR, wallet.lower())

def save_to_disk(wallet: str, chain: str, df: pd.DataFrame):
    wdir = _wallet_dir(wallet)
    os.makedirs(wdir, exist_ok=True)
    path = os.path.join(wdir, f"{chain}.parquet")
    df.to_parquet(path, index=False)

def load_from_disk(wallet: str, chain: str):
    path = os.path.join(_wallet_dir(wallet), f"{chain}.parquet")
    if os.path.exists(path):
        return pd.read_parquet(path)
    return None

@st.cache_data(show_spinner=False)
def get_wallet_data(_analyzer, wallet: str, chains: list, max_txs: int, force_refresh: bool = False) -> pd.DataFrame:
    """Hybrid memory+disk+API cache. Returns concatenated df for requested chains."""
    dfs = []
    for ch in chains:
        if not force_refresh:
            cached = load_from_disk(wallet, ch)
            if cached is not None:
                dfs.append(cached)
                continue

        # API call for that chain
        try:
            df = _analyzer.get_detailed_data_for_wallet(wallet, max_per_chain=max_txs, chains=[ch])
        except TypeError:
            # Fallback: fetch all and filter by chain
            df_all = _analyzer.get_detailed_data_for_wallet(wallet, max_per_chain=max_txs)
            df = df_all[df_all["blockchain"] == ch] if not df_all.empty else df_all

        if not df.empty:
            save_to_disk(wallet, ch, df)
            dfs.append(df)

    if dfs:
        return pd.concat(dfs, ignore_index=True)
    return pd.DataFrame()

def main():
    st.title("Wallet PnL Explorer")
    st.sidebar.header("🔧 Controls")

    # Sidebar controls
    diagnostic_mode = st.sidebar.checkbox("Enable Diagnostic Mode", value=False)
    pnl_method = st.sidebar.selectbox("PnL Accounting Method", ["FIFO", "LIFO", "ACB"], index=0)
    wallet_address = st.sidebar.text_input("Wallet Address", value="", help="Leave empty to preview demo data.")
    selected_chains = st.sidebar.multiselect(
        "Blockchains",
        ["eth", "bsc", "polygon", "arbitrum", "optimism", "base"],
        default=["eth", "arbitrum", "optimism"]
    )
    start_date = st.sidebar.date_input("Start Date", value=(datetime.utcnow() - timedelta(days=30)).date())
    end_date = st.sidebar.date_input("End Date", value=datetime.utcnow().date())
    max_txs = st.sidebar.slider("Max transactions per chain", min_value=10, max_value=200, value=50, step=10)
    cache_mode = st.sidebar.radio("Cache Mode", ["Always Use Cache", "Force Refresh", "Disable Cache"], index=0)
    analyze_button = st.sidebar.button("🔍 Analyze Wallet")

    # Initialize analyzer
    if cache_mode == "Always Use Cache":
        analyzer = ExtendedMoralisAnalyzer(API_KEY, use_cache=True, force_refresh=False)
        force_refresh = False
    elif cache_mode == "Force Refresh":
        analyzer = ExtendedMoralisAnalyzer(API_KEY, use_cache=True, force_refresh=True)
        force_refresh = True
    else:
        analyzer = ExtendedMoralisAnalyzer(API_KEY, use_cache=False)
        force_refresh = False

    # Determine wallet mode
    if analyze_button and wallet_address.strip():
        chosen_wallet = wallet_address.strip()
        using_default = False
        window_start = datetime.combine(start_date, datetime.min.time())
        window_end = datetime.combine(end_date, datetime.max.time())
    else:
        chosen_wallet = "sample_wallet"
        using_default = True
        window_start = sample_df["block_time"].min()
        window_end = sample_df["block_time"].max()
        st.info("💡 Sample wallet preview for the past 7 days: Enter your wallet on the left to analyze real data.")

    if not selected_chains:
        st.warning("Please select at least one blockchain in the sidebar.")
        st.stop()

    # Fetch/load wallet data
    if using_default:
        df = sample_df.copy()
    else:
        progress = st.progress(0, text="Preparing analysis...")
        progress.progress(20, text="Checking cache / fetching data...")
        df = get_wallet_data(analyzer, chosen_wallet, selected_chains, max_txs, force_refresh=force_refresh)
        progress.progress(50, text="Applying filters...")

        if df.empty:
            progress.empty()
            st.error("No transactions found for this wallet.")
            st.stop()

        # Ensure UTC datetime
        if df["block_time"].dt.tz is None:
            df["block_time"] = df["block_time"].dt.tz_localize("UTC")
        window_start = pd.Timestamp(window_start).tz_localize("UTC")
        window_end = pd.Timestamp(window_end).tz_localize("UTC")

        # Filter by date window
        df = df[(df["block_time"] >= window_start) & (df["block_time"] <= window_end)]

        # Keep only tokens with valid prices
        df = df[df["price_usd"].notna() & (df["price_usd"] > 0)]

        # Mark withdrawals to your own wallet as moves
        df['transaction_type'] = df.apply(
            lambda row: 'withdrawal_move'
            if row['transaction_type'] == 'withdrawal' and str(row.get('to_address', '')).lower() == chosen_wallet.lower()
            else row['transaction_type'],
            axis=1
        )

        progress.progress(70, text="Computing summaries and PnL...")

    if df.empty:
        st.warning("⚠️ No transactions available after filters.")
        st.stop()

    # -------------------------------
    # Summary metrics
    # -------------------------------
    total_in = float(df[df["transaction_type"] == "deposit"]["usd_value"].sum())
    total_out = float(df[df["transaction_type"] == "withdrawal"]["usd_value"].sum())
    gas_cost = float(df.get("gas_cost_usd", pd.Series()).fillna(0).sum()) if "gas_cost_usd" in df else 0.0
    pnl = total_in - total_out - gas_cost

    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Deposits (USD)", f"${total_in:,.2f}")
    col2.metric("Total Withdrawals (USD)", f"${total_out:,.2f}")
    col3.metric("Gas Costs (USD)", f"${gas_cost:,.2f}")
    col4.metric("Net Cash Flow (USD)", f"${pnl:,.2f}")

    # -------------------------------
    # PnL calculation with current prices
    # -------------------------------
    realized_total = 0.0
    unrealized_total = 0.0
    breakdown_list = []

    # Group by token_symbol for sample data, token_address for real data
    group_key = 'token_symbol' if using_default else 'token_address'
    grouped = df.groupby(group_key)

    tokens_with_valid_prices = set()
    tokens_with_missing_prices = set()

    for token_key, group in grouped:
        # Filter group to rows with valid prices
        group_valid = group[group['price_usd'].notna() & (group['price_usd'] > 0)]
        if group_valid.empty:
            tokens_with_missing_prices.add(token_key)
            continue

        tokens_with_valid_prices.add(token_key)

        # Calculate PnL for this token group - PASS ANALYZER for current prices
        if using_default:
            # For sample data, create a mock analyzer that returns sample current prices
            class MockAnalyzer:
                def get_current_prices(self, tokens):
                    sample_prices = get_sample_current_prices()
                    result = {}
                    for token in tokens:
                        symbol = token.get('symbol', '')
                        result[token.get('address', symbol)] = sample_prices.get(symbol, 0)
                    return result
            
            mock_analyzer = MockAnalyzer()
            realized, unrealized, breakdown = calculate_pnl_improved(group_valid, method=pnl_method, analyzer=mock_analyzer)
        else:
            realized, unrealized, breakdown = calculate_pnl_improved(group_valid, method=pnl_method, analyzer=analyzer)

        realized_total += realized
        unrealized_total += unrealized
        breakdown_list.append(breakdown)

    # Combine breakdowns into one DataFrame
    if breakdown_list:
        breakdown_df = pd.concat(breakdown_list, ignore_index=True)
    else:
        breakdown_df = pd.DataFrame()

    if tokens_with_missing_prices:
        st.warning(f"Tokens excluded from PnL due to missing prices: {tokens_with_missing_prices}")

    # -------------------------------
    # PnL Validation
    # -------------------------------
    validation_df = validate_pnl_calculation(df[df[group_key].isin(tokens_with_valid_prices)], realized_total, unrealized_total, breakdown_df)
    failed_validations = validation_df[validation_df['Pass'] == False]

    if not failed_validations.empty:
        st.warning("⚠️ PnL Validation Issues Detected")
        with st.expander("View Validation Details", expanded=True):
            st.dataframe(validation_df, use_container_width=True)
            st.write("**Issues found:**")
            for _, row in failed_validations.iterrows():
                st.write(f"- {row['Check']}: Failed")
    else:
        st.success("✅ PnL Calculations Validated Successfully")
        with st.expander("View Validation Details"):
            st.dataframe(validation_df, use_container_width=True)
  
    col1, col2, col3 = st.columns(3)
    col1.metric(f"{pnl_method} Realized PnL (USD)", f"${realized_total:,.2f}")
    col2.metric(f"{pnl_method} Unrealized PnL (USD)", f"${unrealized_total:,.2f}")
    
    # Count open positions
    open_positions = len(breakdown_df[breakdown_df['Current Holdings'] > 0]) if not breakdown_df.empty else 0
    col3.metric("Open Positions", f"{open_positions}")

    st.subheader("💹 PnL Breakdown by Token")
    if not breakdown_df.empty:
        st.dataframe(breakdown_df, use_container_width=True, height=320)
    else:
        st.info("No PnL data available.")
     
    # -------------------------------
    # Transactions table
    # -------------------------------
    st.subheader("📊 Enriched Transactions")
    st.dataframe(df, use_container_width=True, height=420)

    from price_fetcher import get_token_price

    # Replace invalid prices with historical fetch
    df["price_usd"] = df.apply(
        lambda row: row["price_usd"]
        if row["price_usd"] > 0
        else (
            get_token_price(
                row["token_symbol"],
                row.get("token_address"),
                row["blockchain"],
                block_time=row["block_time"]
            ) or 0
        ),
        axis=1
    )

    # Drop unsupported tokens (no price found)
    df = df[df["price_usd"] > 0]



    if not df.empty:
        realized_pnl, unrealized_pnl, breakdown_df = calculate_pnl_improved(
        df, method=pnl_method, analyzer=analyzer
    )

    if diagnostic_mode:
        st.subheader("🔍 Diagnostic Report")
        validation_df = validate_pnl_calculation(df, realized_pnl, unrealized_pnl, breakdown_df)
        st.dataframe(validation_df, use_container_width=True)

        st.subheader("📊 First 10 Transactions")
        st.dataframe(df.head(10), use_container_width=True)

        st.subheader("📊 PnL Breakdown")
        st.dataframe(breakdown_df, use_container_width=True)


    if not using_default:
        with st.expander("📂 View local cache files"):
            files = glob.glob(os.path.join(CACHE_DIR, chosen_wallet.lower(), "*.parquet"))
            st.write([os.path.basename(f) for f in files]) if files else st.write("No cache files yet.")

        progress.progress(100, text="Done!")
        time.sleep(0.1)
        progress.empty()
      
if __name__ == "__main__":
    main()

In [1]:
import os
import json
import time
from dotenv import load_dotenv
import requests
import pandas as pd
from datetime import datetime
from typing import Dict, List, Optional

# -------------------------------
# Cache for prices
# -------------------------------
class PriceCache:
    def __init__(self, filename="price_cache.json"):
        self.filename = filename
        self.cache = {}
        if os.path.exists(filename):
            with open(filename, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}

    def get(self, key: str):
        return self.cache.get(key)

    def set(self, key: str, value):
        self.cache[key] = value
        with open(self.filename, "w") as f:
            json.dump(self.cache, f)

# -------------------------------
# Cache for contract-to-CGID mapping
# -------------------------------
class AddressCache:
    def __init__(self, filename="address_to_cgid.json"):
        self.filename = filename
        self.cache = {}
        if os.path.exists(filename):
            with open(filename, "r") as f:
                try:
                    self.cache = json.load(f)
                except:
                    self.cache = {}

    def get(self, key: str):
        return self.cache.get(key)

    def set(self, key: str, value):
        self.cache[key] = value
        with open(self.filename, "w") as f:
            json.dump(self.cache, f)

# -------------------------------
# Extended Analyzer
# -------------------------------
class ExtendedMoralisAnalyzer:
    def __init__(self, api_key: str, use_cache: bool = True, force_refresh: bool = False):
        self.api_key = api_key
        self.base_url = "https://deep-index.moralis.io/api/v2"
        self.headers = {"Accept": "application/json", "X-API-Key": api_key}

        self.chains = {
            'eth': '0x1',
            'bsc': '0x38',
            'polygon': '0x89',
            'arbitrum': '0xa4b1',
            'optimism': '0xa',
            'base': '0x2105'
        }

        self.price_cache = PriceCache()
        self.address_cache = AddressCache()

    # -------------------------------
    # Fetch ERC20 Transfers
    # -------------------------------
    def get_erc20_transfers(self, wallet: str, chain: str, limit: int = 50) -> List[Dict]:
        try:
            url = f"{self.base_url}/{wallet}/erc20/transfers"
            params = {"chain": chain, "limit": limit}
            response = requests.get(url, headers=self.headers, params=params)
            if response.status_code == 200:
                return response.json().get('result', [])
            return []
        except Exception as e:
            print(f"ERC20 transfer error: {e}")
            return []

    # -------------------------------
    # Fetch Tx Gas Cost (in native coin)
    # -------------------------------
    def get_tx_gas_cost(self, tx_hash: str, chain: str) -> Optional[float]:
        try:
            url = f"{self.base_url}/transaction/{tx_hash}"
            params = {"chain": chain}
            r = requests.get(url, headers=self.headers, params=params)
            if r.status_code == 200:
                data = r.json()
                gas_used = int(data.get("receipt_gas_used") or 0)
                gas_price = int(data.get("gas_price") or 0)
                native_spent = gas_used * gas_price / 1e18
                return native_spent
        except Exception as e:
            print(f"Gas fetch failed for {tx_hash}: {e}")
        return None

    # -------------------------------
    # PRICE FETCHER (Coingecko)
    # -------------------------------
    def get_price_usd(self, symbol: str, timestamp: str, token_address: str = None, blockchain: str = "ethereum") -> Optional[float]:
        if not token_address and not symbol:
            return None

        date_str = timestamp.split("T")[0]
        cache_key = f"{token_address or symbol}_{date_str}"
        cached = self.price_cache.get(cache_key)
        if cached:
            return cached

        cg_id = None

        try:
            # Step 1: prefer contract lookup first
            if token_address:
                # Check local address cache
                cached_cgid = self.address_cache.get(token_address.lower())
                if cached_cgid:
                    cg_id = cached_cgid
                else:
                    # Query Coingecko contract API
                    url = f"https://api.coingecko.com/api/v3/coins/{blockchain}/contract/{token_address}"
                    r = requests.get(url)
                    if r.status_code == 200:
                        data = r.json()
                        cg_id = data.get("id")
                        if cg_id:
                            self.address_cache.set(token_address.lower(), cg_id)

            # Step 2: fallback to hardcoded mapping if contract failed
            if not cg_id and symbol:
                mapping = {
                    "eth": "ethereum",
                    "weth": "weth",
                    "usdc": "usd-coin",
                    "usdt": "tether",
                    "bnb": "binancecoin",
                    "matic": "polygon"
                }
                cg_id = mapping.get(symbol.lower())

            if not cg_id:
                return None

            # Step 3: fetch historical price
            url = f"https://api.coingecko.com/api/v3/coins/{cg_id}/history"
            params = {"date": datetime.strptime(date_str, "%Y-%m-%d").strftime("%d-%m-%Y")}
            r = requests.get(url, params=params)
            if r.status_code == 200:
                data = r.json()
                price = data.get("market_data", {}).get("current_price", {}).get("usd")
                if price:
                    self.price_cache.set(cache_key, price)
                    return price
        except Exception as e:
            print(f"Price fetch error for {symbol} / {token_address}: {e}")
            return None

    # -------------------------------
    # NEW: Get current prices for unrealized PnL
    # -------------------------------
    def get_current_prices(self, tokens: List[Dict]) -> Dict[str, float]:
        """
        Fetch current USD prices for a list of tokens.
        tokens: List of dicts with keys 'symbol', 'address', 'blockchain'
        Returns: dict mapping token_address -> current_price_usd
        """
        prices = {}
        coingecko_ids = []
        token_map = {}  # cg_id -> token_address
        
        for token in tokens:
            symbol = token.get("symbol", "")
            address = token.get("address", "")
            blockchain = token.get("blockchain", "ethereum")
            
            cache_key = f"current_{address.lower()}"
            cached = self.price_cache.get(cache_key)
            if cached:
                prices[address] = cached
                continue
            
            cg_id = None
            
            # Try to resolve Coingecko ID
            if address:
                cached_cgid = self.address_cache.get(address.lower())
                if cached_cgid:
                    cg_id = cached_cgid
                else:
                    try:
                        url = f"https://api.coingecko.com/api/v3/coins/{blockchain}/contract/{address}"
                        r = requests.get(url)
                        if r.status_code == 200:
                            data = r.json()
                            cg_id = data.get("id")
                            if cg_id:
                                self.address_cache.set(address.lower(), cg_id)
                    except Exception as e:
                        print(f"Error resolving CG ID for {address}: {e}")
            
            # Fallback to symbol mapping
            if not cg_id and symbol:
                mapping = {
                    "eth": "ethereum",
                    "weth": "weth", 
                    "usdc": "usd-coin",
                    "usdt": "tether",
                    "bnb": "binancecoin",
                    "matic": "polygon",
                    "arb": "arbitrum",
                    "op": "optimism"
                }
                cg_id = mapping.get(symbol.lower())
            
            if cg_id:
                coingecko_ids.append(cg_id)
                token_map[cg_id] = address
        
        # Batch fetch current prices
        if coingecko_ids:
            try:
                url = "https://api.coingecko.com/api/v3/simple/price"
                params = {
                    "ids": ",".join(coingecko_ids),
                    "vs_currencies": "usd"
                }
                r = requests.get(url, params=params)
                if r.status_code == 200:
                    data = r.json()
                    for cg_id, price_data in data.items():
                        if "usd" in price_data:
                            token_address = token_map[cg_id]
                            price = price_data["usd"]
                            prices[token_address] = price
                            # Cache current prices briefly
                            cache_key = f"current_{token_address.lower()}"
                            self.price_cache.set(cache_key, price)
            except Exception as e:
                print(f"Error fetching current prices: {e}")
        
        return prices

    # -------------------------------
    # Enrich transfers with USD price + Gas cost
    # -------------------------------
    def get_detailed_data_for_wallet(self, wallet: str, max_per_chain: int = 50, chains: List[str] = None) -> pd.DataFrame:
        all_tx = []
        chains_to_fetch = chains if chains else list(self.chains.keys())
        
        for chain_name in chains_to_fetch:
            if chain_name not in self.chains:
                continue
                
            chain_id = self.chains[chain_name]
            print(f"Fetching ERC20 transfers on {chain_name}...")
            erc20_txs = self.get_erc20_transfers(wallet, chain=chain_id, limit=max_per_chain)
            
            for tx in erc20_txs:
                try:
                    decimals = int(tx.get("token_decimals") or 18)
                    raw_value = float(tx.get("value") or 0)
                    amount = raw_value / (10 ** decimals)
                    timestamp = tx.get("block_timestamp")

                    symbol = tx.get("token_symbol", "")
                    token_address = tx.get("address", "")
                    price_usd = self.get_price_usd(symbol, timestamp, token_address, chain_name) or 0
                    usd_value = amount * price_usd

                    # Gas cost (native coin)
                    tx_hash = tx.get("transaction_hash")
                    gas_native = self.get_tx_gas_cost(tx_hash, chain_id) or 0

                    enriched = {
                        "wallet": wallet,
                        "blockchain": chain_name,
                        "tx_hash": tx_hash,
                        "block_time": timestamp,
                        "token_symbol": symbol,
                        "token_address": token_address,
                        "amount": amount,
                        "price_usd": price_usd,
                        "usd_value": usd_value,
                        "gas_cost_native": gas_native,
                        "transaction_type": "deposit" if tx.get("to_address", "").lower() == wallet.lower() else "withdrawal"
                    }
                    all_tx.append(enriched)
                except Exception as e:
                    print(f"Error processing tx: {e}")
                    continue

            time.sleep(0.5)  # rate limit

        if not all_tx:
            return pd.DataFrame()

        df = pd.DataFrame(all_tx)
        df['block_time'] = pd.to_datetime(df['block_time'])
        return df.sort_values("block_time").reset_index(drop=True)


# -------------------------------
# FIXED PnL CALCULATION
# -------------------------------
def calculate_pnl_improved(df, method="FIFO", analyzer=None):
    """
    FIXED: Improved PnL calculation that fetches current prices for unrealized PnL
    """
    # Validate inputs
    if df.empty:
        return 0, 0, pd.DataFrame(columns=["Token", "Realized PnL (USD)", "Unrealized PnL (USD)", "Current Holdings", "Avg Cost", "Current Price"])
    
    # Ensure proper sorting by time
    df = df.sort_values("block_time").reset_index(drop=True)
    
    positions = {}   # token -> list of lots (FIFO/LIFO)
    avg_costs = {}   # token -> (total_qty, total_cost_basis) for ACB
    realized_pnl = 0
    token_realized = {}
    token_unrealized = {}
    
    print(f"Processing {len(df)} transactions using {method} method...")
    
    for idx, row in df.iterrows():
        token = row.get("token_symbol", "")
        if not token:
            continue
            
        # Handle different quantity column names
        qty = row.get("amount", row.get("token_amount", 0))
        price = row.get("price_usd", 0)
        tx_type = row.get("transaction_type", "")
        
        # Skip invalid transactions
        if qty <= 0 or price <= 0 or pd.isna(price):
            print(f"Skipping invalid transaction: qty={qty}, price={price}, type={tx_type}")
            continue
        
        # Classify transactions into buys/sells
        is_buy = tx_type in ["deposit", "buy", "swap_in", "mint", "receive"]
        is_sell = tx_type in ["withdrawal", "sell", "swap_out", "burn", "send"]
        
        # Skip non-trading transactions
        if not (is_buy or is_sell):
            print(f"Skipping non-trading transaction type: {tx_type}")
            continue
            
        # --- FIFO / LIFO Logic ---
        if method in ["FIFO", "LIFO"]:
            if is_buy:
                # Add to position
                lot = {"qty": float(qty), "cost": float(price)}
                positions.setdefault(token, []).append(lot)
                print(f"Added lot: {qty} {token} @ ${price}")
                
            elif is_sell:
                # Sell from position
                if token not in positions or not positions[token]:
                    print(f"WARNING: Selling {qty} {token} with no position!")
                    # Still record as realized loss (assuming cost basis = 0)
                    pnl_piece = qty * price  # All proceeds are gain
                    realized_pnl += pnl_piece
                    token_realized[token] = token_realized.get(token, 0) + pnl_piece
                    continue
                
                remaining_to_sell = float(qty)
                sell_price = float(price)
                
                while remaining_to_sell > 0 and positions[token]:
                    # Get lot based on method
                    lot_idx = 0 if method == "FIFO" else -1
                    lot = positions[token][lot_idx]
                    
                    lot_qty = lot["qty"]
                    lot_cost = lot["cost"]
                    
                    # Determine how much to sell from this lot
                    qty_to_sell = min(remaining_to_sell, lot_qty)
                    
                    # Calculate PnL for this portion
                    proceeds = qty_to_sell * sell_price
                    cost_basis = qty_to_sell * lot_cost
                    pnl_piece = proceeds - cost_basis
                    
                    realized_pnl += pnl_piece
                    token_realized[token] = token_realized.get(token, 0) + pnl_piece
                    
                    print(f"Sold {qty_to_sell} {token}: ${proceeds:.2f} proceeds - ${cost_basis:.2f} cost = ${pnl_piece:.2f} PnL")
                    
                    # Update lot and remaining
                    lot["qty"] -= qty_to_sell
                    remaining_to_sell -= qty_to_sell
                    
                    # Remove empty lots
                    if lot["qty"] <= 0:
                        positions[token].pop(lot_idx)
        
        # --- ACB (Average Cost Basis) Logic ---
        elif method == "ACB":
            if is_buy:
                # Update average cost basis
                current_qty, current_total_cost = avg_costs.get(token, (0, 0))
                new_qty = current_qty + qty
                new_total_cost = current_total_cost + (qty * price)
                avg_costs[token] = (new_qty, new_total_cost)
                print(f"ACB updated for {token}: {new_qty} units, avg cost = ${new_total_cost/new_qty:.4f}")
                
            elif is_sell:
                current_qty, current_total_cost = avg_costs.get(token, (0, 0))
                
                if current_qty <= 0:
                    print(f"WARNING: Selling {qty} {token} with no ACB position!")
                    # Treat as all gain
                    pnl_piece = qty * price
                    realized_pnl += pnl_piece
                    token_realized[token] = token_realized.get(token, 0) + pnl_piece
                    continue
                
                # Calculate average cost
                avg_cost = current_total_cost / current_qty if current_qty > 0 else 0
                
                # Calculate PnL
                qty_to_sell = min(qty, current_qty)  # Can't sell more than we have
                proceeds = qty_to_sell * price
                cost_basis = qty_to_sell * avg_cost
                pnl_piece = proceeds - cost_basis
                
                realized_pnl += pnl_piece
                token_realized[token] = token_realized.get(token, 0) + pnl_piece
                
                print(f"ACB sale: {qty_to_sell} {token} @ ${price} vs avg cost ${avg_cost:.4f} = ${pnl_piece:.2f} PnL")
                
                # Update position
                new_qty = max(0, current_qty - qty_to_sell)
                new_total_cost = max(0, current_total_cost - (qty_to_sell * avg_cost))
                avg_costs[token] = (new_qty, new_total_cost)
    
    # --- Calculate Unrealized PnL with CURRENT PRICES ---
    print("\nCalculating unrealized PnL with current market prices...")
    unrealized_pnl = 0
    token_holdings = {}
    current_prices = {}
    
    # Collect tokens for current price lookup
    tokens_for_current_prices = []
    if method in ["FIFO", "LIFO"]:
        for token, lots in positions.items():
            if lots:
                # Get token info from the dataframe
                token_row = df[df["token_symbol"] == token].iloc[0]
                tokens_for_current_prices.append({
                    "symbol": token,
                    "address": token_row.get("token_address", ""),
                    "blockchain": token_row.get("blockchain", "ethereum")
                })
    
    elif method == "ACB":
        for token, (total_qty, _) in avg_costs.items():
            if total_qty > 0:
                token_row = df[df["token_symbol"] == token].iloc[0]
                tokens_for_current_prices.append({
                    "symbol": token,
                    "address": token_row.get("token_address", ""),
                    "blockchain": token_row.get("blockchain", "ethereum")
                })
    
    # Fetch current prices if analyzer is available
    if analyzer and tokens_for_current_prices:
        try:
            current_prices = analyzer.get_current_prices(tokens_for_current_prices)
            print(f"Fetched current prices for {len(current_prices)} tokens")
        except Exception as e:
            print(f"Error fetching current prices: {e}")
            # Fallback to last transaction prices
            for token in set(df["token_symbol"]):
                token_df = df[df["token_symbol"] == token]
                if not token_df.empty:
                    token_addr = token_df.iloc[-1]["token_address"]
                    current_prices[token_addr] = token_df.iloc[-1]["price_usd"]
    else:
        # Fallback: use last transaction prices
        print("Using last transaction prices as current prices")
        for token in set(df["token_symbol"]):
            token_df = df[df["token_symbol"] == token]
            if not token_df.empty:
                token_addr = token_df.iloc[-1]["token_address"]
                current_prices[token_addr] = token_df.iloc[-1]["price_usd"]
    
    # Calculate unrealized PnL with current prices
    if method in ["FIFO", "LIFO"]:
        for token, lots in positions.items():
            if not lots:
                continue
                
            token_df = df[df["token_symbol"] == token]
            if token_df.empty:
                continue
            
            token_addr = token_df.iloc[0]["token_address"]
            current_price = current_prices.get(token_addr, 0)
            
            if current_price <= 0:
                print(f"No current price available for {token}, skipping unrealized PnL")
                continue
            
            total_qty = sum(lot["qty"] for lot in lots)
            total_cost_basis = sum(lot["qty"] * lot["cost"] for lot in lots)
            avg_cost = total_cost_basis / total_qty if total_qty > 0 else 0
            
            current_value = total_qty * current_price
            unrealized_pnl_token = current_value - total_cost_basis
            
            unrealized_pnl += unrealized_pnl_token
            token_unrealized[token] = unrealized_pnl_token
            token_holdings[token] = {
                "qty": total_qty,
                "avg_cost": avg_cost,
                "current_price": current_price,
                "current_value": current_value
            }
            
            print(f"{token}: {total_qty:.4f} units @ avg ${avg_cost:.4f}, current ${current_price:.4f} = ${unrealized_pnl_token:.2f} unrealized")
    
    elif method == "ACB":
        for token, (total_qty, total_cost_basis) in avg_costs.items():
            if total_qty <= 0:
                continue
                
            token_df = df[df["token_symbol"] == token]
            if token_df.empty:
                continue
            
            token_addr = token_df.iloc[0]["token_address"]
            current_price = current_prices.get(token_addr, 0)
            
            if current_price <= 0:
                print(f"No current price available for {token}, skipping unrealized PnL")
                continue
            
            avg_cost = total_cost_basis / total_qty if total_qty > 0 else 0
            
            current_value = total_qty * current_price
            unrealized_pnl_token = current_value - total_cost_basis
            
            unrealized_pnl += unrealized_pnl_token
            token_unrealized[token] = unrealized_pnl_token
            token_holdings[token] = {
                "qty": total_qty,
                "avg_cost": avg_cost,
                "current_price": current_price,
                "current_value": current_value
            }
            
            print(f"{token}: {total_qty:.4f} units @ avg ${avg_cost:.4f}, current ${current_price:.4f} = ${unrealized_pnl_token:.2f} unrealized")
    
    # --- Build detailed breakdown ---
    all_tokens = set(token_realized.keys()).union(token_unrealized.keys())
    token_data = []
    
    for token in all_tokens:
        holdings = token_holdings.get(token, {})
        token_data.append({
            "Token": token,
            "Realized PnL (USD)": round(token_realized.get(token, 0), 2),
            "Unrealized PnL (USD)": round(token_unrealized.get(token, 0), 2),
            "Current Holdings": round(holdings.get("qty", 0), 6),
            "Avg Cost": round(holdings.get("avg_cost", 0), 4),
            "Current Price": round(holdings.get("current_price", 0), 4),
            "Current Value": round(holdings.get("current_value", 0), 2)
        })
    
    breakdown_df = pd.DataFrame(token_data)
    if not breakdown_df.empty:
        breakdown_df = breakdown_df.sort_values("Realized PnL (USD)", ascending=False)
    
    print(f"\nFinal Results:")
    print(f"Realized PnL: ${realized_pnl:.2f}")
    print(f"Unrealized PnL: ${unrealized_pnl:.2f}")
    print(f"Total PnL: ${realized_pnl + unrealized_pnl:.2f}")
    
    return realized_pnl, unrealized_pnl, breakdown_df


# -------------------------------
# PnL VALIDATION FUNCTION
# -------------------------------
def validate_pnl_calculation(df, realized_pnl, unrealized_pnl, breakdown_df):
    """
    Validate PnL calculations for reasonableness and consistency.
    """
    validation_results = []
    
    # Check 1: Total PnL components should sum correctly
    breakdown_realized_sum = breakdown_df["Realized PnL (USD)"].sum() if not breakdown_df.empty else 0
    breakdown_unrealized_sum = breakdown_df["Unrealized PnL (USD)"].sum() if not breakdown_df.empty else 0
    
    validation_results.append({
        "Check": "PnL Components Sum",
        "Expected Realized": realized_pnl,
        "Breakdown Realized": breakdown_realized_sum,
        "Expected Unrealized": unrealized_pnl,
        "Breakdown Unrealized": breakdown_unrealized_sum,
        "Pass": abs(realized_pnl - breakdown_realized_sum) < 0.01 and abs(unrealized_pnl - breakdown_unrealized_sum) < 0.01
    })
    
    # Check 2: No negative holdings
    if not breakdown_df.empty:
        negative_holdings = breakdown_df[breakdown_df["Current Holdings"] < 0]
        validation_results.append({
            "Check": "No Negative Holdings",
            "Negative Count": len(negative_holdings),
            "Pass": len(negative_holdings) == 0
        })
    
    # Check 3: Reasonable price ranges
    if not df.empty:
        price_stats = df["price_usd"].describe()
        validation_results.append({
            "Check": "Price Range Reasonableness",
            "Min Price": price_stats["min"],
            "Max Price": price_stats["max"],
            "Pass": price_stats["min"] >= 0 and price_stats["max"] < 1000000  # Basic sanity check
        })
    
    return pd.DataFrame(validation_results)

In [2]:
analyzer = ExtendedMoralisAnalyzer(api_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6ImIxMzVkNTc0LTE3MDItNGFlOS05ZGFhLWM5ZGI3NDZmOWQ2ZCIsIm9yZ0lkIjoiNDY5MDQ1IiwidXNlcklkIjoiNDgyNTI5IiwidHlwZUlkIjoiMmNlOWY1NTItOGQ1Ni00YzgyLTk1ZTctOWY4ZDkyYWE4MGJhIiwidHlwZSI6IlBST0pFQ1QiLCJpYXQiOjE3NTY5NTExMjgsImV4cCI6NDkxMjcxMTEyOH0.-zQFPB_ZeVUQ2daD6jgnLMsUhcyRpX7ghUnTRaxAlNw")
df = analyzer.get_detailed_data_for_wallet("0x0b23B218c08dD2156CEb19aF5bB765096D73BA70", max_per_chain=30, chains=["bnb","optimism","arbitrum"])

realized, unrealized, breakdown = calculate_pnl_improved(df, method="FIFO", analyzer=analyzer)

validation = validate_pnl_calculation(df, realized, unrealized, breakdown)

print("Validation Results")
print(validation)
print("\nBreakdown")
print(breakdown.head())


Fetching ERC20 transfers on optimism...
Fetching ERC20 transfers on arbitrum...
Processing 12 transactions using FIFO method...
Skipping invalid transaction: qty=84.562542, price=0.0, type=deposit
Skipping invalid transaction: qty=1.30867, price=0.0, type=deposit
Skipping invalid transaction: qty=3000.0, price=0.0, type=deposit
Skipping invalid transaction: qty=84.0, price=0.0, type=withdrawal
Added lot: 29.765 USDT @ $1.000413034018476
Added lot: 86.91201014728924 WCT @ $0.34449833013901
Sold 29.765 USDT: $29.78 proceeds - $29.78 cost = $0.00 PnL
Skipping invalid transaction: qty=0.0001, price=0.0, type=deposit
Sold 86.91201014728924 WCT: $29.94 proceeds - $29.94 cost = $0.00 PnL
Added lot: 44.88 WCT @ $0.3292533159924128
Added lot: 0.003635087554407808 WCT @ $0.2955672492818812
Added lot: 86.91201014728924 WCT @ $0.289267648020578

Calculating unrealized PnL with current market prices...
Fetched current prices for 0 tokens
No current price available for WCT, skipping unrealized PnL

