In [None]:
################### PIPELINE WORKFLOW ###################################################################
# 1) Generate buy/sell signals - based on Optimized weights for each ticker. Sav ethe csv file
# 2) Feed the csv file to LLM for analysis
# 3) Monitor using promethesus
# 4) Visualize using Grafana
##################################################################################################

In [None]:
# Ticker	Sector	Buy Z Threshold	Sell Z Threshold	Hold Z Low	Hold Z High
# ASAN	Technology	-2.358	-0.961	-2.358	-0.961
# BABA	E-Commerce	0.032	0.032	0.032	0.032
# BW	Technology	1.306	1.306	1.306	1.306
# CVNA	E-Commerce	-2.035	-0.711	-2.035	-0.711
# GOOG	Technology	0.617	0.617	0.617	0.617
# HPE	Hardware	-0.461	-0.461	-0.461	-0.461
# JMIA	E-Commerce	1.378	1.378	1.378	1.378
# MSFT	Technology	-1.186	-0.717	-1.186	-0.717
# NVDA	Technology	-0.702	-0.702	-0.702	-0.702
# PATH	Technology	-1.368	-0.717	-1.368	-0.717
# PLTR	Technology	-1.185	-0.478	-1.185	-0.478
# PTON	Consumer	0.52	0.52	0.52	0.52
# RGTI	Technology	-1.006	-0.844	-1.006	-0.844
# TSLA	Technology	-0.273	-0.273	-0.273	-0.273
# TTD	Technology	-1.858	-1.279	-1.858	-1.279
# TWLO	Technology	-2.105	-1.195	-2.105	-1.195


In [1]:
# TRADING SYSTEM WITH TICKER-SPECIFIC THRESHOLDS
# WARNING: Based on system with 0% win rate - USE AT YOUR OWN RISK
# =============================================================================

import requests
import pandas as pd
import numpy as np
import math
import csv
from scipy.stats import skew, kurtosis
from statsmodels.tsa.stattools import adfuller
from datetime import datetime, timedelta
import time
import os
import traceback
import re
from pathlib import Path
import statistics
import warnings
import uuid
warnings.filterwarnings('ignore')

# =============================================================================
# TICKER-SPECIFIC THRESHOLDS - FROM FAILED SYSTEM ANALYSIS
# =============================================================================

TICKER_THRESHOLDS = {
    'ASAN': {'buy_z': -2.358, 'sell_z': -0.961, 'hold_low': -2.358, 'hold_high': -0.961},
    'BABA': {'buy_z': 0.05, 'sell_z': 1.643, 'hold_low': 0.581, 'hold_high': 1.112},
    'BW': {'buy_z': 1.117, 'sell_z': 3.281, 'hold_low': 1.838, 'hold_high': 2.559},
    'CVNA': {'buy_z': -2.035, 'sell_z': -0.711, 'hold_low': -2.035, 'hold_high': -0.711},
    'GOOG': {'buy_z': 0.885, 'sell_z': 2.044, 'hold_low': 1.271, 'hold_high': 1.657},
    'HPE': {'buy_z': -0.461, 'sell_z': -0.461, 'hold_low': -0.461, 'hold_high': -0.461},
    'JMIA': {'buy_z': 1.222, 'sell_z': 3.792, 'hold_low': 2.079, 'hold_high': 2.935},
    'MSFT': {'buy_z': -1.186, 'sell_z': -0.717, 'hold_low': -1.186, 'hold_high': -0.717},
    'NVDA': {'buy_z': -0.101, 'sell_z': 1.637, 'hold_low': 0.479, 'hold_high': -1.058},
    'PATH': {'buy_z': -1.368, 'sell_z': -0.717, 'hold_low': -1.368, 'hold_high': -0.717},
    'PLTR': {'buy_z': -1.185, 'sell_z': -0.478, 'hold_low': -1.185, 'hold_high': -0.478},
    'PTON': {'buy_z': 0.175, 'sell_z': 3.508, 'hold_low': 1.286, 'hold_high': 2.397},
    'RGTI': {'buy_z': -1.006, 'sell_z': -0.844, 'hold_low': -1.006, 'hold_high': -0.844},
    'TSLA': {'buy_z': 0.039, 'sell_z': 2.324, 'hold_low': 0.801, 'hold_high': 1.562},
    'TTD': {'buy_z': -1.858, 'sell_z': -1.279, 'hold_low': -1.858, 'hold_high': -1.279},
    'TWLO': {'buy_z': -2.105, 'sell_z': -1.195, 'hold_low': -2.105, 'hold_high': -1.195}
}

# Weight schemes (keeping original structure)
WEIGHT_SCHEMES = {
    'optimized': {'z_score': 0.55, 'volume': 0.10, 'skewness': 0.08, 'kurtosis': 0.25, 'adf': 0.02}
}

SECTOR_MAPPING = {
    'MSFT': 'Technology', 'GOOG': 'Technology', 'NVDA': 'Technology', 'TSLA': 'Technology',
    'ASAN': 'Technology', 'PLTR': 'Technology', 'TTD': 'Technology', 'TWLO': 'Technology',
    'PATH': 'Technology', 'RGTI': 'Technology', 'BW': 'Technology',
    'BABA': 'E-Commerce', 'CVNA': 'E-Commerce', 'JMIA': 'E-Commerce',
    'HPE': 'Hardware', 'PTON': 'Consumer'
}

# Configuration setup
BASE_DIR = Path(__file__).parent if '__file__' in globals() else Path.cwd()
data_dir = Path("D:/1/data_LLM")
data_dir.mkdir(parents=True, exist_ok=True)
MASTER_CSV_FILE = data_dir / "ticker_specific_signals_LLM.csv"

# API key setup
def get_api_key(file_path):
    try:
        with open(file_path, 'r') as file:
            return file.read().strip()
    except FileNotFoundError:
        return os.getenv('FMP_API_KEY', "your_api_key_here")

api_key_paths = [
    BASE_DIR / "FMP_api_Key.txt",
    Path("D:/Monte_Carlo/FMP_api_Key.txt")
]

api_key = None
for key_path in api_key_paths:
    if key_path.exists():
        api_key = get_api_key(key_path)
        break
if not api_key:
    api_key = "your_api_key_here"

# Portfolio tracking
PORTFOLIO_CONFIG = {
    'total_capital': 100000,
    'current_cash': 100000,
    'initial_capital': 100000
}

POSITIONS = {}
TRADE_HISTORY = []

def get_timestamp_string():
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

def get_unique_run_id():
    return str(uuid.uuid4())[:8]

# =============================================================================
# TICKER-SPECIFIC SIGNAL GENERATION
# =============================================================================

def generate_ticker_specific_signal(symbol, z_score, volume_spike, skewness, kurtosis, adf_p_value):
    """Generate signal using ticker-specific thresholds"""

    try:
        # Get ticker-specific thresholds
        if symbol not in TICKER_THRESHOLDS:
            print(f"WARNING: No thresholds for {symbol}, using default")
            thresholds = {'buy_z': -2.0, 'sell_z': 2.0, 'hold_low': -2.0, 'hold_high': 2.0}
        else:
            thresholds = TICKER_THRESHOLDS[symbol]

        # Apply ticker-specific logic
        buy_threshold = thresholds['buy_z']
        sell_threshold = thresholds['sell_z']

        # Signal generation based on Z-score vs ticker-specific thresholds
        if z_score <= buy_threshold:
            signal = "BUY"
            confidence = "HIGH" if z_score < buy_threshold - 0.5 else "MODERATE"
        elif z_score >= sell_threshold:
            signal = "SELL"
            confidence = "HIGH" if z_score > sell_threshold + 0.5 else "MODERATE"
        else:
            signal = "HOLD"
            confidence = "LOW"

        # Calculate signal strength based on distance from thresholds
        if signal == "BUY":
            signal_strength = min(1.0, abs(z_score - buy_threshold) / 2.0)
        elif signal == "SELL":
            signal_strength = -min(1.0, abs(z_score - sell_threshold) / 2.0)
        else:
            signal_strength = 0.0

        # Create reason and logic strings
        reason = f"{signal} ({confidence}) - Ticker: {symbol}, Z: {z_score:.3f}, Threshold: {buy_threshold if signal=='BUY' else sell_threshold:.3f}"
        logic = f"{signal}: Z-Score={z_score:.3f}, Threshold={buy_threshold if signal=='BUY' else sell_threshold:.3f}, Strength={signal_strength:.3f}"
        factors = f"Z({z_score:.2f}), Threshold({buy_threshold:.2f}/{sell_threshold:.2f}), Vol({volume_spike:.2f})"

        return {
            'signal_strength': signal_strength,
            'signal': signal,
            'confidence': confidence,
            'reason': reason,
            'logic': logic,
            'factors': factors,
            'buy_logic': logic if signal == "BUY" else "",
            'sell_logic': logic if signal == "SELL" else "",
            'validation_message': f"Ticker-specific thresholds applied - Buy: {buy_threshold:.3f}, Sell: {sell_threshold:.3f}",
            'buy_threshold': buy_threshold,
            'sell_threshold': sell_threshold
        }

    except Exception as e:
        print(f"Error in ticker-specific signal generation: {e}")
        return {
            'signal_strength': 0.0, 'signal': "HOLD", 'confidence': "LOW",
            'reason': f"Error: {e}", 'logic': f"Error: {e}", 'factors': f"Error: {e}",
            'buy_logic': "", 'sell_logic': "", 'validation_message': f"Error: {e}",
            'buy_threshold': 0.0, 'sell_threshold': 0.0
        }

# =============================================================================
# DATA FETCHING AND ANALYSIS
# =============================================================================

def get_minute_data_with_timing(symbol, start_date, end_date, api_key):
    fetch_start = time.time()
    try:
        url = f"https://financialmodelingprep.com/api/v3/historical-chart/15min/{symbol}?from={start_date}&to={end_date}&apikey={api_key}"
        print(f"  Fetching {symbol}...")

        response = requests.get(url)

        if response.status_code == 200:
            data = response.json()
            if not data:
                return pd.DataFrame(), 0.0

            df = pd.DataFrame(data)
            required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
            if not all(col in df.columns for col in required_cols):
                return pd.DataFrame(), 0.0

            df['datetime'] = pd.to_datetime(df['date'])
            df = df.sort_values('datetime').reset_index(drop=True)

            fetch_duration = (time.time() - fetch_start) * 1000
            print(f"  Got {len(df)} data points for {symbol} in {fetch_duration:.1f}ms")
            return df, fetch_duration
        else:
            print(f"  API error {response.status_code} for {symbol}")
            return pd.DataFrame(), 0.0

    except Exception as e:
        print(f"  Exception for {symbol}: {e}")
        return pd.DataFrame(), 0.0

def calculate_metrics_with_timing(prices, returns, volumes):
    calc_start = time.time()
    try:
        # Z-Score calculation
        if len(prices) > 1 and np.std(prices) > 0:
            z_score = float((prices[-1] - np.mean(prices)) / np.std(prices))
        else:
            z_score = 0.0

        # Skewness & Kurtosis
        if len(returns) >= 100:
            neg_skew = float(skew(returns))
            fat_tails = float(kurtosis(returns))
        else:
            neg_skew = 0.0
            fat_tails = 0.0

        # ADF Test
        if len(prices) >= 200:
            try:
                adf_result = adfuller(prices)
                adf_p = float(adf_result[1])
                adf_statistic = float(adf_result[0])
            except:
                adf_p = 1.0
                adf_statistic = 0.0
        else:
            adf_p = 1.0
            adf_statistic = 0.0

        # Volume Spike
        if len(volumes) > 60:
            recent_avg = np.mean(volumes[-60:-1])
            if recent_avg > 0:
                volume_ratio = float(volumes[-1] / recent_avg)
            else:
                volume_ratio = 1.0
        else:
            volume_ratio = 1.0

        calc_duration = (time.time() - calc_start) * 1000

        return {
            'z_score': z_score,
            'skewness': neg_skew,
            'kurtosis': fat_tails,
            'adf_p_value': adf_p,
            'adf_statistic': adf_statistic,
            'volume_spike': volume_ratio
        }, calc_duration

    except Exception as e:
        print(f"Error in metrics: {e}")
        calc_duration = (time.time() - calc_start) * 1000
        return {
            'z_score': 0.0, 'skewness': 0.0, 'kurtosis': 0.0,
            'adf_p_value': 1.0, 'adf_statistic': 0.0, 'volume_spike': 1.0
        }, calc_duration

# =============================================================================
# PORTFOLIO AND TRADE MANAGEMENT
# =============================================================================

def execute_trade(symbol, signal, signal_strength, current_price, current_datetime):
    """Execute trades based on ticker-specific signals"""
    try:
        trades = []
        exit_trigger = "None"

        if signal == "HOLD":
            trades.append("HOLD - No action")
        elif signal in ["BUY", "SELL"]:
            # Simple trade logging (not actual execution)
            position_type = 'LONG' if signal == 'BUY' else 'SHORT'
            trades.append(f"SIGNAL: {signal} {symbol} @ ${current_price:.2f} | Strength: {signal_strength:.3f}")

            # Update positions tracking
            POSITIONS[symbol] = {
                'entry_date': current_datetime,
                'entry_price': current_price,
                'signal': signal,
                'position_type': position_type,
                'signal_strength': signal_strength
            }

        return trades, exit_trigger

    except Exception as e:
        print(f"Error in trade execution: {e}")
        return ["ERROR"], "Error"

def get_portfolio_metrics():
    """Get basic portfolio metrics"""
    try:
        return {
            'total_trades': len(TRADE_HISTORY),
            'win_rate': 0.0,  # Not calculated in this simple version
            'total_return_pct': 0.0  # Not calculated in this simple version
        }
    except:
        return {'total_trades': 0, 'win_rate': 0.0, 'total_return_pct': 0.0}

# =============================================================================
# CSV WRITING FUNCTION
# =============================================================================

def write_results_to_csv(results, run_timestamp, start_date, end_date, ticker_list):
    """Write results to CSV with proper appending"""

    try:
        print(f"\nWriting {len(results)} results to CSV...")

        if not results:
            print("No results to write")
            return None

        # Define column order
        columns = [
            'Run_ID', 'Run_Timestamp', 'Symbol', 'Date', 'Close_Price', 'Volume',
            'Z_Score', 'Skewness', 'Kurtosis', 'ADF_p_value', 'Volume_Spike',
            'Signal_Strength', 'Signal', 'Reason', 'Confidence', 'Logic', 'Factors',
            'Buy_Logic', 'Sell_Logic', 'Validation_Message', 'Buy_Threshold', 'Sell_Threshold',
            'Sector', 'Analysis_Timestamp', 'Tickers_Analyzed', 'Portfolio_Positions'
        ]

        # Check if file exists and has header
        file_exists = MASTER_CSV_FILE.exists()
        write_header = not file_exists

        if file_exists:
            try:
                with open(MASTER_CSV_FILE, 'r', encoding='utf-8') as f:
                    first_line = f.readline().strip()
                    if not first_line or len(first_line.split(',')) < 10:
                        write_header = True
            except:
                write_header = True

        # Write to CSV
        with open(MASTER_CSV_FILE, 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)

            # Write header if needed
            if write_header:
                writer.writerow(columns)
                print("Header written to CSV")

            # Write data rows
            for i, result in enumerate(results):
                row = [
                    result.get('Run_ID', ''),
                    result.get('Run_Timestamp', ''),
                    result.get('Symbol', ''),
                    result.get('Date', ''),
                    result.get('Close_Price', 0.0),
                    result.get('Volume', 0),
                    result.get('Z_Score', 0.0),
                    result.get('Skewness', 0.0),
                    result.get('Kurtosis', 0.0),
                    result.get('ADF_p_value', 0.0),
                    result.get('Volume_Spike', 0.0),
                    result.get('Signal_Strength', 0.0),
                    result.get('Signal', ''),
                    result.get('Reason', ''),
                    result.get('Confidence', ''),
                    result.get('Logic', ''),
                    result.get('Factors', ''),
                    result.get('Buy_Logic', ''),
                    result.get('Sell_Logic', ''),
                    result.get('Validation_Message', ''),
                    result.get('Buy_Threshold', 0.0),
                    result.get('Sell_Threshold', 0.0),
                    result.get('Sector', ''),
                    result.get('Analysis_Timestamp', ''),
                    result.get('Tickers_Analyzed', 0),
                    result.get('Portfolio_Positions', 0)
                ]

                writer.writerow(row)
                print(f"   Written: {result['Symbol']} -> {result['Signal']} @ ${result['Close_Price']:.2f}")

            # Force write to disk
            csvfile.flush()
            os.fsync(csvfile.fileno())

        print(f"CSV write successful: {MASTER_CSV_FILE}")
        print(f"File size: {MASTER_CSV_FILE.stat().st_size:,} bytes")

        return MASTER_CSV_FILE

    except Exception as e:
        print(f"Error writing to CSV: {e}")
        traceback.print_exc()
        return None

# =============================================================================
# MAIN ANALYSIS FUNCTION
# =============================================================================

def analyze_stock_with_ticker_thresholds(symbol, start_date, end_date, sequence_number, run_id):
    """Analyze stock using ticker-specific thresholds"""

    try:
        print(f"\nAnalyzing {symbol} (#{sequence_number}) with ticker-specific thresholds")

        # Get threshold info for display
        thresholds = TICKER_THRESHOLDS.get(symbol, {'buy_z': 'N/A', 'sell_z': 'N/A'})
        print(f"  Thresholds: Buy Z <= {thresholds['buy_z']:.3f}, Sell Z >= {thresholds['sell_z']:.3f}")

        stock_start_time = time.time()
        analysis_timestamp = get_timestamp_string()

        # Data fetching
        data, fetch_duration = get_minute_data_with_timing(symbol, start_date, end_date, api_key)

        if data.empty:
            print(f"   No data for {symbol}")
            return None

        # Data processing
        data['return'] = data['close'].pct_change()
        latest_row = data.iloc[-1]

        # Date handling
        current_analysis_date = datetime.now().strftime('%Y-%m-%d')

        if hasattr(latest_row['datetime'], 'strftime'):
            latest_datetime_str = latest_row['datetime'].strftime('%Y-%m-%d %H:%M:%S')
        else:
            latest_datetime_str = str(latest_row['datetime'])

        window_data = data.tail(1440)  # Last 24 hours of 15-min data

        if len(window_data) < 100:
            print(f"   Insufficient data: {len(window_data)} minutes")
            return None

        # Statistical calculations
        prices = window_data['close'].values
        returns = window_data['return'].dropna().values
        volumes = window_data['volume'].values

        metrics, stats_duration = calculate_metrics_with_timing(prices, returns, volumes)

        # Generate ticker-specific signal
        signal_start = time.time()
        signal_result = generate_ticker_specific_signal(
            symbol, metrics['z_score'], metrics['volume_spike'],
            metrics['skewness'], metrics['kurtosis'], metrics['adf_p_value']
        )
        signal_duration = (time.time() - signal_start) * 1000

        # Execute trade
        trades, exit_trigger = execute_trade(
            symbol, signal_result['signal'], signal_result['signal_strength'],
            float(latest_row['close']), latest_datetime_str
        )

        total_analysis_duration = (time.time() - stock_start_time) * 1000

        # Create result dictionary
        result = {
            'Run_ID': str(run_id),
            'Run_Timestamp': '',  # Will be filled by main function
            'Symbol': str(symbol),
            'Date': str(current_analysis_date),
            'Close_Price': float(latest_row['close']),
            'Volume': int(latest_row['volume']),
            'Z_Score': float(metrics['z_score']),
            'Skewness': float(metrics['skewness']),
            'Kurtosis': float(metrics['kurtosis']),
            'ADF_p_value': float(metrics['adf_p_value']),
            'Volume_Spike': float(metrics['volume_spike']),
            'Signal_Strength': float(signal_result['signal_strength']),
            'Signal': str(signal_result['signal']),
            'Reason': str(signal_result['reason']),
            'Confidence': str(signal_result['confidence']),
            'Logic': str(signal_result['logic']),
            'Factors': str(signal_result['factors']),
            'Buy_Logic': str(signal_result['buy_logic']),
            'Sell_Logic': str(signal_result['sell_logic']),
            'Validation_Message': str(signal_result['validation_message']),
            'Buy_Threshold': float(signal_result['buy_threshold']),
            'Sell_Threshold': float(signal_result['sell_threshold']),
            'Sector': str(SECTOR_MAPPING.get(symbol, 'Unknown')),
            'Analysis_Timestamp': str(analysis_timestamp),
            'Tickers_Analyzed': 0,  # Will be filled by main function
            'Portfolio_Positions': int(len(POSITIONS))
        }

        print(f"   {result['Signal']} | ${result['Close_Price']:.2f}")
        print(f"   Z-Score: {result['Z_Score']:.3f} | Thresholds: {signal_result['buy_threshold']:.3f}/{signal_result['sell_threshold']:.3f}")
        print(f"   Strength: {result['Signal_Strength']:.3f} | Confidence: {result['Confidence']}")

        return result

    except Exception as e:
        print(f"   Error analyzing {symbol}: {e}")
        traceback.print_exc()
        return None

# =============================================================================
# MAIN EXECUTION FUNCTION
# =============================================================================

def run_ticker_specific_analysis(ticker_list, start_date, end_date):
    """Run analysis with ticker-specific thresholds"""

    run_id = get_unique_run_id()
    run_timestamp = datetime.now().strftime('%d-%m-%Y %H:%M:%S')

    print(f"TICKER-SPECIFIC TRADING SYSTEM")
    print(f"=" * 70)
    print(f"WARNING: Using thresholds from FAILED system with 0% win rate!")
    print(f"Run ID: {run_id}")
    print(f"Stocks: {len(ticker_list)}")
    print(f"Period: {start_date} to {end_date}")
    print(f"Output: {MASTER_CSV_FILE}")
    print(f"=" * 70)

    # Show threshold summary
    print(f"\nTicker-Specific Thresholds:")
    print(f"{'Ticker':<6} {'Buy Z':<8} {'Sell Z':<8} {'Sector':<12}")
    print(f"-" * 40)
    for ticker in ticker_list:
        if ticker in TICKER_THRESHOLDS:
            thresh = TICKER_THRESHOLDS[ticker]
            sector = SECTOR_MAPPING.get(ticker, 'Unknown')
            print(f"{ticker:<6} {thresh['buy_z']:<8.3f} {thresh['sell_z']:<8.3f} {sector:<12}")
        else:
            print(f"{ticker:<6} {'N/A':<8} {'N/A':<8} {'Unknown':<12}")

    results = []

    for i, ticker in enumerate(ticker_list, 1):
        result = analyze_stock_with_ticker_thresholds(ticker, start_date, end_date, i, run_id)

        if result is not None:
            # Add metadata
            result['Run_Timestamp'] = run_timestamp
            result['Tickers_Analyzed'] = len(ticker_list)
            results.append(result)

        time.sleep(1.0)  # Rate limiting

    # Write results to CSV
    if results:
        csv_file = write_results_to_csv(results, run_timestamp, start_date, end_date, ticker_list)

        # Summary
        signals = {}
        for result in results:
            signal = result['Signal']
            signals[signal] = signals.get(signal, 0) + 1

        print(f"\nRESULTS SUMMARY:")
        print(f"Run ID: {run_id}")
        print(f"BUY: {signals.get('BUY', 0)}")
        print(f"SELL: {signals.get('SELL', 0)}")
        print(f"HOLD: {signals.get('HOLD', 0)}")
        print(f"Total: {len(results)}")

        if csv_file:
            print(f"\nCSV saved to: {csv_file}")
            print(f"File size: {csv_file.stat().st_size:,} bytes")

        # Display signal breakdown by ticker
        print(f"\nDetailed Results:")
        print(f"{'Ticker':<6} {'Price':<8} {'Signal':<6} {'Strength':<8} {'Z-Score':<8} {'Threshold':<10}")
        print(f"-" * 60)
        for result in results:
            thresh_str = f"{result['Buy_Threshold']:.2f}/{result['Sell_Threshold']:.2f}"
            print(f"{result['Symbol']:<6} ${result['Close_Price']:<7.2f} {result['Signal']:<6} {result['Signal_Strength']:<7.3f} {result['Z_Score']:<7.3f} {thresh_str:<10}")

        return results, csv_file
    else:
        print("No results to save")
        return [], None

# =============================================================================
# MAIN EXECUTION
# =============================================================================

if __name__ == "__main__":
    print(f"INITIALIZING TICKER-SPECIFIC TRADING SYSTEM")
    print(f"WARNING: Based on failed system analysis - use extreme caution!")

    start_date = "2025-07-14"
    end_date = "2025-08-29"

    ticker_list = ['RGTI', 'PATH', "MSFT", "GOOG", "NVDA", "BABA", "CVNA", "HPE",
                   'ASAN', 'PLTR', 'TTD', 'TWLO', 'BW', 'PTON', "TSLA", 'JMIA']

    try:
        results, csv_file = run_ticker_specific_analysis(ticker_list, start_date, end_date)

        if results and csv_file:
            print(f"\nSUCCESS!")
            print(f"Results written to: {csv_file}")
            print(f"Total signals: {len(results)}")
            print(f"\nREMEMBER: These thresholds come from a 0% win rate system!")
            print(f"Test thoroughly before any live implementation!")
        else:
            print("Analysis failed")

    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()

INITIALIZING TICKER-SPECIFIC TRADING SYSTEM
TICKER-SPECIFIC TRADING SYSTEM
Run ID: 46e33e16
Stocks: 16
Period: 2025-07-14 to 2025-08-29
Output: D:\1\data\ticker_specific_signals.csv

Ticker-Specific Thresholds:
Ticker Buy Z    Sell Z   Sector      
----------------------------------------
RGTI   -1.006   -0.844   Technology  
PATH   -1.368   -0.717   Technology  
MSFT   -1.186   -0.717   Technology  
GOOG   0.885    2.044    Technology  
NVDA   -0.101   1.637    Technology  
BABA   0.050    1.643    E-Commerce  
CVNA   -2.035   -0.711   E-Commerce  
HPE    -0.461   -0.461   Hardware    
ASAN   -2.358   -0.961   Technology  
PLTR   -1.185   -0.478   Technology  
TTD    -1.858   -1.279   Technology  
TWLO   -2.105   -1.195   Technology  
BW     1.117    3.281    Technology  
PTON   0.175    3.508    Consumer    
TSLA   0.039    2.324    Technology  
JMIA   1.222    3.792    E-Commerce  

Analyzing RGTI (#1) with ticker-specific thresholds
  Thresholds: Buy Z <= -1.006, Sell Z >= -0.844
 

In [None]:
# CSV file saved in D: data_LLM folder with ticker_specific_signals_LLM . This will be analyzed by Portfolio  metrics

In [None]:
# The complete bi-directional trading system is now ready. It implements your requirements:
# Trading Logic:
#
# BUY signal → LONG position → Close on next SELL/HOLD
# SELL signal → SHORT position → Close on next BUY/HOLD
# No consecutive positions in same stock (if just closed LONG AAPL, next SELL signal for AAPL gets SKIPPED)
#
# Features Included:
#
# $2 commission per trade
# Advanced metrics calculation (Sharpe, VaR, Alpha, Max Drawdown)
# Colorful summary table output
# CSV export of all trades
# Open position flagging (trades that never found exit signals)
#
# What the system will do:
#
# Load your CSV signals and sort by date
# Process each signal, tracking last action per stock
# Skip consecutive signals for same stock
# Execute paired trades (entry + exit)
# Calculate cumulative metrics for each trade
# Generate colorful performance summary
# Save detailed results to CSV

In [None]:
# no multiple positions  for the same ticker

# Final backtesting code

In [18]:
#!/usr/bin/env python3
"""
STRICT POSITION MANAGEMENT TRADING SYSTEM
- Exactly $25K per position (or closest possible)
- Maximum 4 positions total
- Strict capital validation
- Clear cash flow tracking
"""

import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime, timedelta
import time
from pathlib import Path
import warnings
from colorama import Fore, Back, Style, init
from tabulate import tabulate
warnings.filterwarnings('ignore')

init()

class StrictPositionTradingSystem:
    def __init__(self, api_key, csv_file_path):
        self.api_key = api_key
        self.csv_file_path = csv_file_path

        # Fixed portfolio parameters
        self.INITIAL_CAPITAL = 100000
        self.TARGET_POSITION_SIZE = 25000
        self.MAX_POSITIONS = 4
        self.COMMISSION = 2.0

        # Current state
        self.cash = self.INITIAL_CAPITAL
        self.positions = {}  # {symbol: position_info}
        self.completed_trades = []
        self.last_action = {}  # Track last action per stock to prevent consecutive trades
        self.price_cache = {}

        # Load and prepare signals
        self.load_signals()

        print(f"PORTFOLIO CONFIGURATION:")
        print(f"Initial Capital: ${self.INITIAL_CAPITAL:,}")
        print(f"Position Size: ${self.TARGET_POSITION_SIZE:,}")
        print(f"Max Positions: {self.MAX_POSITIONS}")
        print(f"Commission: ${self.COMMISSION} per trade")

    def load_signals(self):
        """Load and prepare signal data"""
        self.signals_df = pd.read_csv(self.csv_file_path)

        # Parse dates
        self.signals_df['Date'] = pd.to_datetime(self.signals_df['Date'], format='%d-%m-%Y')

        # Create trading datetime (use 9:30 AM market open as default)
        self.signals_df['Trading_Datetime'] = self.signals_df['Date'] + pd.Timedelta(hours=9, minutes=30)

        # Sort chronologically
        self.signals_df = self.signals_df.sort_values('Trading_Datetime').reset_index(drop=True)

        print(f"Loaded {len(self.signals_df)} signals")
        print(f"Date range: {self.signals_df['Date'].min()} to {self.signals_df['Date'].max()}")

    def get_price(self, symbol, target_date):
        """Fetch price with caching"""
        cache_key = f"{symbol}_{target_date.strftime('%Y-%m-%d')}"

        if cache_key in self.price_cache:
            return self.price_cache[cache_key]

        try:
            # Try 15-minute data first
            start_date = (target_date - timedelta(days=3)).strftime('%Y-%m-%d')
            end_date = (target_date + timedelta(days=1)).strftime('%Y-%m-%d')

            url = f"https://financialmodelingprep.com/api/v3/historical-chart/15min/{symbol}"
            params = {'from': start_date, 'to': end_date, 'apikey': self.api_key}

            response = requests.get(url, params=params, timeout=20)

            if response.status_code == 200 and response.json():
                data = response.json()
                # Find closest time match
                best_price = None
                min_diff = float('inf')

                for entry in data:
                    entry_time = datetime.strptime(entry['date'], '%Y-%m-%d %H:%M:%S')
                    time_diff = abs((entry_time - target_date).total_seconds())
                    if time_diff < min_diff:
                        min_diff = time_diff
                        best_price = float(entry['close'])

                if best_price:
                    self.price_cache[cache_key] = best_price
                    return best_price

            # Fallback to daily data
            return self.get_daily_price(symbol, target_date.date())

        except Exception as e:
            print(f"Error fetching minute data for {symbol}: {e}")
            return self.get_daily_price(symbol, target_date.date())

    def get_daily_price(self, symbol, target_date):
        """Fallback to daily price data"""
        try:
            start_date = (target_date - timedelta(days=5)).strftime('%Y-%m-%d')
            end_date = (target_date + timedelta(days=2)).strftime('%Y-%m-%d')

            url = f"https://financialmodelingprep.com/api/v3/historical-price-full/{symbol}"
            params = {'from': start_date, 'to': end_date, 'apikey': self.api_key}

            response = requests.get(url, params=params, timeout=15)

            if response.status_code == 200:
                data = response.json()
                if 'historical' in data and data['historical']:
                    # Find exact or closest date
                    for entry in data['historical']:
                        if entry['date'] == target_date.strftime('%Y-%m-%d'):
                            price = float(entry['close'])
                            self.price_cache[f"{symbol}_{target_date}"] = price
                            return price

                    # Use first available date as fallback
                    price = float(data['historical'][0]['close'])
                    self.price_cache[f"{symbol}_{target_date}"] = price
                    return price

            return None

        except Exception as e:
            print(f"Daily price fetch failed for {symbol}: {e}")
            return None

    def can_open_position(self, symbol, signal_type):
        """Check if we can open a new position"""
        # Rule 1: No existing position in this symbol
        if symbol in self.positions:
            return False, f"Already have {self.positions[symbol]['type']} position in {symbol}"

        # Rule 2: Maximum position limit
        if len(self.positions) >= self.MAX_POSITIONS:
            return False, f"At maximum positions ({len(self.positions)}/{self.MAX_POSITIONS})"

        # Rule 3: Prevent consecutive trades in same symbol
        if symbol in self.last_action:
            last = self.last_action[symbol]
            if ((signal_type == 'LONG' and last == 'SHORT') or
                (signal_type == 'SHORT' and last == 'LONG')):
                return False, f"Last action in {symbol} was {last}, preventing consecutive {signal_type}"

        # Rule 4: Sufficient cash for position
        required_cash = self.TARGET_POSITION_SIZE + self.COMMISSION
        if self.cash < required_cash:
            return False, f"Insufficient cash (need ${required_cash:,}, have ${self.cash:,.2f})"

        return True, "OK"

    def open_long_position(self, symbol, signal_row, price):
        """Open a long position with strict $25K sizing"""
        # Calculate shares for exactly $25K position
        target_shares = int(self.TARGET_POSITION_SIZE / price)

        if target_shares <= 0:
            print(f"    Cannot open LONG {symbol} - price too high (${price:.2f})")
            return False

        # Actual position value
        position_value = target_shares * price
        total_cost = position_value + self.COMMISSION

        # Final validation
        if total_cost > self.cash:
            print(f"    Cannot open LONG {symbol} - insufficient cash")
            return False

        # Execute trade
        self.cash -= total_cost

        position = {
            'type': 'LONG',
            'symbol': symbol,
            'shares': target_shares,
            'entry_price': price,
            'entry_value': position_value,
            'entry_date': signal_row['Trading_Datetime'],
            'entry_signal': signal_row,
            'total_cost': total_cost
        }

        self.positions[symbol] = position

        print(f"    OPENED LONG {symbol}: {target_shares:,} shares @ ${price:.2f}")
        print(f"    Position value: ${position_value:,.2f} (target: ${self.TARGET_POSITION_SIZE:,})")
        print(f"    Cash after: ${self.cash:,.2f}")
        print(f"    Positions: {len(self.positions)}/{self.MAX_POSITIONS}")

        return True

    def open_short_position(self, symbol, signal_row, price):
        """Open a short position with strict $25K sizing"""
        # For shorts, target same notional value
        target_shares = int(self.TARGET_POSITION_SIZE / price)

        if target_shares <= 0:
            print(f"    Cannot open SHORT {symbol} - price too high (${price:.2f})")
            return False

        # Short position calculations
        notional_value = target_shares * price
        margin_requirement = notional_value * 0.5  # 50% margin
        proceeds = notional_value - self.COMMISSION

        # Check margin requirement
        if margin_requirement > self.cash:
            print(f"    Cannot open SHORT {symbol} - insufficient margin")
            return False

        # Execute short
        self.cash -= margin_requirement  # Hold margin

        position = {
            'type': 'SHORT',
            'symbol': symbol,
            'shares': target_shares,
            'entry_price': price,
            'entry_value': notional_value,
            'entry_date': signal_row['Trading_Datetime'],
            'entry_signal': signal_row,
            'margin_held': margin_requirement,
            'proceeds': proceeds
        }

        self.positions[symbol] = position

        print(f"    OPENED SHORT {symbol}: {target_shares:,} shares @ ${price:.2f}")
        print(f"    Notional value: ${notional_value:,.2f} (target: ${self.TARGET_POSITION_SIZE:,})")
        print(f"    Margin held: ${margin_requirement:,.2f}")
        print(f"    Cash after: ${self.cash:,.2f}")
        print(f"    Positions: {len(self.positions)}/{self.MAX_POSITIONS}")

        return True

    def close_position(self, symbol, signal_row, price):
        """Close existing position"""
        if symbol not in self.positions:
            return False

        position = self.positions[symbol]

        if position['type'] == 'LONG':
            # Close long position
            gross_proceeds = position['shares'] * price
            net_proceeds = gross_proceeds - self.COMMISSION

            # Calculate P&L
            total_cost = position['total_cost']
            net_pnl = net_proceeds - total_cost

            # Update cash
            self.cash += net_proceeds

        else:  # SHORT
            # Close short position
            cover_cost = position['shares'] * price + self.COMMISSION

            # Calculate P&L
            net_pnl = position['proceeds'] - cover_cost

            # Release margin and pay cover cost
            self.cash += position['margin_held'] - cover_cost

        # Calculate metrics
        pnl_percent = (net_pnl / position['entry_value']) * 100
        days_held = (signal_row['Trading_Datetime'] - position['entry_date']).days

        # Create trade record
        trade = {
            'Trade_Num': len(self.completed_trades) + 1,
            'Symbol': symbol,
            'Type': position['type'],
            'Entry_Date': position['entry_date'],
            'Entry_Price': position['entry_price'],
            'Exit_Date': signal_row['Trading_Datetime'],
            'Exit_Price': price,
            'Shares': position['shares'],
            'Entry_Value': position['entry_value'],
            'Exit_Value': position['shares'] * price,
            'Net_PnL': net_pnl,
            'PnL_Percent': pnl_percent,
            'Days_Held': max(days_held, 1),
            'Entry_Signal_Strength': position['entry_signal']['Signal_Strength'],
            'Exit_Signal': signal_row['Signal'],
            'Exit_Signal_Strength': signal_row['Signal_Strength'],
            'Cash_After': self.cash,
            'Exit_Condition': 'Closed'
        }

        self.completed_trades.append(trade)

        # Update tracking
        self.last_action[symbol] = position['type']
        del self.positions[symbol]

        pnl_color = Fore.GREEN if net_pnl > 0 else Fore.RED
        print(f"    {pnl_color}CLOSED {position['type']} {symbol}: "
              f"${position['entry_price']:.2f} → ${price:.2f} = "
              f"{pnl_percent:+.2f}% (${net_pnl:+.2f}){Style.RESET_ALL}")
        print(f"    Cash after: ${self.cash:,.2f}")
        print(f"    Positions: {len(self.positions)}/{self.MAX_POSITIONS}")

        return True

    def process_signals(self):
        """Process all signals sequentially"""
        print(f"\n{Fore.CYAN}Processing signals with strict position management...{Style.RESET_ALL}")

        for idx, row in self.signals_df.iterrows():
            signal = row['Signal']
            symbol = row['Symbol']
            signal_time = row['Trading_Datetime']

            print(f"\n--- Signal {idx+1}/{len(self.signals_df)}: {signal} {symbol} at {signal_time} ---")

            # Get price
            price = self.get_price(symbol, signal_time)
            if price is None:
                print(f"    SKIPPED - No price data for {symbol}")
                continue

            print(f"    Price: ${price:.2f}")

            # Check if this closes an existing position
            if symbol in self.positions:
                position = self.positions[symbol]
                should_close = False

                if position['type'] == 'LONG' and signal == 'SELL':
                    should_close = True
                elif position['type'] == 'SHORT' and signal == 'BUY':
                    should_close = True

                if should_close:
                    self.close_position(symbol, row, price)
                    continue
                else:
                    print(f"    Signal {signal} does not close existing {position['type']} position in {symbol}")
                    continue

            # Try to open new position (ignore HOLD signals)
            if signal == 'BUY':
                can_open, reason = self.can_open_position(symbol, 'LONG')
                if can_open:
                    self.open_long_position(symbol, row, price)
                else:
                    print(f"    BLOCKED BUY {symbol}: {reason}")

            elif signal == 'SELL':
                can_open, reason = self.can_open_position(symbol, 'SHORT')
                if can_open:
                    self.open_short_position(symbol, row, price)
                else:
                    print(f"    BLOCKED SELL {symbol}: {reason}")

            elif signal == 'HOLD':
                print(f"    HOLD {symbol} - signal ignored")

            # Portfolio validation
            total_invested = sum([pos['entry_value'] for pos in self.positions.values()])
            total_portfolio = self.cash + total_invested

            print(f"    Portfolio: ${self.cash:,.2f} cash + ${total_invested:,.2f} invested = ${total_portfolio:,.2f}")

            # Sanity check
            if total_portfolio > self.INITIAL_CAPITAL * 1.2:  # 20% growth limit
                print(f"    {Fore.RED}WARNING: Portfolio grew suspiciously (${total_portfolio:,.2f}){Style.RESET_ALL}")

            time.sleep(0.5)  # Rate limiting

    def generate_final_report(self):
        """Generate comprehensive final report"""
        print(f"\n{Back.BLUE}{Fore.WHITE} FINAL PORTFOLIO REPORT {Style.RESET_ALL}")

        # Calculate current portfolio value
        total_invested = sum([pos['entry_value'] for pos in self.positions.values()])
        total_portfolio_value = self.cash + total_invested
        total_return = total_portfolio_value - self.INITIAL_CAPITAL
        return_percent = (total_return / self.INITIAL_CAPITAL) * 100

        print(f"Starting Capital: ${self.INITIAL_CAPITAL:,}")
        print(f"Current Cash: ${self.cash:,.2f}")
        print(f"Invested Amount: ${total_invested:,.2f}")
        print(f"Total Portfolio Value: ${total_portfolio_value:,.2f}")
        print(f"Total Return: ${total_return:+,.2f} ({return_percent:+.2f}%)")

        # Validate position sizes
        print(f"\n{Back.GREEN}{Fore.WHITE} POSITION SIZE VALIDATION {Style.RESET_ALL}")
        if self.positions:
            for symbol, pos in self.positions.items():
                size_diff = pos['entry_value'] - self.TARGET_POSITION_SIZE
                size_pct_diff = (size_diff / self.TARGET_POSITION_SIZE) * 100
                color = Fore.GREEN if abs(size_pct_diff) < 5 else Fore.RED
                print(f"{color}{symbol}: ${pos['entry_value']:,.2f} "
                      f"(target: ${self.TARGET_POSITION_SIZE:,}, "
                      f"diff: {size_pct_diff:+.1f}%){Style.RESET_ALL}")
        else:
            print("No open positions")

        # Trade summary
        if self.completed_trades:
            winners = len([t for t in self.completed_trades if t['Net_PnL'] > 0])
            total_trades = len(self.completed_trades)
            win_rate = (winners / total_trades) * 100
            total_realized_pnl = sum([t['Net_PnL'] for t in self.completed_trades])

            print(f"\n{Back.CYAN}{Fore.WHITE} TRADING PERFORMANCE {Style.RESET_ALL}")
            print(f"Completed Trades: {total_trades}")
            print(f"Winners: {winners} ({win_rate:.1f}%)")
            print(f"Total Realized P&L: ${total_realized_pnl:+,.2f}")
            print(f"Average P&L per Trade: ${total_realized_pnl/total_trades:+.2f}")

        # Position management stats
        print(f"\n{Back.YELLOW}{Fore.BLACK} POSITION MANAGEMENT {Style.RESET_ALL}")
        print(f"Max Positions Used: {max(len(self.positions), len(self.completed_trades))}/{self.MAX_POSITIONS}")
        print(f"Current Positions: {len(self.positions)}/{self.MAX_POSITIONS}")
        print(f"Average Position Size: ${total_invested/max(len(self.positions),1):,.2f}" if self.positions else "No positions")

    def save_results(self):
        """Save all results to CSV"""
        all_records = []

        # Add completed trades
        for trade in self.completed_trades:
            all_records.append(trade)

        # Add open positions
        for symbol, pos in self.positions.items():
            # Get current price for unrealized P&L
            current_price = self.get_price(symbol, datetime.now())
            if current_price is None:
                current_price = pos['entry_price']

            if pos['type'] == 'LONG':
                unrealized_pnl = (current_price - pos['entry_price']) * pos['shares'] - (2 * self.COMMISSION)
            else:
                unrealized_pnl = (pos['entry_price'] - current_price) * pos['shares'] - (2 * self.COMMISSION)

            unrealized_pct = (unrealized_pnl / pos['entry_value']) * 100

            open_record = {
                'Trade_Num': len(all_records) + 1,
                'Symbol': symbol,
                'Type': pos['type'],
                'Entry_Date': pos['entry_date'],
                'Entry_Price': pos['entry_price'],
                'Exit_Date': None,
                'Exit_Price': current_price,
                'Shares': pos['shares'],
                'Entry_Value': pos['entry_value'],
                'Exit_Value': pos['shares'] * current_price,
                'Net_PnL': unrealized_pnl,
                'PnL_Percent': unrealized_pct,
                'Days_Held': (datetime.now() - pos['entry_date']).days,
                'Entry_Signal_Strength': pos['entry_signal']['Signal_Strength'],
                'Exit_Signal': 'OPEN',
                'Exit_Signal_Strength': None,
                'Cash_After': self.cash,
                'Exit_Condition': 'OPEN'
            }
            all_records.append(open_record)

        if all_records:
            df = pd.DataFrame(all_records)

            output_dir = Path("backtest_results")
            output_dir.mkdir(exist_ok=True)

            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = output_dir / f"strict_position_trades_{timestamp}.csv"

            df.to_csv(filename, index=False)

            print(f"\n{Fore.GREEN}Results saved to: {filename}{Style.RESET_ALL}")
            print(f"Total records: {len(all_records)} ({len(self.completed_trades)} closed, {len(self.positions)} open)")

            return filename

        return None

    def run_backtest(self):
        """Run complete backtest"""
        print(f"\n{Back.CYAN}{Fore.WHITE} STARTING STRICT POSITION BACKTEST {Style.RESET_ALL}")

        self.process_signals()
        self.generate_final_report()
        csv_file = self.save_results()

        return {
            'completed_trades': len(self.completed_trades),
            'open_positions': len(self.positions),
            'final_cash': self.cash,
            'csv_file': csv_file
        }

# Main execution
if __name__ == "__main__":
    # Load API key
    api_key_path = Path("D:/Monte_Carlo/FMP_api_Key.txt")
    if api_key_path.exists():
        with open(api_key_path, 'r') as f:
            api_key = f.read().strip()
        print(f"API key loaded from {api_key_path}")
    else:
        print("Please provide FMP API key")
        exit()

    # Configuration
    CSV_FILE = r"D:\1\data_LLM\ticker_specific_signals_LLM.csv"

    try:
        print(f"Starting Strict Position Management Backtest...")

        system = StrictPositionTradingSystem(
            api_key=api_key,
            csv_file_path=CSV_FILE
        )

        results = system.run_backtest()

        print(f"\n{Back.GREEN}{Fore.WHITE} BACKTEST COMPLETED {Style.RESET_ALL}")
        print(f"Completed trades: {results['completed_trades']}")
        print(f"Open positions: {results['open_positions']}")
        print(f"Final cash: ${results['final_cash']:,.2f}")

        if results['csv_file']:
            print(f"Results saved to: {results['csv_file']}")

    except Exception as e:
        print(f"{Fore.RED}Error: {e}{Style.RESET_ALL}")
        import traceback
        traceback.print_exc()

API key loaded from D:\Monte_Carlo\FMP_api_Key.txt
Starting Strict Position Management Backtest...
Loaded 144 signals
Date range: 2025-08-26 00:00:00 to 2025-08-30 00:00:00
PORTFOLIO CONFIGURATION:
Initial Capital: $100,000
Position Size: $25,000
Max Positions: 4
Commission: $2.0 per trade

 STARTING STRICT POSITION BACKTEST 

Processing signals with strict position management...

--- Signal 1/144: BUY MSFT at 2025-08-26 09:30:00 ---
    Price: $504.06
    OPENED LONG MSFT: 49 shares @ $504.06
    Position value: $24,698.94 (target: $25,000)
    Cash after: $75,299.06
    Positions: 1/4
    Portfolio: $75,299.06 cash + $24,698.94 invested = $99,998.00

--- Signal 2/144: SELL TWLO at 2025-08-26 09:30:00 ---
    Price: $104.93
    OPENED SHORT TWLO: 238 shares @ $104.93
    Notional value: $24,973.34 (target: $25,000)
    Margin held: $12,486.67
    Cash after: $62,812.39
    Positions: 2/4
    Portfolio: $62,812.39 cash + $49,672.28 invested = $112,484.67

--- Signal 3/144: SELL TTD at 

Analysis of csv file using Claude AI - RAG

In [None]:
#Analysis using RAG

In [1]:
!pip install pandas numpy anthropic faiss-cpu sentence-transformers pathlib

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp310-cp310-win_amd64.whl.metadata (5.2 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-5.1.0-py3-none-any.whl.metadata (16 kB)
Collecting pathlib
  Downloading pathlib-1.0.1-py3-none-any.whl.metadata (5.1 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.56.0-py3-none-any.whl.metadata (40 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.8.0-cp310-cp310-win_amd64.whl.metadata (30 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.7.1-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting scipy (from sentence-transformers)
  Using cached scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Using cached huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting Pillow (from sentence-transformers)
  Downloading pillow-11.

In [2]:
import pandas as pd
import numpy as np
from anthropic import Anthropic
import json
from datetime import datetime
import os
from pathlib import Path
import faiss
from sentence_transformers import SentenceTransformer
import pickle
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TradingRAGSystem:
    """RAG system for trading analysis with knowledge base"""

    def __init__(self, api_key_path, output_dir="D://1//data_LLM"):
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Load API key
        self.api_key = self._load_api_key(api_key_path)
        self.client = Anthropic(api_key=self.api_key) if self.api_key else None

        # Initialize embedding model
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')

        # Trading knowledge base
        self.knowledge_base = self._create_trading_knowledge_base()

        # Setup vector database
        self.vector_db = None
        self.document_store = []
        self._setup_vector_database()

    def _load_api_key(self, key_path):
        """Load API key from text file"""
        try:
            with open(key_path, 'r') as f:
                return f.read().strip()
        except FileNotFoundError:
            logger.error(f"API key file not found at {key_path}")
            return None

    def _create_trading_knowledge_base(self):
        """Create comprehensive trading knowledge base for RAG"""
        return [
            # Performance Metrics
            {
                "category": "performance_metrics",
                "title": "Sharpe Ratio Analysis",
                "content": """The Sharpe ratio measures risk-adjusted returns. A Sharpe ratio above 1.0 is good, above 2.0 is excellent.
                Formula: (Mean Return - Risk Free Rate) / Standard Deviation of Returns.
                Values below 0.5 indicate poor risk-adjusted performance."""
            },
            {
                "category": "performance_metrics",
                "title": "Profit Factor Interpretation",
                "content": """Profit factor = Gross Profit / Gross Loss. Values above 1.5 are good, above 2.0 are excellent.
                A profit factor below 1.0 means the strategy loses money. Values between 1.0-1.5 may be marginal after costs."""
            },
            {
                "category": "performance_metrics",
                "title": "Win Rate Guidelines",
                "content": """Win rate alone doesn't determine profitability. High-frequency strategies often need 55%+ win rates.
                Swing trading strategies can be profitable with 40%+ win rates if winners are larger than losers.
                The key is the relationship between win rate and average win/loss ratio."""
            },

            # Risk Management
            {
                "category": "risk_management",
                "title": "Maximum Drawdown Analysis",
                "content": """Maximum drawdown should typically not exceed 10-20% for institutional strategies, 30% for retail.
                Drawdown duration is as important as magnitude. Strategies with long recovery periods may have psychological issues.
                Consider the Calmar ratio: Annual Return / Maximum Drawdown."""
            },
            {
                "category": "risk_management",
                "title": "Position Sizing Rules",
                "content": """Kelly Criterion for optimal position size: f = (bp-q)/b where b=odds, p=win probability, q=loss probability.
                Conservative approach: Use 25-50% of Kelly fraction. Never risk more than 1-2% per trade.
                Consider correlation between positions to avoid concentration risk."""
            },
            {
                "category": "risk_management",
                "title": "Consecutive Loss Management",
                "content": """More than 5-7 consecutive losses may indicate strategy degradation or market regime change.
                Consider implementing stop-loss at strategy level after excessive consecutive losses.
                Analyze if losses cluster during specific market conditions."""
            },

            # Market Analysis
            {
                "category": "market_analysis",
                "title": "Market Regime Detection",
                "content": """Strategies perform differently in trending vs. ranging markets, high vs. low volatility environments.
                Bull markets often favor momentum strategies, bear markets favor mean reversion.
                Consider VIX levels, market correlations, and economic cycles in analysis."""
            },
            {
                "category": "market_analysis",
                "title": "Seasonality Effects",
                "content": """Many strategies show seasonal patterns. January effect, summer doldrums, year-end rallies.
                Analyze performance by month, day of week, and holiday periods.
                Consider adjusting position sizes or taking breaks during historically poor periods."""
            },

            # Strategy Optimization
            {
                "category": "strategy_optimization",
                "title": "Overfitting Detection",
                "content": """High returns with very specific parameters may indicate overfitting to historical data.
                Use walk-forward analysis and out-of-sample testing. Robust strategies work across parameter ranges.
                Be suspicious of strategies that perform too well in backtests."""
            },
            {
                "category": "strategy_optimization",
                "title": "Transaction Costs Impact",
                "content": """Include realistic transaction costs: commissions, bid-ask spread, slippage, market impact.
                High-frequency strategies are more sensitive to costs. Consider cost per trade and total cost ratio.
                Factor in financing costs for overnight positions."""
            },

            # Statistical Analysis
            {
                "category": "statistical_analysis",
                "title": "Statistical Significance",
                "content": """Ensure sufficient trades for statistical significance (typically 100+ trades minimum).
                Use t-tests to verify if returns are significantly different from zero.
                Consider confidence intervals around performance metrics."""
            },
            {
                "category": "statistical_analysis",
                "title": "Distribution Analysis",
                "content": """Analyze return distribution: skewness, kurtosis, tail risk. Normal distribution assumptions often fail.
                Fat tails indicate higher risk of extreme losses. Positive skew is preferable.
                Consider Value at Risk (VaR) and Conditional Value at Risk (CVaR)."""
            }
        ]

    def _setup_vector_database(self):
        """Setup FAISS vector database for knowledge retrieval"""
        logger.info("Setting up vector database...")

        # Create embeddings for knowledge base
        documents = [doc["content"] for doc in self.knowledge_base]
        embeddings = self.embedder.encode(documents)

        # Setup FAISS index
        dimension = embeddings.shape[1]
        self.vector_db = faiss.IndexFlatIP(dimension)  # Inner product for similarity

        # Normalize embeddings for cosine similarity
        faiss.normalize_L2(embeddings)
        self.vector_db.add(embeddings.astype('float32'))

        # Store document metadata
        self.document_store = self.knowledge_base.copy()

        logger.info(f"Vector database setup complete with {len(documents)} documents")

    def retrieve_relevant_context(self, query, k=5):
        """Retrieve relevant trading knowledge based on query"""
        # Encode query
        query_embedding = self.embedder.encode([query])
        faiss.normalize_L2(query_embedding)

        # Search vector database
        scores, indices = self.vector_db.search(query_embedding.astype('float32'), k)

        # Retrieve relevant documents
        relevant_docs = []
        for i, idx in enumerate(indices[0]):
            if scores[0][i] > 0.3:  # Similarity threshold
                relevant_docs.append({
                    "title": self.document_store[idx]["title"],
                    "content": self.document_store[idx]["content"],
                    "category": self.document_store[idx]["category"],
                    "similarity_score": float(scores[0][i])
                })

        return relevant_docs

    def analyze_csv_structure(self, df):
        """Analyze CSV structure and generate comprehensive statistics"""
        analysis = {}

        # Basic info
        analysis['metadata'] = {
            'total_trades': len(df),
            'columns': list(df.columns),
            'file_analyzed': datetime.now().isoformat(),
            'data_shape': df.shape
        }

        # Auto-detect column types
        pnl_cols = [col for col in df.columns if any(term in col.lower()
                    for term in ['pnl', 'profit', 'return', 'gain', 'loss'])]

        date_cols = [col for col in df.columns if any(term in col.lower()
                     for term in ['date', 'time', 'timestamp', 'entry', 'exit'])]

        price_cols = [col for col in df.columns if any(term in col.lower()
                      for term in ['price', 'entry', 'exit', 'close', 'open'])]

        volume_cols = [col for col in df.columns if any(term in col.lower()
                       for term in ['volume', 'quantity', 'size', 'shares'])]

        analysis['column_classification'] = {
            'pnl_columns': pnl_cols,
            'date_columns': date_cols,
            'price_columns': price_cols,
            'volume_columns': volume_cols
        }

        # Comprehensive trading metrics
        if pnl_cols:
            pnl_col = pnl_cols[0]
            pnl_data = df[pnl_col].dropna()

            analysis['trading_metrics'] = {
                'total_pnl': float(pnl_data.sum()),
                'avg_pnl_per_trade': float(pnl_data.mean()),
                'median_pnl': float(pnl_data.median()),
                'pnl_std': float(pnl_data.std()),
                'win_rate': float((pnl_data > 0).mean() * 100),
                'loss_rate': float((pnl_data < 0).mean() * 100),
                'avg_winner': float(pnl_data[pnl_data > 0].mean()) if (pnl_data > 0).any() else 0,
                'avg_loser': float(pnl_data[pnl_data < 0].mean()) if (pnl_data < 0).any() else 0,
                'largest_winner': float(pnl_data.max()),
                'largest_loser': float(pnl_data.min()),
                'total_winners': int((pnl_data > 0).sum()),
                'total_losers': int((pnl_data < 0).sum()),
                'profit_factor': float(pnl_data[pnl_data > 0].sum() / abs(pnl_data[pnl_data < 0].sum())) if (pnl_data < 0).any() else float('inf'),
                'sharpe_ratio': float(pnl_data.mean() / pnl_data.std()) if pnl_data.std() != 0 else 0,
                'sortino_ratio': float(pnl_data.mean() / pnl_data[pnl_data < 0].std()) if (pnl_data < 0).any() and pnl_data[pnl_data < 0].std() != 0 else 0
            }

            # Drawdown analysis
            cumulative_pnl = pnl_data.cumsum()
            running_max = cumulative_pnl.expanding().max()
            drawdown = cumulative_pnl - running_max

            analysis['risk_metrics'] = {
                'max_drawdown': float(drawdown.min()),
                'max_drawdown_pct': float((drawdown.min() / running_max[drawdown.idxmin()]) * 100) if running_max[drawdown.idxmin()] != 0 else 0,
                'avg_drawdown': float(drawdown[drawdown < 0].mean()) if (drawdown < 0).any() else 0,
                'recovery_factor': float(analysis['trading_metrics']['total_pnl'] / abs(drawdown.min())) if drawdown.min() != 0 else float('inf'),
                'calmar_ratio': float(analysis['trading_metrics']['total_pnl'] / abs(drawdown.min())) if drawdown.min() != 0 else float('inf')
            }

            # Consecutive losses analysis
            losses = (pnl_data < 0).astype(int)
            consecutive_losses = []
            current_streak = 0

            for loss in losses:
                if loss:
                    current_streak += 1
                else:
                    if current_streak > 0:
                        consecutive_losses.append(current_streak)
                    current_streak = 0

            if current_streak > 0:
                consecutive_losses.append(current_streak)

            analysis['streak_analysis'] = {
                'max_consecutive_losses': max(consecutive_losses) if consecutive_losses else 0,
                'avg_consecutive_losses': float(np.mean(consecutive_losses)) if consecutive_losses else 0,
                'total_loss_streaks': len(consecutive_losses)
            }

        # Statistical analysis
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        analysis['statistical_summary'] = {}

        for col in numeric_cols:
            col_data = df[col].dropna()
            if len(col_data) > 0:
                analysis['statistical_summary'][col] = {
                    'count': int(len(col_data)),
                    'mean': float(col_data.mean()),
                    'std': float(col_data.std()),
                    'min': float(col_data.min()),
                    'max': float(col_data.max()),
                    'skewness': float(col_data.skew()),
                    'kurtosis': float(col_data.kurtosis())
                }

        # Sample data for Claude analysis
        analysis['sample_data'] = df.head(10).to_dict('records')

        return analysis

    def generate_rag_enhanced_analysis(self, csv_analysis):
        """Generate analysis using RAG with retrieved trading knowledge"""
        logger.info("Generating RAG-enhanced analysis...")

        # Create query for knowledge retrieval based on CSV analysis
        trading_metrics = csv_analysis.get('trading_metrics', {})
        risk_metrics = csv_analysis.get('risk_metrics', {})

        # Generate queries for different aspects
        queries = [
            f"sharpe ratio {trading_metrics.get('sharpe_ratio', 0)} profit factor {trading_metrics.get('profit_factor', 0)} analysis",
            f"win rate {trading_metrics.get('win_rate', 0)} risk management strategy optimization",
            f"maximum drawdown {risk_metrics.get('max_drawdown_pct', 0)} risk analysis",
            f"consecutive losses {csv_analysis.get('streak_analysis', {}).get('max_consecutive_losses', 0)} strategy performance"
        ]

        # Retrieve relevant context
        all_context = []
        for query in queries:
            context = self.retrieve_relevant_context(query, k=3)
            all_context.extend(context)

        # Remove duplicates based on title
        unique_context = []
        seen_titles = set()
        for doc in all_context:
            if doc['title'] not in seen_titles:
                unique_context.append(doc)
                seen_titles.add(doc['title'])

        # Create enhanced prompt with retrieved knowledge
        context_text = "\n\n".join([
            f"**{doc['title']}** (Relevance: {doc['similarity_score']:.2f})\n{doc['content']}"
            for doc in unique_context[:8]  # Use top 8 most relevant
        ])

        enhanced_prompt = f"""
TRADING STRATEGY ANALYSIS WITH DOMAIN EXPERTISE

You are analyzing a trading backtest with access to professional trading knowledge. Use the provided domain expertise to give actionable insights.

=== RELEVANT TRADING EXPERTISE ===
{context_text}

=== BACKTEST DATA ANALYSIS ===
{json.dumps(csv_analysis, indent=2, default=str)}

=== ANALYSIS REQUIREMENTS ===
Based on the trading expertise above and the backtest data, provide a comprehensive analysis including:

1. **Performance Assessment**: Use the domain knowledge to interpret the metrics properly
2. **Risk Evaluation**: Apply professional risk management principles
3. **Strategy Optimization**: Provide specific recommendations based on best practices
4. **Red Flags & Warnings**: Identify potential issues using professional expertise
5. **Action Items**: Concrete steps for improvement
6. **Market Context**: Consider market conditions and regime analysis

Format your response as structured JSON with the following keys:
- "executive_summary": Brief overall assessment
- "performance_analysis": Detailed performance review
- "risk_assessment": Risk analysis with recommendations
- "optimization_recommendations": Specific improvement suggestions
- "warnings_and_concerns": Important issues to address
- "action_items": Prioritized list of next steps
- "confidence_level": Your confidence in the analysis (1-10 scale)

Be specific, use numbers from the data, and reference the trading expertise where relevant.
"""

        # Send to Claude API
        try:
            message = self.client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=4000,
                temperature=0.1,
                messages=[{"role": "user", "content": enhanced_prompt}]
            )

            analysis_text = message.content[0].text

            # Try to parse as JSON, fallback to structured text
            try:
                claude_analysis = json.loads(analysis_text)
            except json.JSONDecodeError:
                # Create structured format if JSON parsing fails
                claude_analysis = {
                    "executive_summary": "Analysis completed but not in JSON format",
                    "full_analysis": analysis_text,
                    "parsing_note": "Claude response was not valid JSON, stored as text"
                }

            # Combine all results
            final_analysis = {
                "analysis_metadata": {
                    "timestamp": datetime.now().isoformat(),
                    "analysis_type": "RAG_Enhanced_Trading_Analysis",
                    "claude_model": "claude-sonnet-4-20250514",
                    "knowledge_base_docs_used": len(unique_context)
                },
                "csv_data_analysis": csv_analysis,
                "retrieved_context": unique_context,
                "claude_analysis": claude_analysis
            }

            return final_analysis

        except Exception as e:
            logger.error(f"Error in Claude API call: {str(e)}")
            return {
                "error": str(e),
                "csv_analysis": csv_analysis,
                "retrieved_context": unique_context
            }

    def save_analysis_results(self, analysis_data, filename_prefix="backtest_rag_analysis"):
        """Save analysis results in JSON format"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{filename_prefix}_{timestamp}.json"
        filepath = self.output_dir / filename

        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(analysis_data, f, indent=2, ensure_ascii=False, default=str)

            logger.info(f"Analysis saved to: {filepath}")
            return str(filepath)

        except Exception as e:
            logger.error(f"Error saving analysis: {str(e)}")
            return None

    def analyze_trading_backtest(self, csv_path):
        """Complete RAG-enhanced trading backtest analysis pipeline"""
        logger.info(f"Starting RAG-enhanced analysis of {csv_path}")

        try:
            # Load CSV data
            df = pd.read_csv(csv_path)
            logger.info(f"Loaded CSV with {len(df)} rows and {len(df.columns)} columns")

            # Analyze CSV structure and calculate metrics
            csv_analysis = self.analyze_csv_structure(df)

            # Generate RAG-enhanced analysis
            if self.client:
                full_analysis = self.generate_rag_enhanced_analysis(csv_analysis)
            else:
                full_analysis = {
                    "error": "No API key available",
                    "csv_analysis": csv_analysis
                }

            # Save results
            output_path = self.save_analysis_results(full_analysis)

            return full_analysis, output_path

        except Exception as e:
            logger.error(f"Error in analysis pipeline: {str(e)}")
            return {"error": str(e)}, None


def main():
    """Main execution function"""
    # Configuration
    csv_path = "D://1//data_LLM//backtest_results//strict_position_trades_20250831_005459.csv"
    api_key_path = "D://1//claude_api_key.txt"
    output_dir = "D://1//data_LLM"

    # Initialize RAG system
    logger.info("Initializing RAG-enhanced trading analysis system...")
    rag_system = TradingRAGSystem(api_key_path, output_dir)

    # Run analysis
    analysis_results, output_path = rag_system.analyze_trading_backtest(csv_path)

    # Display summary
    print("\n" + "="*60)
    print("RAG-ENHANCED TRADING BACKTEST ANALYSIS COMPLETE")
    print("="*60)

    if "error" not in analysis_results:
        # Display key metrics
        trading_metrics = analysis_results.get("csv_data_analysis", {}).get("trading_metrics", {})
        print(f"Total Trades: {analysis_results.get('csv_data_analysis', {}).get('metadata', {}).get('total_trades', 'N/A')}")
        print(f"Win Rate: {trading_metrics.get('win_rate', 'N/A'):.2f}%")
        print(f"Profit Factor: {trading_metrics.get('profit_factor', 'N/A'):.2f}")
        print(f"Sharpe Ratio: {trading_metrics.get('sharpe_ratio', 'N/A'):.2f}")
        print(f"Total P&L: {trading_metrics.get('total_pnl', 'N/A'):.2f}")

        # Display Claude's executive summary if available
        claude_analysis = analysis_results.get("claude_analysis", {})
        if isinstance(claude_analysis, dict) and "executive_summary" in claude_analysis:
            print(f"\nExecutive Summary: {claude_analysis['executive_summary']}")
    else:
        print(f"Analysis completed with errors: {analysis_results['error']}")

    if output_path:
        print(f"\nDetailed analysis saved to: {output_path}")

    return analysis_results, output_path


if __name__ == "__main__":
    # Install requirements reminder
    required_packages = [
        "pandas", "numpy", "anthropic", "faiss-cpu",
        "sentence-transformers", "pathlib"
    ]

    print("Required packages:")
    print("pip install " + " ".join(required_packages))
    print("\n" + "-"*60 + "\n")

    # Run main analysis
    main()

  from .autonotebook import tqdm as notebook_tqdm
INFO:__main__:Initializing RAG-enhanced trading analysis system...
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Required packages:
pip install pandas numpy anthropic faiss-cpu sentence-transformers pathlib

------------------------------------------------------------



To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
INFO:__main__:Setting up vector database...
Batches: 100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
INFO:__main__:Vector database setup complete with 12 documents
INFO:__main__:Starting RAG-enhanced analysis of D://1//data_LLM//backtest_results//strict_position_trades_20250831_005459.csv
INFO:__main__:Loaded CSV with 5 rows and 18 columns
INFO:__main__:Generating RAG-enhanced analysis...
Batches: 100%|██████████| 1/1 [00:00<00:00, 37.04it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 47.64it/s]
Batches


RAG-ENHANCED TRADING BACKTEST ANALYSIS COMPLETE
Total Trades: 5
Win Rate: 40.00%
Profit Factor: 0.21
Sharpe Ratio: -0.48
Total P&L: -2771.68

Executive Summary: Analysis completed but not in JSON format

Detailed analysis saved to: D:\1\data_LLM\backtest_rag_analysis_20250901_020915.json
