### Imports

In [1]:
import autogen
import os
import datetime
import json
import time
from dotenv import load_dotenv

In [None]:
# --- Load variables from .env ---
load_dotenv()

# --- 1. API Key Management ---
# load_dotenv() above is expected to have populated the key from the .env file.
# An empty value is rejected as well, since it would only fail later at request time.
if not os.environ.get("GROQ_API_KEY"):
    raise ValueError("GROQ_API_KEY is required but not found in environment.")

GROQ_BASE_URL = "https://api.groq.com/openai/v1"

# --- 2. Model Leaderboard (Fallback Strategy) ---
# Models are listed in priority order; config_list keeps this order so the
# client can fall back to the next entry when an earlier one fails.
GROQ_FALLBACK_MODELS = [
    "llama-3.3-70b-versatile",  # 1. Top Tier (Best reasoning)
    "llama-3.1-70b-versatile",  # 2. Backup High Intelligence
    "mixtral-8x7b-32768",       # 3. Solid Alternative
    "llama-3.1-8b-instant",     # 4. High Speed/Limits (The 'workhorse')
]

# Every entry shares the same key and endpoint; only the model name differs.
config_list = [
    {
        "model": model_name,
        "api_key": os.environ["GROQ_API_KEY"],
        "base_url": GROQ_BASE_URL,
    }
    for model_name in GROQ_FALLBACK_MODELS
]

# Primary model name, quoted in the agents' system messages.
LLM_MODEL_NAME = config_list[0]["model"]

# --- Token budgets (mitigation for HTTP 413 "Request too large") ---
# Conservative budget for the conversation context. It is NOT enforced by
# llm_config itself.
# NOTE(review): to actually drop old messages, AutoGen's TransformMessages
# capability with a MessageTokenLimiter can be attached to each agent --
# confirm it is available in the installed AutoGen version.
MAX_CONTEXT_TOKENS = 6000

# Upper bound on tokens generated per reply. `max_tokens` limits the completion,
# not the prompt, so it must stay well below the provider's tokens-per-minute
# limit (6000 TPM for the smallest model on the free tier).
# NOTE(review): Groq appears to count the requested completion budget towards
# TPM -- confirm against the rate-limit documentation and tune this value.
MAX_COMPLETION_TOKENS = 2000

llm_config = {
    "timeout": 120,
    "config_list": config_list,
    "cache_seed": 42,  # Cache to save tokens on identical requests
    "max_tokens": MAX_COMPLETION_TOKENS,
}

# --- 3. Robust Python Logging Function ---
def save_chat_history_to_txt(chat_manager, filename_prefix="execution_log"):
    """
    Write the most recent conversation held by ``chat_manager`` to a text file.

    The log is created as ``logs/<filename_prefix>_<timestamp>.txt`` relative to
    the current working directory. Plain deterministic Python, no agents involved.

    Args:
        chat_manager: Object exposing a ``chat_messages`` mapping whose values are
            lists of message dicts. The last value of the mapping is logged. Keys
            read from each message: ``name``, ``role``, ``content`` and
            ``function_call``.
        filename_prefix: Prefix used for the log file name.

    Returns:
        None. Failures while collecting or writing the history are reported with
        ``print`` instead of being raised, so logging never aborts the caller.
    """
    # Create logs directory
    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = os.path.join(log_dir, f"{filename_prefix}_{timestamp}.txt")

    try:
        # Read the history before opening the file so a manager without any
        # conversation still yields a well-formed (header-only) log.
        conversations = list(chat_manager.chat_messages.values())
        messages = conversations[-1] if conversations else []

        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"=== PROJECT FORECASTING LOG - {timestamp} ===\n")
            f.write(f"Model Strategy: Fallback Order {[c['model'] for c in config_list]}\n")
            f.write("="*60 + "\n\n")

            if not messages:
                f.write("(no messages recorded)\n")

            for message in messages:
                # `name` may be missing or None; fall back to the role, then to a placeholder.
                sender = str(message.get('name') or message.get('role') or 'Unknown')

                # Function-call messages commonly carry `content: None`; writing None
                # would raise TypeError and silently truncate the rest of the log.
                raw_content = message.get('content')
                if raw_content is None:
                    content = ''
                elif isinstance(raw_content, str):
                    content = raw_content
                else:
                    content = str(raw_content)

                # The time stamp is the moment the line is written, not when the
                # message was sent (messages are not read for a time field here).
                f.write(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] --- SENDER: {sender.upper()} ---\n")
                f.write("-" * 30 + "\n")

                # Format tool calls if present
                function_call = message.get("function_call")
                if function_call:
                    f.write(f">> TOOL CALL: {function_call.get('name')}\n")
                    f.write(f">> ARGS: {function_call.get('arguments')}\n")

                f.write(content)
                f.write("\n\n" + "="*60 + "\n\n")

        print(f"✅ Conversation log successfully saved to: {filename}")
    except Exception as e:
        # Deliberate best-effort: report the problem but never raise from logging.
        print(f"❌ Error saving log: {e}")

In [3]:
# Cell 2: Global Variables and Admin Agent Setup

# --- Global Configuration Variables ---
# Safety and control switches
ASK_CODE_PERMISSION = True  # True -> Admin runs with human_input_mode "ALWAYS", otherwise "NEVER".
CAN_ASK_HUMAN = True        # True -> Admin's max_consecutive_auto_reply is 0, otherwise None.

# Trading strategy parameters
TRADING_COMMISSION_PCT = 0.00055  # Commission as a fraction (0.055%)
SLIPPAGE_K = 0.01                 # Slippage constant
INITIAL_CAPITAL_USDT = 10000.0    # Starting capital

# Time constraints (UTC hours)
OPEN_TIME_UTC_START = 11          # Hour at which trading starts
OPEN_TIME_UTC_END = 19            # Hour at which trading ends
MAX_PREDICTION_WINDOW_MINUTES = 23 * 60 + 59  # 23h59m expressed in minutes (1439)

# --- Admin Agent Definition ---
# Keyword arguments forwarded to the UserProxyAgent constructor below.
admin_config = dict(
    human_input_mode=("ALWAYS" if ASK_CODE_PERMISSION else "NEVER"),
    code_execution_config=dict(
        # Generated code runs with the notebook's current directory as working dir.
        # NOTE(review): with use_docker=False the code runs on the host; work_dir
        # alone presumably does not sandbox file access -- confirm this is acceptable.
        work_dir=".",
        use_docker=False,
    ),
    max_consecutive_auto_reply=(0 if CAN_ASK_HUMAN else None),
)

admin = autogen.UserProxyAgent(
    name="Admin",
    **admin_config,
    system_message="Execute safe code. If an API loop fails repeatedly, ask the user to pause. Ensure all file operations stay within the project directory.",
)

In [None]:
# Cell 3: Assistant Agents and Group Orchestration

# Factory for assistant agents that share the same token-saving instructions
def create_assistant_agent(name, role_description):
    """
    Build an ``autogen.AssistantAgent`` with a role-specific system message.

    Args:
        name: Agent name passed to ``AssistantAgent``.
        role_description: Role text placed at the start of the system message.

    Returns:
        The agent created with the shared ``llm_config`` and a system message made
        of ``role_description`` followed by the common budget/CPU/rate-limit rules.
    """
    # Shared rules appended to every role; fragments are joined by single spaces.
    shared_rules = " ".join([
        f"You are operating under strict budget constraints using the FREE Groq API ({LLM_MODEL_NAME}).",
        "1. BE CONCISE. Do not repeat context.",
        "2. All predictive models MUST be **CPU-compatible** (e.g., LGBM, LSTM, RandomForest, XGBoost, Scikit-learn models), as the execution environment has no GPU.",
        "3. If you hit a rate limit (API Error), verify if the error allows a retry after waiting.",
        "If so, suggest using `time.sleep(x)` in the next python code block.",
        "4. Always work within the current directory.",
    ])
    full_system_message = "{} {}".format(role_description, shared_rules)

    agent = autogen.AssistantAgent(
        name=name,
        llm_config=llm_config,
        system_message=full_system_message,
    )
    return agent

# --- Specialist agents ---
# Each specialist gets its own role text; the shared rules are appended by
# create_assistant_agent.
architect = create_assistant_agent(
    name="Project_Architect",
    role_description="You are an expert in robust project structuring. Your first task is to create the folder structure (src, data, models, testing) and a comprehensive config.py file based on all initial parameters.",
)

data_scraper = create_assistant_agent(
    name="Data_Scraper",
    role_description="You are the world's expert in data cleaning and integration. Your task is to **load ALL local CSV files**, perform cleaning (NaN, missing data), time-series synchronization (using the 'timestamp' column), and feature generation, preparing a single clean DataFrame for the Model_Builder.",
)

model_builder = create_assistant_agent(
    name="Model_Builder",
    role_description="You are the top Data Scientist for time series. Perform feature engineering, train a robust **CPU-compatible model (e.g., LGBM, LSTM, RandomForest, XGBoost, or simple Scikit-learn models)** to predict maximum price change, and save the final model.",
)

optimizer = create_assistant_agent(
    name="Optimizer_Simulator",
    role_description="You are the Financial Risk and Optimization expert. Create the trading simulation (100/L - 0.5% liquidation logic) and optimize Integer Leverage (1-100), Stop Loss, and Take Profit to maximize log_growth.",
)

code_tester = create_assistant_agent(
    name="Code_Tester",
    role_description="You are the Production QA Engineer. Stress-test the code, ensure robust NaN error handling, and provide the final inference code formatted as requested.",
)

# --- Group chat wiring ---
# Admin comes first, followed by the specialists in pipeline order.
agent_list = [
    admin,
    architect,
    data_scraper,
    model_builder,
    optimizer,
    code_tester,
]

# The conversation starts with an empty history and is limited to 60 rounds.
group_chat = autogen.GroupChat(agents=agent_list, messages=[], max_round=60)

# The manager uses the same LLM configuration as the assistants.
manager = autogen.GroupChatManager(groupchat=group_chat, llm_config=llm_config)

In [None]:
# Cell 4: Initiate Process and Save Log

# --- File variables ---
MAIN_TARGET_FILE = "Bitcoin futures (USDT) 25-03-2020-10-36-00_07-12-2025-00-00-00 timeframe 1m.csv"
DATA_FOLDER = "data/raw"  # Make sure this folder exists before running

# Task description sent to the group chat as the opening message.
final_goal = f"""
Develop a complete, production-ready inference system in Python for BTC Futures.

The core task is to predict the maximum price variation (Long or Short) within a window up to {MAX_PREDICTION_WINDOW_MINUTES} minutes, optimized to maximize log_growth.

CRITICAL CONTEXT:
1. All necessary data, including the target time series ({MAIN_TARGET_FILE}) and several auxiliary CSV files, are already available in the local folder '{DATA_FOLDER}'.
2. The final prediction must be based on the '{MAIN_TARGET_FILE}' (1-minute OHLCV data) and the other auxiliary CSVs.

Step-by-step Plan & STRICT Constraints:

1. **Project_Architect**: 
   - Create the necessary folder structure (src, data/raw, data/processed, models, testing).
   - SECURITY RULE: Inside `config.py`, use `os.getenv("GROQ_API_KEY")`.
   - Create `requirements.txt` including `pandas`, `numpy`, `scikit-learn`, `xgboost` (per opzioni CPU-friendly).

2. **Data_Scraper**: 
   - CRITICAL: Do NOT attempt to fetch data from the web (no CCXT, no Groq, no external APIs).
   - Load, clean, and standardize ALL CSV files found in '{DATA_FOLDER}' into unified pandas DataFrames (e.g., using a common 'timestamp' column).
   - The primary target for prediction is the 'close_bybit_futures' column in the file '{MAIN_TARGET_FILE}'.

3. **Model_Builder**: 
   - Train a robust **CPU-compatible model (e.g., RandomForest, XGBoost, or simple Scikit-learn models)** and save it. **No GPU models (e.g., complex LSTMs) are allowed.**

4. **Optimizer_Simulator**: 
   - Run backtest implementing the liquidation logic (100/L - 0.5%).
   - Optimize Integer Leverage (1-100), SL, and TP for max log_growth.

5. **Code_Tester**: 
   - Provide the final `inference.py` script, ensuring the output format is correct.
"""

print("--- Starting Agent Collaboration: Data provided by User ---")
print(f"Primary Model: {LLM_MODEL_NAME}")

# Start the chat. `result` stays None if the run aborts before returning.
result = None
try:
    result = admin.initiate_chat(
        manager,
        message=final_goal,
    )
finally:
    # --- Save Clean Log ---
    # Runs on failure as well (e.g. an API error raised mid-conversation), so the
    # partial conversation is still written; the original exception propagates.
    save_chat_history_to_txt(manager, filename_prefix="BTC_Forecast_Run_Local_Data")

--- Starting Agent Collaboration ---
Primary Model: llama-3.3-70b-versatile
Admin (to chat_manager):


Develop a complete, production-ready inference system in Python for BTC Futures on Bybit.

The core task is to predict the maximum price variation of BTC futures on Bybit within a window up to 1439 minutes.

Step-by-step Plan & STRICT Constraints:

1. **Project_Architect**: 
   - Create folder structure and `src/config.py`.
   - SECURITY RULE: Inside `config.py`, do NOT hardcode API keys. Use `os.getenv("GROQ_API_KEY")` to load them.
   - Create `requirements.txt` including `ccxt`, `pandas`, `numpy`, `scikit-learn`.

2. **Data_Scraper**: 
   - Fetch historical BTCUSDT data from Bybit.
   - CRITICAL: Do NOT use the Groq API for market data. Groq is for LLM only.
   - You MUST use the `ccxt` library to fetch data specifically from **Bybit** (public API, no keys needed for public data).

3. **Model_Builder**: 
   - Load data, train a model (try using a simple LSTM or LGBM first)

APIStatusError: Error code: 413 - {'error': {'message': 'Request too large for model `llama-3.1-8b-instant` in organization `org_01kbfbg3yaedqrsefey41gymn4` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 11145, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}