In [1]:
%%capture
pip install ddgs


In [2]:
# --- CELL 1: SETUP & AUTHENTICATION (CORRECT ORDER) ---
import os
import google.generativeai as genai
from kaggle_secrets import UserSecretsClient

# 1. Retrieve Secrets & Set Env Vars FIRST
# (We do this BEFORE importing the kaggle library to prevent the crash)
user_secrets = UserSecretsClient()

try:
    # Get secrets
    gemini_key = user_secrets.get_secret("GOOGLE_API_KEY")
    k_user = user_secrets.get_secret("KAGGLE_USERNAME")
    k_key = user_secrets.get_secret("KAGGLE_KEY")

    # Set Environment Variables
    os.environ["KAGGLE_USERNAME"] = k_user
    os.environ["KAGGLE_KEY"] = k_key
    print("‚úÖ Environment variables set.")
    
    # 2. NOW it is safe to import the Kaggle API
    from kaggle.api.kaggle_api_extended import KaggleApi
    
    k_api = KaggleApi()
    k_api.authenticate()
    print("‚úÖ Kaggle API Authenticated.")

    # 3. Authenticate Gemini
    genai.configure(api_key=gemini_key)
    print("‚úÖ Gemini API Authenticated.")

except Exception as e:
    print(f"‚ùå Error: {e}")

‚úÖ Environment variables set.
‚úÖ Kaggle API Authenticated.
‚úÖ Gemini API Authenticated.


In [3]:
# --- 1. IMPORTS & SETUP ---
import os
import json
import glob
import time
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from ddgs import DDGS
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle_secrets import UserSecretsClient

# --- 2. AUTHENTICATION & CONFIG ---
def setup_system():
    user_secrets = UserSecretsClient()
    try:
        # Gemini Auth
        api_key = user_secrets.get_secret("GOOGLE_API_KEY")
        genai.configure(api_key=api_key)
        
        # Kaggle Auth
        k_user = user_secrets.get_secret("KAGGLE_USERNAME")
        k_key = user_secrets.get_secret("KAGGLE_KEY")
        
        # Set Env Vars (Safest method)
        os.environ["KAGGLE_USERNAME"] = k_user
        os.environ["KAGGLE_KEY"] = k_key
        
        # Initialize API
        api = KaggleApi()
        api.authenticate()
        return api
    except Exception as e:
        print(f"‚ùå Auth Error: {e}")
        return None

k_api = setup_system()

# UPDATED: Use the stable 1.5 Flash model
model_name = 'gemini-2.5-flash' 

# Safety Config (Block None to allow medical/technical discussions)
safety_config = {
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}

# --- HELPER: SAFE GENERATION ---
# --- HELPER: ROBUST GENERATION WITH EXTENDED TIMEOUT ---
def safe_generate(model, prompt):
    """
    Uses standard generation but with a 10-minute timeout to allow 
    for long code blocks without triggering 504 or RST_STREAM errors.
    """
    try:
        # Request a massive timeout (600s) so it doesn't give up
        response = model.generate_content(
            prompt, 
            safety_settings=safety_config,
            request_options={'timeout': 600} 
        )
        
        if response.text:
            return response.text
        else:
            return "Error: Model returned empty response."

    except Exception as e:
        return f"Error: Generation failed with {str(e)}"

# --- 3. AGENT DEFINITIONS ---

class ProblemScoutAgent:
    def run(self, slug):
        print(f"üïµÔ∏è [Scout] Analyzing competition: {slug}...")
        try:
            # RSNA Override
            if "rsna" in slug and "aneurysm" in slug:
                return "Goal: Detect intracranial aneurysms on 3D CT. Metric: Weighted Log Loss. Data: 85GB 3D DICOM."
            
            # API Fetch
            comps = k_api.competitions_list(search=slug.split('-')[0])
            target = next((c for c in comps if c.ref == slug), None)
            if target:
                return f"Goal: {target.description[:500]}... Metric: {target.evaluationMetric}."
            return "Could not fetch official details via API."
        except Exception as e:
            return f"Scout Error: {e}"

class ForumScoutAgent:
    def run(self, slug):
        print(f"üì° [Forum] Scanning discussions for: {slug}...")
        readable_name = slug.replace("-", " ")
        query = f"{readable_name} kaggle discussion solution tricks"
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=5))
            
            if not results: return "No discussions found."
            summary = "\n".join([f"- {r['title']}: {r['body'][:200]}..." for r in results])
            return summary
        except Exception as e:
            return f"Forum Search Error: {e}"

class NotebookScoutAgent:
    def run(self, slug):
        print(f"üë®‚Äçüíª [Notebook] Hunting for top code...")
        try:
            kernels = []
            try: kernels = k_api.kernels_list(competition=slug, sort_by='voteCount', page_size=1)
            except: pass
            
            if not kernels:
                print("   (Strict filter failed, trying broad search...)")
                kernels = k_api.kernels_list(search=slug.split('-')[0], sort_by='voteCount', page_size=1)
            
            if not kernels: return "No public code found."
            
            top_k = kernels[0]
            print(f"   (Found: {top_k.title})")
            
            k_api.kernels_pull(top_k.ref, path="./downloaded_code")
            
            code_content = ""
            files = glob.glob("./downloaded_code/*")
            for f_path in files:
                if f_path.endswith(".py"):
                    with open(f_path,'r') as f: code_content += f.read()
                elif f_path.endswith(".ipynb"):
                    with open(f_path,'r') as f:
                        nb = json.load(f)
                        for c in nb['cells']: 
                            if c['cell_type']=='code': code_content += "".join(c['source']) + "\n"
            
            if len(code_content) < 50: return "Code was empty."
            return f"--- CODE FROM {top_k.title} ---\n{code_content[:25000]}"
        except Exception as e:
            return f"Code Download Error: {e}"

class StrategistAgent:
    def run(self, slug, goal, forum_intel, code_intel):
        print(f"üß† [Strategist] Formulating plan...")
        
        prompt = f"""
        Act as a Kaggle Grandmaster.
        Competition: {slug}
        
        1. OFFICIAL GOAL: {goal}
        2. COMMUNITY INTEL: {forum_intel}
        3. EXISTING CODE BASELINE: {code_intel[:2000]}... (truncated)
        
        Task: Write a 'Winning Strategy' report.
        - Critique the baseline.
        - Identify the specific model architecture we should build.
        - Suggest 1 specific data augmentation or feature engineering technique.
        - Define the validation strategy.
        """
        
        model = genai.GenerativeModel(model_name)
        return safe_generate(model, prompt)

class CodeGeneratorAgent:
    def run(self, strategy_report):
        print(f"üèóÔ∏è [Builder] Writing final solution.py...")
        
        prompt = f"""
        You are an expert Python Developer.
        
        Based on this strategy:
        {strategy_report}
        
        Write a COMPLETE, RUNNABLE 'main.py' script.
        - Include Dataset class, Model class, and Training Loop.
        - Use PyTorch.
        - Handle the specific data types mentioned (e.g. Images, CSVs).
        """
        
        model = genai.GenerativeModel(model_name)
        return safe_generate(model, prompt)

# --- 4. THE ORCHESTRATOR ---

class KaggleCommandSystem:
    def __init__(self):
        self.scout = ProblemScoutAgent()
        self.forum = ForumScoutAgent()
        self.notebook = NotebookScoutAgent()
        self.strategist = StrategistAgent()
        self.builder = CodeGeneratorAgent()
        
    def execute(self, competition_slug):
        print(f"üöÄ STARTING KAGGLE COMMAND FOR: {competition_slug}\n" + "="*50)
        
        goal_data = self.scout.run(competition_slug)
        forum_data = self.forum.run(competition_slug)
        code_data = self.notebook.run(competition_slug)
        
        strategy = self.strategist.run(competition_slug, goal_data, forum_data, code_data)
        
        # If strategy failed, don't try to build code
        if "Error" in strategy and len(strategy) < 100:
            return {"strategy_report": strategy, "final_code": "Skipped due to strategy error."}
            
        final_code = self.builder.run(strategy)
        
        return {
            "strategy_report": strategy,
            "final_code": final_code
        }

# --- 5. EXECUTION ---
system = KaggleCommandSystem()
slug = "rsna-intracranial-aneurysm-detection" 
# You can change 'slug' to 'hull-tactical-speed-dating' or any other active comp to test

try:
    result = system.execute(slug)
    
    from IPython.display import Markdown, display
    print("\n" + "="*50)
    display(Markdown(f"## üìÑ STRATEGY REPORT\n{result['strategy_report']}"))
    print("\n" + "="*50)
    display(Markdown(f"## üêç GENERATED CODE\n{result['final_code']}"))
    
    with open("submission.py", "w") as f:
        f.write(result['final_code'])
    print("‚úÖ Code saved to 'submission.py'")

except Exception as e:
    print(f"‚ùå Fatal System Error: {e}")

üöÄ STARTING KAGGLE COMMAND FOR: rsna-intracranial-aneurysm-detection
üïµÔ∏è [Scout] Analyzing competition: rsna-intracranial-aneurysm-detection...
üì° [Forum] Scanning discussions for: rsna-intracranial-aneurysm-detection...
üë®‚Äçüíª [Notebook] Hunting for top code...
   (Found: RSNA Aneurysm Detection Demo Submission)
üß† [Strategist] Formulating plan...
üèóÔ∏è [Builder] Writing final solution.py...



## üìÑ STRATEGY REPORT
As a Kaggle Grandmaster, my winning strategy for the RSNA Intracranial Aneurysm Detection competition will focus on leveraging state-of-the-art 3D deep learning, robust data handling, and a meticulous validation approach to navigate the complexities of medical imaging and the strict inference time limits.

## Winning Strategy: RSNA Intracranial Aneurysm Detection

### 1. Critique of the Existing Code Baseline

The provided baseline code serves primarily as an **API placeholder** rather than a functional machine learning solution.

**Key Criticisms:**

1.  **No Inference Logic:** The `predict` function is entirely devoid of any actual aneurysm detection or classification logic. It only extracts the `series_id` and attempts to list files, then gets truncated. It doesn't load DICOM data into a 3D volume, preprocess it, or run any model inference.
2.  **Missing Preprocessing:** The most critical step for 3D DICOM data ‚Äì loading multiple slices, stacking them into a 3D volume, handling diverse `PixelSpacing` and `SliceThickness`, windowing (e.g., brain window, vessel window), and intensity normalization ‚Äì is completely absent.
3.  **No Model Integration:** There's no neural network definition, no loading of pre-trained weights, and no forward pass implementation.
4.  **Dummy Output:** While not fully shown, the truncated code implies a placeholder for generating predictions for `LABEL_COLS`, which would likely be default values rather than learned probabilities.
5.  **Performance Blind:** Without actual inference, the baseline gives no indication of how to meet the crucial 30-minute inference limit per series. This will be a major engineering challenge for a comprehensive 3D model.

**In essence, the baseline is a barebones scaffold that defines the competition's input/output interface, but provides zero predictive capability. Our winning strategy must entirely replace its core logic with a sophisticated 3D deep learning pipeline.**

### 2. Specific Model Architecture

Given the 3D CT angiography data and the need to detect and localize aneurysms across multiple anatomical regions, a **3D Vision Transformer (ViT) based UNet-like architecture** will be employed. This combines the strengths of convolutional locality with the global context understanding of Transformers, crucial for complex anatomical structures.

**Proposed Architecture: 3D Swin-UNETR (or similar MONAI-based 3D UNet with a strong ViT encoder)**

*   **Encoder:** A 3D Swin Transformer (e.g., `SwinUNETR` from `MONAI` or a custom implementation).
    *   **Rationale:** Swin Transformers excel at hierarchical feature representation and efficient self-attention across 3D volumes. This allows the model to capture both fine-grained local details (important for small aneurysms) and global anatomical context (important for localizing aneurysms within larger vascular structures). Self-supervised pre-training (e.g., using masked image modeling or contrastive learning on large datasets like Radiology-Net or even general medical image datasets) will be crucial for initializing this encoder effectively.
*   **Decoder:** A lightweight 3D decoder path that upsamples features from the Swin Transformer encoder.
    *   **Rationale:** This part will fuse high-level semantic features with low-level spatial features via skip connections (similar to a UNet), enabling precise localization within the 3D volume.
*   **Output Heads:**
    *   **Segmentation Head (Auxiliary Task):** A voxel-wise segmentation head predicting aneurysm masks. While not directly scored, this forces the model to learn precise localization, which can significantly improve classification performance by providing stronger anatomical priors.
    *   **Classification Heads (Main Task):** Multiple global average pooling layers followed by fully connected layers, one for each of the 14 `LABEL_COLS`.
        *   **Rationale:** The segmentation head helps learn rich, spatially-aware features. These features are then aggregated (via global pooling) for the final multi-label classification, directly addressing the competition metric. This multi-task learning approach generally leads to more robust models.

**Why this choice?**
*   **3D Data:** Directly processes volumetric data, avoiding information loss from 2D slicing.
*   **Global Context:** ViT encoders handle long-range dependencies, essential for understanding complex cerebrovascular anatomy.
*   **Localization (Auxiliary):** Segmentation helps the model focus on anatomical anomalies rather than just abstract features, improving precision.
*   **State-of-the-Art:** Represents a strong current trend in medical image analysis for both segmentation and classification tasks.
*   **Efficiency:** Swin Transformers are designed for efficiency, critical for large 3D inputs and inference time limits.

### 3. Specific Data Augmentation & Feature Engineering Technique

**Proposed Technique: Anatomical-Aware 3D Augmentations with Vessel Segmentation as a Soft Prior.**

This technique combines two powerful concepts for medical imaging: realistic anatomical variations and leveraging domain knowledge.

1.  **Anatomical-Aware 3D Augmentations:**
    *   **Elastic Deformations:** Apply non-linear, realistic deformations to the 3D volume. This simulates natural variations in patient anatomy, making the model robust to slight shifts and distortions in vessel structure and brain morphology. (e.g., using `MONAI`'s `Rand3DElasticd`).
    *   **Contextual Cropping:** During training, instead of purely random cropping, prioritize sampling 3D patches that contain major cerebral arteries or known aneurysm locations (if segmentation masks or bounding box annotations are available). For cases without aneurysms, ensure a diverse set of vessel-rich and brain tissue regions are sampled.
    *   **Intensity Adjustments:** Random brightness/contrast, gamma correction, and realistic CT noise injection. This handles variability in scanner acquisition parameters and dose levels.
    *   **Windowing Variation:** Randomly vary the CT window level and width (e.g., between 'brain window', 'vessel window', 'soft tissue window') within reasonable medical ranges during training. This acts as both an augmentation and a form of feature engineering, exposing the model to different tissue contrasts.

2.  **Vessel Segmentation as a Soft Prior (Feature Engineering):**
    *   **Pre-computed Vessel Mask:** Train a separate, lighter 3D U-Net to segment the cerebral vasculature from the raw CT angiography scans. This can be done offline or during the training pipeline using a pre-trained model.
    *   **Input Channel:** The predicted *probability map* (or a binarized mask) of the vessel segmentation is then concatenated as an additional input channel to the main aneurysm detection model (alongside the original CT volume).
    *   **Rationale:** Aneurysms are anomalies of blood vessels. Providing the model with an explicit "vesselness" map focuses its attention on the most relevant anatomical structures, reducing false positives in non-vascular regions and aiding in the detection of small, subtle aneurysms embedded within complex vascular trees. This acts as a powerful, anatomically-informed feature, guiding the model's learning towards critical areas.

### 4. Validation Strategy

A robust **Stratified Patient-Level K-Fold Cross-Validation** strategy will be implemented to ensure the model's generalization capability and prevent data leakage.

1.  **Patient-Level Split:** The fundamental unit for splitting data will be the `PatientID`. All studies and series belonging to a single `PatientID` must reside entirely within either the training set or the validation set for any given fold. This is crucial to prevent the model from "seeing" parts of a patient's anatomy during training and then being evaluated on a different scan from the *same patient*, leading to an overoptimistic assessment of performance.

2.  **Stratification:**
    *   **Primary Stratification:** Stratify based on the `Aneurysm Present` label. This ensures that each fold maintains a similar proportion of aneurysm-positive and aneurysm-negative patients, which is vital due to the inherent class imbalance of aneurysms.
    *   **Secondary Stratification (if feasible):** Further stratification based on the presence of specific aneurysm locations (`LABEL_COLS`) or the total number of aneurysms per patient could be considered if the distribution is highly skewed, though patient-level `Aneurysm Present` is the most critical.

3.  **K-Fold Cross-Validation:**
    *   Utilize `K=5` or `K=4` folds. Training multiple models (one per fold) allows for a more reliable estimate of performance and can be leveraged for ensemble predictions, which often boost final scores.
    *   During each fold, the model is trained on K-1 folds and validated on the remaining 1 fold.

4.  **Metric Monitoring:** The official competition metric, **Weighted Log Loss**, will be the primary metric monitored for early stopping and model selection during validation. This direct alignment ensures that models are optimized for the competition's objective.

5.  **External Validation (Grandmaster Touch):** If additional, independently sourced public datasets of intracranial CTAs with aneurysm labels are available (e.g., from other research initiatives or previous similar competitions), a final model evaluation on this *completely unseen external dataset* would provide the ultimate testament to the model's true generalization and clinical utility, exceeding what can be learned solely from the competition data splits. This step, while not directly contributing to the Kaggle score, aligns with a Grandmaster's commitment to robust and reliable medical AI.

This comprehensive strategy, from cutting-edge architecture and intelligent data handling to rigorous validation, provides a clear path to achieve a top-tier solution in the RSNA Intracranial Aneurysm Detection competition.




## üêç GENERATED CODE
Error: Generation failed with 504 The request timed out. Please try again.

‚úÖ Code saved to 'submission.py'
