In [1]:
!pip install -q -U duckduckgo-search

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.3/3.3 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# --- CELL 1: SETUP & AUTHENTICATION (CORRECT ORDER) ---
import os
import google.generativeai as genai
from kaggle_secrets import UserSecretsClient

# 1. Retrieve Secrets & Set Env Vars FIRST
# (We do this BEFORE importing the kaggle library to prevent the crash)
user_secrets = UserSecretsClient()

try:
    # Get secrets
    gemini_key = user_secrets.get_secret("GOOGLE_API_KEY")
    k_user = user_secrets.get_secret("KAGGLE_USERNAME")
    k_key = user_secrets.get_secret("KAGGLE_KEY")

    # Set Environment Variables
    os.environ["KAGGLE_USERNAME"] = k_user
    os.environ["KAGGLE_KEY"] = k_key
    print("‚úÖ Environment variables set.")
    
    # 2. NOW it is safe to import the Kaggle API
    from kaggle.api.kaggle_api_extended import KaggleApi
    
    k_api = KaggleApi()
    k_api.authenticate()
    print("‚úÖ Kaggle API Authenticated.")

    # 3. Authenticate Gemini
    genai.configure(api_key=gemini_key)
    print("‚úÖ Gemini API Authenticated.")

except Exception as e:
    print(f"‚ùå Error: {e}")

‚úÖ Environment variables set.
‚úÖ Kaggle API Authenticated.
‚úÖ Gemini API Authenticated.


In [3]:
# --- 1. IMPORTS & SETUP ---
import os
import json
import glob
import time
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from duckduckgo_search import DDGS
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle_secrets import UserSecretsClient

# --- 2. AUTHENTICATION & CONFIG ---
def setup_system():
    user_secrets = UserSecretsClient()
    try:
        # Gemini Auth
        api_key = user_secrets.get_secret("GOOGLE_API_KEY")
        genai.configure(api_key=api_key)
        
        # Kaggle Auth
        k_user = user_secrets.get_secret("KAGGLE_USERNAME")
        k_key = user_secrets.get_secret("KAGGLE_KEY")
        
        # Set Env Vars (Safest method)
        os.environ["KAGGLE_USERNAME"] = k_user
        os.environ["KAGGLE_KEY"] = k_key
        
        # Initialize API
        api = KaggleApi()
        api.authenticate()
        return api
    except Exception as e:
        print(f"‚ùå Auth Error: {e}")
        return None

k_api = setup_system()

# UPDATED: Use the stable 1.5 Flash model
model_name = 'gemini-2.5-flash' 

# Safety Config (Block None to allow medical/technical discussions)
safety_config = {
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}

# --- HELPER: SAFE GENERATION ---
def safe_generate(model, prompt):
    """
    Wrapper to handle empty responses or API hiccups gracefully.
    """
    try:
        response = model.generate_content(prompt, safety_settings=safety_config)
        
        # Check if we actually got text back
        if response.text: 
            return response.text
        else:
            return "Error: Model returned empty response."
            
    except ValueError:
        # This catches the 'finish_reason is 1' but no text error
        # We try to inspect the candidate directly if possible, or just fail gracefully
        return "Error: Model finished but generated no text (Silent Block)."
    except Exception as e:
        return f"Error: Generation failed with {str(e)}"

# --- 3. AGENT DEFINITIONS ---

class ProblemScoutAgent:
    def run(self, slug):
        print(f"üïµÔ∏è [Scout] Analyzing competition: {slug}...")
        try:
            # RSNA Override
            if "rsna" in slug and "aneurysm" in slug:
                return "Goal: Detect intracranial aneurysms on 3D CT. Metric: Weighted Log Loss. Data: 85GB 3D DICOM."
            
            # API Fetch
            comps = k_api.competitions_list(search=slug.split('-')[0])
            target = next((c for c in comps if c.ref == slug), None)
            if target:
                return f"Goal: {target.description[:500]}... Metric: {target.evaluationMetric}."
            return "Could not fetch official details via API."
        except Exception as e:
            return f"Scout Error: {e}"

class ForumScoutAgent:
    def run(self, slug):
        print(f"üì° [Forum] Scanning discussions for: {slug}...")
        readable_name = slug.replace("-", " ")
        query = f"{readable_name} kaggle discussion solution tricks"
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=5))
            
            if not results: return "No discussions found."
            summary = "\n".join([f"- {r['title']}: {r['body'][:200]}..." for r in results])
            return summary
        except Exception as e:
            return f"Forum Search Error: {e}"

class NotebookScoutAgent:
    def run(self, slug):
        print(f"üë®‚Äçüíª [Notebook] Hunting for top code...")
        try:
            kernels = []
            try: kernels = k_api.kernels_list(competition=slug, sort_by='voteCount', page_size=1)
            except: pass
            
            if not kernels:
                print("   (Strict filter failed, trying broad search...)")
                kernels = k_api.kernels_list(search=slug.split('-')[0], sort_by='voteCount', page_size=1)
            
            if not kernels: return "No public code found."
            
            top_k = kernels[0]
            print(f"   (Found: {top_k.title})")
            
            k_api.kernels_pull(top_k.ref, path="./downloaded_code")
            
            code_content = ""
            files = glob.glob("./downloaded_code/*")
            for f_path in files:
                if f_path.endswith(".py"):
                    with open(f_path,'r') as f: code_content += f.read()
                elif f_path.endswith(".ipynb"):
                    with open(f_path,'r') as f:
                        nb = json.load(f)
                        for c in nb['cells']: 
                            if c['cell_type']=='code': code_content += "".join(c['source']) + "\n"
            
            if len(code_content) < 50: return "Code was empty."
            return f"--- CODE FROM {top_k.title} ---\n{code_content[:25000]}"
        except Exception as e:
            return f"Code Download Error: {e}"

class StrategistAgent:
    def run(self, slug, goal, forum_intel, code_intel):
        print(f"üß† [Strategist] Formulating plan...")
        
        prompt = f"""
        Act as a Kaggle Grandmaster.
        Competition: {slug}
        
        1. OFFICIAL GOAL: {goal}
        2. COMMUNITY INTEL: {forum_intel}
        3. EXISTING CODE BASELINE: {code_intel[:2000]}... (truncated)
        
        Task: Write a 'Winning Strategy' report.
        - Critique the baseline.
        - Identify the specific model architecture we should build.
        - Suggest 1 specific data augmentation or feature engineering technique.
        - Define the validation strategy.
        """
        
        model = genai.GenerativeModel(model_name)
        return safe_generate(model, prompt)

class CodeGeneratorAgent:
    def run(self, strategy_report):
        print(f"üèóÔ∏è [Builder] Writing final solution.py...")
        
        prompt = f"""
        You are an expert Python Developer.
        
        Based on this strategy:
        {strategy_report}
        
        Write a COMPLETE, RUNNABLE 'main.py' script.
        - Include Dataset class, Model class, and Training Loop.
        - Use PyTorch.
        - Handle the specific data types mentioned (e.g. Images, CSVs).
        """
        
        model = genai.GenerativeModel(model_name)
        return safe_generate(model, prompt)

# --- 4. THE ORCHESTRATOR ---

class KaggleCommandSystem:
    def __init__(self):
        self.scout = ProblemScoutAgent()
        self.forum = ForumScoutAgent()
        self.notebook = NotebookScoutAgent()
        self.strategist = StrategistAgent()
        self.builder = CodeGeneratorAgent()
        
    def execute(self, competition_slug):
        print(f"üöÄ STARTING KAGGLE COMMAND FOR: {competition_slug}\n" + "="*50)
        
        goal_data = self.scout.run(competition_slug)
        forum_data = self.forum.run(competition_slug)
        code_data = self.notebook.run(competition_slug)
        
        strategy = self.strategist.run(competition_slug, goal_data, forum_data, code_data)
        
        # If strategy failed, don't try to build code
        if "Error" in strategy and len(strategy) < 100:
            return {"strategy_report": strategy, "final_code": "Skipped due to strategy error."}
            
        final_code = self.builder.run(strategy)
        
        return {
            "strategy_report": strategy,
            "final_code": final_code
        }

# --- 5. EXECUTION ---
system = KaggleCommandSystem()
slug = "rsna-intracranial-aneurysm-detection" 
# You can change 'slug' to 'hull-tactical-speed-dating' or any other active comp to test

try:
    result = system.execute(slug)
    
    from IPython.display import Markdown, display
    print("\n" + "="*50)
    display(Markdown(f"## üìÑ STRATEGY REPORT\n{result['strategy_report']}"))
    print("\n" + "="*50)
    display(Markdown(f"## üêç GENERATED CODE\n{result['final_code']}"))
    
    with open("submission.py", "w") as f:
        f.write(result['final_code'])
    print("‚úÖ Code saved to 'submission.py'")

except Exception as e:
    print(f"‚ùå Fatal System Error: {e}")

üöÄ STARTING KAGGLE COMMAND FOR: rsna-intracranial-aneurysm-detection
üïµÔ∏è [Scout] Analyzing competition: rsna-intracranial-aneurysm-detection...
üì° [Forum] Scanning discussions for: rsna-intracranial-aneurysm-detection...
üë®‚Äçüíª [Notebook] Hunting for top code...


  with DDGS() as ddgs:


   (Found: RSNA Aneurysm Detection Demo Submission)
üß† [Strategist] Formulating plan...
üèóÔ∏è [Builder] Writing final solution.py...



## üìÑ STRATEGY REPORT
As a Kaggle Grandmaster, let's dissect the `rsna-intracranial-aneurysm-detection` challenge and formulate a winning strategy. This competition demands robust 3D image processing and classification, focusing on a critical medical task with high stakes.

---

### Winning Strategy Report: RSNA Intracranial Aneurysm Detection

#### 1. Critique of the Baseline

The provided baseline code is not a machine learning solution; rather, it's a **submission scaffolding**. Its primary function is to demonstrate how to read DICOM files and structure a submission for the inference server.

**Specific Critiques:**

*   **No Model Inference:** The `predict` function is truncated and explicitly states "Replace this section with your own prediction code." This means it lacks any actual aneurysm detection logic, image processing, or a trained model.
*   **Minimal Data Handling:** While it uses `pydicom` to read DICOMs and hints at `polars` for efficient DataFrame operations, it performs no critical medical image preprocessing steps such as Hounsfield Unit (HU) windowing, intensity normalization, or 3D volume reconstruction.
*   **Placeholder for a Challenge:** It serves its purpose as a functional, albeit empty, submission template. For a competition of this nature, a true baseline would typically involve at least a simple 2D CNN on slices, or a rudimentary 3D CNN, to establish a performance benchmark.
*   **Inefficient for 3D:** Merely iterating through files in a directory will be inefficient for reconstructing 3D volumes, especially with varying slice counts and orders. Proper sorting by `InstanceNumber` and stacking into a coherent 3D array is crucial but absent.

In summary, the current baseline is merely a **technical submission skeleton**, not an actual machine learning model. Our strategy must build the entire model pipeline from scratch, focusing on efficient 3D data handling and state-of-the-art deep learning architectures.

#### 2. Specific Model Architecture

Given the 3D CT data and the multi-label classification task, a **3D Convolutional Neural Network (CNN)** is the mandatory choice. We need an architecture capable of learning complex spatial features across slices.

**Proposed Architecture: 3D ResNeXt-50 (or similar from MONAI Model Zoo)**

1.  **Input Preparation:**
    *   Read all DICOM slices for a given `SeriesInstanceUID`.
    *   Sort slices by `InstanceNumber` and reconstruct the 3D volume.
    *   Apply Hounsfield Unit (HU) windowing (e.g., a "aneurysm/vessel" specific window like `[-200, 400] HU` or `[0, 600] HU` to highlight vascular structures) and normalize pixel intensities (e.g., to `[0, 1]` or `[-1, 1]`).
    *   Resample the volume to a fixed isotropic spacing (e.g., 1mm x 1mm x 1mm) and then resize/pad to a consistent input shape (e.g., 128x128x128 or 256x256x256, depending on GPU memory and target resolution). This is crucial as CT scans often have varying slice thicknesses and in-plane resolutions.

2.  **Backbone:** A **3D ResNeXt-50** architecture.
    *   **Why ResNeXt?** It combines the benefits of ResNet (residual connections) with grouped convolutions, which improves computational efficiency and accuracy by aggregating transformations from multiple parallel paths. This makes it particularly effective for learning rich features in 3D volumes without excessive computational cost compared to denser architectures.
    *   **Implementation:** Leverage robust libraries like **MONAI (Medical Open Network for AI)**, which provides highly optimized 3D CNNs and pre-trained weights for medical imaging. MONAI's `ResNet` or `MedNeXt` models are excellent starting points.

3.  **Head:**
    *   After the final convolutional block of the 3D ResNeXt backbone, apply **Global Average Pooling** across all spatial dimensions. This reduces the feature map to a fixed-size vector.
    *   This vector is then fed into a **fully connected (Dense) layer** with **14 output units**, corresponding to the 14 aneurysm locations/classes.
    *   Apply a **Sigmoid activation function** to each output unit, as this is a multi-label classification problem (an image can have multiple aneurysms, or none). Each output will represent the probability of an aneurysm being present at that specific location.

This architecture offers a strong balance of representational power and computational efficiency, well-suited for the 3D nature of CT scans and the multi-label classification task.

#### 3. Specific Data Augmentation / Feature Engineering Technique

**Technique: Advanced 3D Geometric and Intensity Augmentation with Medical Specifics**

For medical imaging, standard augmentations are often insufficient. We need techniques that mimic realistic anatomical variations and scanning artifacts.

*   **Specific Augmentations:**
    1.  **3D Elastic Deformations:** This is *critical* for medical images. It applies a smooth, non-linear deformation field to the image, simulating natural anatomical variations and scanner imperfections. This makes the model robust to slight shifts, rotations, and distortions in patient anatomy. Libraries like MONAI provide efficient implementations.
    2.  **Random 3D Rotations and Flips:** Randomly rotate the 3D volume along all three axes (e.g., up to +/- 30 degrees) and perform random flipping along the sagittal, coronal, and axial planes. This helps the model generalize to different patient orientations during scans.
    3.  **Random Zoom/Scaling:** Introduce slight variations in zoom to account for differences in patient size or scan acquisition parameters.
    4.  **Intensity Augmentations:** Apply random brightness, contrast, and gamma adjustments. Also, add Gaussian noise or introduce simulated "motion artifacts" (e.g., by subtly blurring parts of the image along an axis) to mimic real-world scan imperfections.
    5.  **MixUp/CutMix (3D):** While more complex for 3D, if feasible, these could also boost generalization by creating synthetic training samples that blend features from multiple inputs.

*   **Why these?** These augmentations are tailored to the challenges of medical image analysis, where slight variations in patient positioning, anatomy, or scanner quality can significantly impact model performance. Elastic deformations are particularly powerful for teaching the model to recognize structures despite local anatomical variations.

#### 4. Validation Strategy

A robust validation strategy is paramount to ensure our model generalizes well to unseen data and accurately reflects real-world performance. Medical imaging competitions are particularly sensitive to data leakage.

**Proposed Strategy: Patient-Level 5-Fold Stratified Cross-Validation**

1.  **Patient-Level Split:** This is the most crucial step. A single patient (identified by `StudyInstanceUID` or implicitly by `PatientID` if available and unique across studies) must *never* have their scans split across different folds (i.e., some in training, some in validation). All series belonging to the same patient must reside entirely within either the training or validation set for a given fold. This prevents data leakage where the model "sees" a patient in training and then again in validation, leading to overly optimistic performance estimates.

2.  **5-Fold Cross-Validation:** Divide the dataset into 5 distinct folds. For each fold, one part serves as the validation set, and the remaining four parts form the training set. This provides a more stable and reliable estimate of the model's performance by reducing the variance associated with a single train-validation split.

3.  **Stratification:**
    *   **Primary Stratification:** Stratify the folds based on the presence of an aneurysm (`Aneurysm Present` label). This ensures that each fold has a representative proportion of aneurysm-positive and aneurysm-negative cases, which is vital for imbalanced datasets.
    *   **Secondary Stratification (Optional but Recommended):** Further stratify by the individual aneurysm location labels to ensure all 13 specific locations are represented across folds, especially for rare locations. This can be achieved by stratifying based on the *sum* of positive labels per patient or by more advanced multi-label stratification techniques.

4.  **Metric:** Use the official competition metric: **Weighted Log Loss**. Implement this metric for evaluation during training and validation. Monitoring this specific metric is key to optimizing for the competition goal.

5.  **Ensembling (for Final Submission):** Train 5 separate models (one for each fold). For the final submission, predict on the test set with all 5 models and average their probabilities for each class. This ensemble approach typically improves robustness and performance by reducing individual model biases.

This validation strategy will provide a reliable estimate of our model's performance on unseen patients, prevent data leakage, and guide our model development effectively towards the competition's objectives.




## üêç GENERATED CODE
Error: Generation failed with 504 The request timed out. Please try again.

‚úÖ Code saved to 'submission.py'
