In [1]:
%%capture
pip install ddgs


In [2]:
# --- CELL 1: SETUP & AUTHENTICATION (CORRECT ORDER) ---
import os
import google.generativeai as genai
from kaggle_secrets import UserSecretsClient

# 1. Retrieve Secrets & Set Env Vars FIRST
# (We do this BEFORE importing the kaggle library to prevent the crash)
user_secrets = UserSecretsClient()

try:
    # Get secrets
    gemini_key = user_secrets.get_secret("GOOGLE_API_KEY")
    k_user = user_secrets.get_secret("KAGGLE_USERNAME")
    k_key = user_secrets.get_secret("KAGGLE_KEY")

    # Set Environment Variables
    os.environ["KAGGLE_USERNAME"] = k_user
    os.environ["KAGGLE_KEY"] = k_key
    print("‚úÖ Environment variables set.")
    
    # 2. NOW it is safe to import the Kaggle API
    from kaggle.api.kaggle_api_extended import KaggleApi
    
    k_api = KaggleApi()
    k_api.authenticate()
    print("‚úÖ Kaggle API Authenticated.")

    # 3. Authenticate Gemini
    genai.configure(api_key=gemini_key)
    print("‚úÖ Gemini API Authenticated.")

except Exception as e:
    print(f"‚ùå Error: {e}")

‚úÖ Environment variables set.
‚úÖ Kaggle API Authenticated.
‚úÖ Gemini API Authenticated.


In [3]:
# --- 1. IMPORTS & SETUP ---
import os
import json
import glob
import time
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from ddgs import DDGS
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle_secrets import UserSecretsClient

# --- 2. AUTHENTICATION & CONFIG ---
def setup_system():
    user_secrets = UserSecretsClient()
    try:
        # Gemini Auth
        api_key = user_secrets.get_secret("GOOGLE_API_KEY")
        genai.configure(api_key=api_key)
        
        # Kaggle Auth
        k_user = user_secrets.get_secret("KAGGLE_USERNAME")
        k_key = user_secrets.get_secret("KAGGLE_KEY")
        
        # Set Env Vars (Safest method)
        os.environ["KAGGLE_USERNAME"] = k_user
        os.environ["KAGGLE_KEY"] = k_key
        
        # Initialize API
        api = KaggleApi()
        api.authenticate()
        return api
    except Exception as e:
        print(f"‚ùå Auth Error: {e}")
        return None

k_api = setup_system()

# UPDATED: Use the stable 1.5 Flash model
model_name = 'gemini-2.5-flash' 

# Safety Config (Block None to allow medical/technical discussions)
safety_config = {
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}

# --- HELPER: SAFE GENERATION ---
# --- HELPER: ROBUST GENERATION WITH EXTENDED TIMEOUT ---
def safe_generate(model, prompt):
    """
    Uses standard generation but with a 10-minute timeout to allow 
    for long code blocks without triggering 504 or RST_STREAM errors.
    """
    try:
        # Request a massive timeout (3600s) so it doesn't give up
        response = model.generate_content(
            prompt, 
            safety_settings=safety_config,
            request_options={'timeout': 3600} 
        )
        
        if response.text:
            return response.text
        else:
            return "Error: Model returned empty response."

    except Exception as e:
        return f"Error: Generation failed with {str(e)}"

# --- 3. AGENT DEFINITIONS ---

class ProblemScoutAgent:
    def run(self, slug):
        print(f"üïµÔ∏è [Scout] Analyzing competition: {slug}...")
        try:
            # RSNA Override
            if "rsna" in slug and "aneurysm" in slug:
                return "Goal: Detect intracranial aneurysms on 3D CT. Metric: Weighted Log Loss. Data: 85GB 3D DICOM."
            
            # API Fetch
            comps = k_api.competitions_list(search=slug.split('-')[0])
            target = next((c for c in comps if c.ref == slug), None)
            if target:
                return f"Goal: {target.description[:500]}... Metric: {target.evaluationMetric}."
            return "Could not fetch official details via API."
        except Exception as e:
            return f"Scout Error: {e}"

class ForumScoutAgent:
    def run(self, slug):
        print(f"üì° [Forum] Scanning discussions for: {slug}...")
        readable_name = slug.replace("-", " ")
        query = f"{readable_name} kaggle discussion solution tricks"
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=5))
            
            if not results: return "No discussions found."
            summary = "\n".join([f"- {r['title']}: {r['body'][:200]}..." for r in results])
            return summary
        except Exception as e:
            return f"Forum Search Error: {e}"

class NotebookScoutAgent:
    def run(self, slug):
        print(f"üë®‚Äçüíª [Notebook] Hunting for top code...")
        try:
            kernels = []
            try: kernels = k_api.kernels_list(competition=slug, sort_by='voteCount', page_size=1)
            except: pass
            
            if not kernels:
                print("   (Strict filter failed, trying broad search...)")
                kernels = k_api.kernels_list(search=slug.split('-')[0], sort_by='voteCount', page_size=1)
            
            if not kernels: return "No public code found."
            
            top_k = kernels[0]
            print(f"   (Found: {top_k.title})")
            
            k_api.kernels_pull(top_k.ref, path="./downloaded_code")
            
            code_content = ""
            files = glob.glob("./downloaded_code/*")
            for f_path in files:
                if f_path.endswith(".py"):
                    with open(f_path,'r') as f: code_content += f.read()
                elif f_path.endswith(".ipynb"):
                    with open(f_path,'r') as f:
                        nb = json.load(f)
                        for c in nb['cells']: 
                            if c['cell_type']=='code': code_content += "".join(c['source']) + "\n"
            
            if len(code_content) < 50: return "Code was empty."
            return f"--- CODE FROM {top_k.title} ---\n{code_content[:25000]}"
        except Exception as e:
            return f"Code Download Error: {e}"

class StrategistAgent:
    def run(self, slug, goal, forum_intel, code_intel):
        print(f"üß† [Strategist] Formulating plan...")
        
        prompt = f"""
        Act as a Kaggle Grandmaster.
        Competition: {slug}
        
        1. OFFICIAL GOAL: {goal}
        2. COMMUNITY INTEL: {forum_intel}
        3. EXISTING CODE BASELINE: {code_intel[:2000]}... (truncated)
        
        Task: Write a 'Winning Strategy' report.
        - Critique the baseline.
        - Identify the specific model architecture we should build.
        - Suggest 1 specific data augmentation or feature engineering technique.
        - Define the validation strategy.
        """
        
        model = genai.GenerativeModel(model_name)
        return safe_generate(model, prompt)

class CodeGeneratorAgent:
    def run(self, strategy_report):
        print(f"üèóÔ∏è [Builder] Writing final solution.py...")
        
        prompt = f"""
        You are an expert Python Developer.
        
        Based on this strategy:
        {strategy_report}
        
        Write a COMPLETE, RUNNABLE 'main.py' script.
        - Include Dataset class, Model class, and Training Loop.
        - Use PyTorch.
        - Handle the specific data types mentioned (e.g. Images, CSVs).
        """
        
        model = genai.GenerativeModel(model_name)
        return safe_generate(model, prompt)

# --- 4. THE ORCHESTRATOR ---

class KaggleCommandSystem:
    def __init__(self):
        self.scout = ProblemScoutAgent()
        self.forum = ForumScoutAgent()
        self.notebook = NotebookScoutAgent()
        self.strategist = StrategistAgent()
        self.builder = CodeGeneratorAgent()
        
    def execute(self, competition_slug):
        print(f"üöÄ STARTING KAGGLE COMMAND FOR: {competition_slug}\n" + "="*50)
        
        goal_data = self.scout.run(competition_slug)
        forum_data = self.forum.run(competition_slug)
        code_data = self.notebook.run(competition_slug)
        
        strategy = self.strategist.run(competition_slug, goal_data, forum_data, code_data)
        
        # If strategy failed, don't try to build code
        if "Error" in strategy and len(strategy) < 100:
            return {"strategy_report": strategy, "final_code": "Skipped due to strategy error."}
            
        final_code = self.builder.run(strategy)
        
        return {
            "strategy_report": strategy,
            "final_code": final_code
        }

# --- 5. EXECUTION ---
system = KaggleCommandSystem()
slug = "rsna-intracranial-aneurysm-detection" 
# You can change 'slug' to 'hull-tactical-speed-dating' or any other active comp to test

try:
    result = system.execute(slug)
    
    from IPython.display import Markdown, display
    print("\n" + "="*50)
    display(Markdown(f"## üìÑ STRATEGY REPORT\n{result['strategy_report']}"))
    print("\n" + "="*50)
    display(Markdown(f"## üêç GENERATED CODE\n{result['final_code']}"))
    
    with open("submission.py", "w") as f:
        f.write(result['final_code'])
    print("‚úÖ Code saved to 'submission.py'")

except Exception as e:
    print(f"‚ùå Fatal System Error: {e}")

üöÄ STARTING KAGGLE COMMAND FOR: rsna-intracranial-aneurysm-detection
üïµÔ∏è [Scout] Analyzing competition: rsna-intracranial-aneurysm-detection...
üì° [Forum] Scanning discussions for: rsna-intracranial-aneurysm-detection...
üë®‚Äçüíª [Notebook] Hunting for top code...
   (Found: RSNA Aneurysm Detection Demo Submission)
üß† [Strategist] Formulating plan...
üèóÔ∏è [Builder] Writing final solution.py...



## üìÑ STRATEGY REPORT
As a Kaggle Grandmaster, my approach to the RSNA Intracranial Aneurysm Detection competition focuses on leveraging state-of-the-art 3D deep learning techniques, robust data handling, and rigorous validation to achieve a top-tier solution.

---

### Winning Strategy Report: RSNA Intracranial Aneurysm Detection

#### 1. Critique of the Baseline

The provided baseline code serves primarily as a structural template for submission, rather than an intelligent solution for aneurysm detection.

*   **Lack of Intelligence:** The baseline contains no machine learning logic. It merely outlines how to read DICOM files, identify series, and format an output DataFrame. This means we are starting from a blank slate regarding model development.
*   **Basic DICOM Handling:** While `pydicom` is used, the example code does not demonstrate crucial medical image processing steps such as converting raw DICOM slices into a coherent 3D volume, handling varying `PixelSpacing` or `SliceThickness`, or applying `RescaleIntercept`/`RescaleSlope` for Hounsfield Unit (HU) conversion. These steps are fundamental for consistent model input.
*   **No Performance Guidance:** Since there's no actual model, the baseline provides no insights into expected performance or computational complexity. The 30-minute inference limit per series, while generous, could mislead new competitors into thinking a simple model is sufficient, whereas complex 3D processing and advanced models will be necessary.
*   **Focus on Boilerplate:** The baseline correctly identifies the `LABEL_COLS`, `ID_COL`, and `DICOM_TAG_ALLOWLIST`, which are important for adherence to the submission format but do not contribute to the core challenge of aneurysm detection.

In summary, the baseline is a bare-bones submission scaffolding. Our "Winning Strategy" must completely supersede this placeholder with a comprehensive, medically-informed, and computationally efficient deep learning pipeline.

#### 2. Specific Model Architecture: Two-Stage 3D Swin-UNETR Ensemble

Given the 3D nature of CT data and the need for both detection and classification across multiple anatomical locations, a sophisticated 3D deep learning architecture is essential. We will adopt a multi-stage approach for robustness and performance:

**Model Architecture:**
We will employ a **3D Swin-UNETR** (Vision Transformer based UNet) as the core architecture, leveraging the robust MONAI framework. Swin-UNETR has demonstrated superior performance in 3D medical image segmentation and localization tasks due to its ability to capture both local and global dependencies within the volumetric data.

**Two-Stage Approach:**
1.  **Stage 1: Aneurysm Localization/Segmentation Head:**
    *   The primary task of the Swin-UNETR will be to output a multi-channel segmentation map. One channel will represent the general probability of *any* aneurysm presence (binary segmentation). Other channels could optionally be trained to segment specific aneurysm locations if high-resolution ground truth masks are available.
    *   The encoder-decoder structure of Swin-UNETR is highly effective for this, allowing the model to learn fine-grained spatial features (decoder) while benefiting from contextual global features (encoder).

2.  **Stage 2: Location-Specific Classification Head(s):**
    *   The output features from the Swin-UNETR's encoder path (or the bottleneck layer) will be fed into a classification head (or multiple parallel heads). This head will learn to classify the 13 specific aneurysm locations and the overall `Aneurysm Present` label based on the contextual information learned by the 3D backbone.
    *   Alternatively, we can extract ROIs (Regions of Interest) around the high-probability aneurysm regions detected in Stage 1 and feed these crops into smaller, specialized 3D CNNs for fine-grained classification of the specific locations.
    *   The final output will be 14 probabilities (13 specific locations + 1 overall `Aneurysm Present`), directly mapping to the `LABEL_COLS`.

**Rationale:**
*   **3D Context:** Swin-UNETR inherently processes the full 3D volume, capturing spatial relationships crucial for aneurysm detection.
*   **Localization-Aware Classification:** By first localizing potential aneurysms, the model is guided to focus on relevant regions, improving the accuracy of the downstream classification heads.
*   **State-of-the-Art:** Vision Transformer architectures are current best-in-class for many medical imaging tasks.
*   **Ensembling:** To further boost robustness and performance, we will train an ensemble of these Swin-UNETR models using different data splits and/or slight architectural variations, averaging their predictions for the final submission.

#### 3. Specific Data Augmentation & Feature Engineering: Multi-Channel CT Windowing with Elastic Deformations

For 3D medical images, robust pre-processing and augmentation are as critical as the model architecture.

1.  **Feature Engineering: Multi-Channel CT Windowing & Intensity Normalization:**
    *   Instead of feeding raw Hounsfield Units (HU) or a single windowed image, we will generate **multiple input channels** from each 3D CT volume. Each channel will be windowed to highlight different anatomical structures relevant to aneurysm detection.
        *   **Vessel Window:** Focus on blood vessels (e.g., window range 100-300 HU) to emphasize the arterial tree.
        *   **Brain Parenchyma Window:** A broader window for brain tissue (e.g., window range 0-80 HU) to provide context for surrounding anatomy.
        *   **Bone Window (Optional but useful):** A very wide window (e.g., 500-1500 HU) can help identify bone structures, crucial for anatomical orientation and potential artifact detection.
    *   Each channel will then be independently normalized to a standard range (e.g., [0, 1] or [-1, 1]). This explicitly provides the model with different "views" of the same data, acting as a powerful feature engineering step.

2.  **Data Augmentation: 3D Elastic Deformations:**
    *   Beyond standard geometric augmentations (random rotations, scaling, flips), **elastic deformations** are crucial for medical images. These non-linear transformations simulate realistic anatomical variations and scanning artifacts, significantly improving the model's robustness and generalization.
    *   We will apply 3D elastic deformations using a randomly generated displacement field, which subtly "stretches" and "compresses" parts of the image volume. This forces the model to learn features that are invariant to minor anatomical shifts, making it more resilient to inter-patient variability.

**Rationale:**
*   **Multi-Channel Windowing:** Empowers the model with expert-level "feature selection" by presenting critical information (vessels, brain, bone) explicitly, rather than relying on the network to learn optimal windowing implicitly.
*   **Elastic Deformations:** Directly addresses the challenge of anatomical variability and improves the model's ability to detect aneurysms despite subtle differences in patient anatomy and image acquisition.

#### 4. Validation Strategy: Patient-Level Stratified K-Fold Cross-Validation

A robust validation strategy is paramount to ensure the model generalizes well to unseen data and avoids patient-level data leakage, which is a common pitfall in medical competitions.

1.  **Patient-Level Split:** The most critical aspect is to ensure that all series and studies belonging to a **single patient (`PatientID`) are entirely contained within either the training set or the validation set for any given fold.** This prevents the model from "seeing" any data from a patient during training and then evaluating on another scan from the *same* patient, which would lead to an overly optimistic performance estimate.

2.  **Stratified K-Fold Cross-Validation:**
    *   We will employ **5-Fold Cross-Validation (K=5)**. This provides a good balance between training data size per fold and robust performance estimation across multiple splits.
    *   **Stratification:** The folds will be stratified primarily based on the presence of `Aneurysm Present`. Additionally, we will endeavor to balance the distribution of positive cases for the 13 specific aneurysm locations across the folds. This is crucial for handling the class imbalance often seen in medical imaging datasets (where rare conditions are challenging to predict). We might use a multi-label stratification technique or an iterative approach to ensure label balance.

3.  **Performance Metrics:**
    *   **Primary Metric:** The official **Weighted Log Loss** will be continuously monitored during training and reported for each validation fold. This directly reflects the competition's evaluation.
    *   **Secondary Metrics:** We will also monitor **individual Log Loss per label** and **AUC-ROC per label** to gain granular insights into model performance, especially for rare aneurysm locations. Precision, Recall, and F1-score will be useful for understanding classification thresholds.

4.  **Early Stopping:** Implement early stopping based on the average Weighted Log Loss across validation folds (or the current fold's validation loss), with a carefully tuned patience parameter to prevent overfitting without premature termination.

5.  **Final Model:** The final submission will be an **ensemble of the 5 models** (one trained for each fold). By averaging the predictions of these models, we leverage their collective strengths, reduce variance, and typically achieve a significant boost in performance and robustness compared to a single model.

This validation strategy, combined with the proposed architecture and data handling, forms a solid foundation for a top-performing solution in the RSNA Intracranial Aneurysm Detection competition.




## üêç GENERATED CODE
Error: Generation failed with Timeout of 600.0s exceeded, last exception: 503 upstream request timeout

‚úÖ Code saved to 'submission.py'
