# Data Exploration - RSNA Intracranial Aneurysm Detection

This notebook performs an initial exploratory analysis of the Kaggle challenge data for intracranial aneurysm detection.

## 1. Import Libraries
Import the necessary libraries for data analysis and visualization.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

## 2. Load and Display Metadata
Load the train and test metadata CSV files, print their shapes, and preview the first rows of the training set.

In [None]:
# Locations of the metadata CSVs, relative to this notebook.
# Edit these two paths if the data was downloaded somewhere else.
train_csv, test_csv = '../data/train.csv', '../data/test.csv'

# Parse each CSV into its own DataFrame (train is read before test).
train_df = pd.read_csv(filepath_or_buffer=train_csv)
test_df = pd.read_csv(filepath_or_buffer=test_csv)

# Report the (rows, columns) shape of each table ...
train_shape, test_shape = train_df.shape, test_df.shape
print('Train shape:', train_shape)
print('Test shape:', test_shape)

# ... and preview the first rows of the training table.
display(train_df.head(n=5))

## 3. Target Variable Analysis
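Visualize the distribution of the target variable (aneurysm presence) in the training set and print the class proportions. The code assumes the target column is named `any_aneurysm`; if it is absent, a message is printed instead.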

In [None]:
# The target column is assumed to be called 'any_aneurysm' -- verify this
# against the actual CSV header before relying on the plot.
if 'any_aneurysm' not in train_df.columns:
    print('Column any_aneurysm not found in the dataset.')
else:
    # Bar chart of how many rows fall into each target value.
    plt.figure(figsize=(6, 4))
    sns.countplot(data=train_df, x='any_aneurysm')
    plt.title('Target Variable Distribution')
    plt.show()

    # Relative frequency of each target value (normalize=True) instead of raw counts.
    class_fractions = train_df['any_aneurysm'].value_counts(normalize=True)
    print(class_fractions)

## 4. Medical Image Exploration
Display one example DICOM image from the first study listed in the training metadata, and print its pixel-array shape and DICOM header.

In [None]:
# Show one example DICOM image from the first study in the training metadata,
# then print its pixel-array shape and its DICOM header.
#
# NOTE(review): cv2 is not used in this cell; the import is kept because other
# cells may rely on it. Consider moving both imports to the top import cell.
import pydicom
import cv2

# Example path to a DICOM image (adjust according to your structure)
dicom_dir = '../data/train_images/'

# Take the study identifier from the first metadata row.
# - Guard against a missing 'ID' column and an empty DataFrame (iloc[0] on an
#   empty frame raises IndexError).
# - Index the column first, then the row: building a full row Series
#   (train_df.iloc[0]) can upcast an integer ID to float when every column is
#   numeric, which would turn the folder name into e.g. '123.0'.
if 'ID' in train_df.columns and len(train_df) > 0:
    example_study = train_df['ID'].iloc[0]
else:
    example_study = None

# Test explicitly for None/NaN rather than truthiness, so that a valid but
# falsy identifier (e.g. the integer 0) is not reported as missing and a NaN
# identifier is not turned into the folder name 'nan'.
if example_study is not None and pd.notna(example_study):
    study_path = os.path.join(dicom_dir, str(example_study))
    # isdir rather than exists: os.listdir below needs a directory, and a
    # regular file with the same name would otherwise raise.
    if os.path.isdir(study_path):
        # os.listdir returns entries in arbitrary order; sort so the same file
        # is selected on every run. Match the extension case-insensitively so
        # files named '*.DCM' are not skipped.
        dicom_files = sorted(
            f for f in os.listdir(study_path) if f.lower().endswith('.dcm')
        )
        if dicom_files:
            dicom_file = os.path.join(study_path, dicom_files[0])
            ds = pydicom.dcmread(dicom_file)
            img = ds.pixel_array
            # NOTE(review): assumes a single-frame 2-D image; a multi-frame
            # file would yield a 3-D array that imshow may reject -- confirm.
            plt.imshow(img, cmap='gray')
            plt.title(f'DICOM Image - {dicom_files[0]}')
            plt.axis('off')
            plt.show()
            print('Shape:', img.shape)
            print('DICOM info:', ds)
        else:
            print('No DICOM files found in the study.')
    else:
        print('Study folder not found.')
else:
    print('Study ID not found in the dataset.')