# BioDynamICS: Data Exploration

This notebook performs initial data exploration on the MIMIC-III demo dataset to understand its structure and characteristics.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Make the parent of the current working directory importable so that the
# project's own packages (e.g. `src`) can be resolved from this notebook.
module_path = os.path.abspath('..')
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

# Import our data integration module
from src.data_integration import MimicPatientIntegrator

In [None]:
# Plot styling shared by every figure in this notebook: white-grid look,
# then a default figure size and base font size applied in one update.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set(style="whitegrid")
plt.rcParams.update({
    'figure.figsize': [12, 6],
    'font.size': 12,
})

In [None]:
# Initialize data integrator
# The dataset directory is given as a relative path (one level above the
# current working directory), so it depends on where the notebook is run from.
data_path = os.path.join('..', 'mimic-iii-clinical-database-demo-1.4')
mimic = MimicPatientIntegrator(data_path)

# Load core tables
# NOTE(review): presumably this also populates `mimic.tables`, which the
# following cells read — confirm against MimicPatientIntegrator.
patient_stays = mimic.load_core_tables()

## Exploring Patient Demographics

In [None]:
# Get basic demographics
# `mimic.tables` is looked up by table name; the 'PATIENTS' entry is used in
# the cells below as a DataFrame with 'gender' and 'subject_id' columns.
patients = mimic.tables['PATIENTS']

# Show the first few rows (as the last expression, this is the cell's output)
patients.head()

In [None]:
# Gender distribution: one bar per distinct value of the 'gender' column,
# bar height = number of patients with that value.
gender_counts = patients['gender'].value_counts()

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=gender_counts.index, y=gender_counts.values, ax=ax)
ax.set_title('Gender Distribution')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Build the timeline for one example patient: the first subject_id that
# appears in the patients table.
sample_patient_id = patients['subject_id'].iloc[0]
patient_timeline = mimic.create_patient_timeline(sample_patient_id)

# Dump every entry of the timeline's 'info' mapping, one "name: value" per line
patient_info = patient_timeline['info']
print("Patient Information:")
for field_name, field_value in patient_info.items():
    print(f"{field_name}: {field_value}")

In [None]:
# Examine timeline events for the sample patient.
# Reads `patient_timeline` and `sample_patient_id` set by the previous cell;
# prints the per-type event counts and draws a stacked bar chart of events
# per calendar day.
timeline = patient_timeline['timeline']
if not timeline.empty:
    # Count event types
    event_counts = timeline['event_type'].value_counts()
    print("Event type distribution:")
    print(event_counts)

    # Get the event counts per day: one row per date, one column per event
    # type, with 0 where a type did not occur on that date.
    # NOTE: this adds a 'date' column to `timeline` in place.
    # Assumes 'measurement_time' is a datetime column (required by `.dt`) — TODO confirm
    timeline['date'] = timeline['measurement_time'].dt.date
    daily_counts = timeline.groupby(['date', 'event_type']).size().unstack().fillna(0)

    # Plot event distribution over time.
    # DataFrame.plot opens its own figure unless it is handed an Axes, so the
    # Axes is created explicitly and passed in; calling plt.figure() first
    # would only leave a blank figure behind and its figsize would be ignored.
    fig, ax = plt.subplots(figsize=(14, 6))
    daily_counts.plot(kind='bar', stacked=True, ax=ax)
    ax.set_title(f'Daily Events for Patient {sample_patient_id}')
    ax.set_ylabel('Number of Events')
    ax.set_xlabel('Date')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()
else:
    print("No timeline events found for this patient")