In [None]:
#1: Imports
import pandas as pd
import numpy as np
import plotly.express as px
import sys
import os

# Add the parent directory to the import path so the `data_simulation`
# package imported below can be resolved (assumes the notebook's working
# directory sits one level below the project root — TODO confirm).
# The path is normalized and appended only when absent, so re-running this
# cell does not stack duplicate entries onto sys.path.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from data_simulation.wearables import WearableSimulator
from data_simulation.air_quality import EnvironmentalSimulator
from data_simulation.weather import WeatherSimulator

In [None]:
# 2: Generate sample data

# Simulation settings, named once so they are easy to find and tune.
NUM_PATIENTS = 1000
NUM_SENSORS = 50
SIM_DATE = '2024-01-15'
HOSPITAL_ID = 'hospital_01'
CITY_ID = 'city_01'

# Each simulator is constructed immediately before it is queried; keep this
# order in case the simulators draw from a shared random state (not visible
# from this notebook — TODO confirm).
wear_sim = WearableSimulator(num_patients=NUM_PATIENTS)
health_df = wear_sim.generate_daily_data(SIM_DATE, HOSPITAL_ID)

env_sim = EnvironmentalSimulator(num_sensors=NUM_SENSORS)
env_df = env_sim.generate_sensor_data(CITY_ID)

weather_sim = WeatherSimulator()
weather = weather_sim.generate_forecast(CITY_ID)

# Quick size / sanity summary of what was generated.
# NOTE(review): "Risk cases" is the column sum of risk_score, which is only a
# case count if risk_score is a 0/1 flag — confirm against the simulator.
print(f"Health records: {len(health_df)}")
print(f"Risk cases: {health_df['risk_score'].sum()}")
print(f"Environmental sensors: {len(env_df)}")
print(f"Weather forecast AQI: {weather['air_quality_index']}")


In [None]:
# 3: Data quality checks

# Per-column count of missing entries in the health records.
missing_per_column = health_df.isna().sum()
print("Missing values:", missing_per_column, sep="\n")

# Summary statistics as reported by DataFrame.describe().
summary_stats = health_df.describe()
print("\nBasic statistics:", summary_stats, sep="\n")


In [None]:
# 4: Correlation analysis

# Columns included in the pairwise correlation matrix.
corr_columns = ['heart_rate', 'steps', 'sleep_hours', 'respiratory_rate', 'risk_score']
corr_matrix = health_df[corr_columns].corr()

# Render the matrix as a heatmap on a diverging colour scale.
fig = px.imshow(
    corr_matrix,
    title="Health Metrics Correlation Matrix",
    color_continuous_scale='RdBu_r',
)
fig.show()


In [None]:
# 5: Risk distribution by heart rate

# Histogram settings: one overlaid trace per distinct risk_score value.
histogram_options = dict(
    x='heart_rate',
    color='risk_score',
    title='Heart Rate Distribution by Risk',
    barmode='overlay',
)
fig = px.histogram(health_df, **histogram_options)
fig.show()

In [None]:
# 6: Merge environmental data

# Average the sensor readings per node. numeric_only=True restricts the
# aggregation to numeric columns: since pandas 2.0, GroupBy.mean() raises
# TypeError on non-numeric columns instead of silently dropping them
# (whether env_df has such columns is not visible here — TODO confirm).
env_mean = env_df.groupby('node_id').mean(numeric_only=True).reset_index()

# Left join keeps every health record, including ones with no matching node.
merged_raw = health_df.merge(env_mean, on='node_id', how='left')

# Remember which rows carry a real PM2.5 reading BEFORE filling gaps, so that
# filled-in zeros are not mistaken for genuine clean-air measurements.
has_pm25 = merged_raw['pm25'].notna()

# merged_df keeps zero-filled gaps, so any later cell that uses it sees no NaNs.
merged_df = merged_raw.fillna(0)

# Correlation with pollution: plot only rows with an actual PM2.5 reading;
# zero-filled rows would otherwise pile up as fabricated points at pm25 == 0.
fig = px.scatter(
    merged_df[has_pm25], x='pm25', y='heart_rate', color='risk_score',
    title='Heart Rate vs PM2.5'
)
fig.show()