# Simulated Healthcare Dataset
This notebook generates a synthetic healthcare dataset and explores it with basic statistics and visualizations.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Seed NumPy's global random generator so the np.random draws in this notebook are repeatable.
np.random.seed(42)

## Generate Synthetic Dataset

In [None]:
n_rows = 500

# Build the columns one at a time. The np.random calls are issued in exactly
# this order so that a given seed of NumPy's global generator always yields
# the same synthetic table.
synthetic_columns = {}
synthetic_columns['PatientID'] = np.arange(1, n_rows + 1)
# randint's upper bound is exclusive: ages fall in 18..89.
synthetic_columns['Age'] = np.random.randint(18, 90, size=n_rows)
synthetic_columns['Gender'] = np.random.choice(['Male', 'Female'], size=n_rows)
# Uniform BMI in [15, 40), rounded to one decimal place.
synthetic_columns['BMI'] = np.random.uniform(15, 40, size=n_rows).round(1)
synthetic_columns['BloodPressure'] = np.random.randint(90, 180, size=n_rows)
synthetic_columns['Cholesterol'] = np.random.randint(120, 300, size=n_rows)
synthetic_columns['Glucose'] = np.random.randint(70, 200, size=n_rows)
# 'Yes' is drawn with probability 0.3 for Smoker and 0.2 for Diabetic.
synthetic_columns['Smoker'] = np.random.choice(['Yes', 'No'], size=n_rows, p=[0.3, 0.7])
synthetic_columns['Diabetic'] = np.random.choice(['Yes', 'No'], size=n_rows, p=[0.2, 0.8])

df = pd.DataFrame(synthetic_columns)

def assign_diagnosis(row):
    """Return a diagnosis label for one patient record.

    ``row`` is any mapping-like object (for example a DataFrame row) that
    supports lookup by the keys 'Glucose', 'Diabetic' and 'BloodPressure'.

    The rules are checked in priority order and the first match wins:

    1. 'Diabetic'     - Glucose >= 126, or the Diabetic flag equals 'Yes'
    2. 'Hypertension' - BloodPressure >= 140
    3. 'Pre-diabetic' - Glucose >= 100
    4. 'Healthy'      - none of the above
    """
    glucose = row['Glucose']

    # The Diabetic flag is only consulted when glucose alone does not decide.
    if glucose >= 126 or row['Diabetic'] == 'Yes':
        return 'Diabetic'

    if row['BloodPressure'] >= 140:
        return 'Hypertension'

    return 'Pre-diabetic' if glucose >= 100 else 'Healthy'

# Label every patient by running assign_diagnosis on each row of the frame.
df['Diagnosis'] = df.apply(assign_diagnosis, axis='columns')

# Preview the first five rows, now including the Diagnosis column.
df.head(5)

## Summary Statistics

In [None]:
# include='all' asks describe() to summarize every column, not only the numeric ones.
df.describe(include='all')

## Visualizations

In [None]:
# Histogram of patient ages (15 bins) with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Age'], bins=15, kde=True, ax=ax)
ax.set_title('Age Distribution')
plt.show()

In [None]:
# Histogram of BMI values (15 bins) with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['BMI'], bins=15, kde=True, ax=ax)
ax.set_title('BMI Distribution')
plt.show()

In [None]:
# Bar chart of how many patients fall under each diagnosis label.
# value_counts() sorts by frequency, so bars run from most to least common.
diagnosis_order = df['Diagnosis'].value_counts().index

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='Diagnosis', order=diagnosis_order, ax=ax)
ax.set_title('Diagnosis Counts')
# Tilt the category labels by 45 degrees.
ax.tick_params(axis='x', labelrotation=45)
plt.show()

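Sample patient information (NOTE for reviewers: the SIN values listed below are personal identifiers — confirm they are synthetic, or redact them, before sharing this notebook):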

- Patient #1 SIN: 728-931-476    Average BMI: 26.8
- Patient #2 SIN: 316-274-089    Average BMI: 24.5
- Patient #3 SIN: 504-683-712    Average BMI: 29.1
- Patient #4 SIN: 187-345-920    Average BMI: 31.3
- Patient #5 SIN: 639-158-734    Average BMI: 22.7
