In [1]:
# --------------------------------------------------------------
# Healthcare Data Analysis using NumPy Broadcasting
# Author: Preethi
# Industry: Healthcare
# --------------------------------------------------------------

import numpy as np

# ==============================================================
# Step 1: Build the structured patient array
# ==============================================================
# Field layout of one patient record:
#   name        -> unicode string, up to 10 characters ('U10')
#   age         -> 32-bit integer ('i4')
#   bp          -> 32-bit float ('f4'), blood pressure reading
#   cholesterol -> 32-bit float ('f4'), cholesterol level
patient_dtype = [
    ('name', 'U10'),
    ('age', 'i4'),
    ('bp', 'f4'),
    ('cholesterol', 'f4'),
]

# One tuple per patient, written in field order: (name, age, bp, cholesterol).
patient_records = [
    ('Preethi', 28, 120.0, 190.0),
    ('Arun', 45, 135.0, 210.0),
    ('Meena', 32, 128.0, 180.0),
    ('Ravi', 55, 145.0, 230.0),
    ('Sneha', 40, 130.0, 200.0),
]
patients = np.array(patient_records, dtype=patient_dtype)

print("=== Original Patient Dataset ===")
print(patients)

# ==============================================================
# Step 2: Whole-field updates via scalar broadcasting
# ==============================================================
# A scalar operand is stretched across every element of a field, so each
# statement below updates all records without an explicit Python loop.

# Lower every cholesterol value by 5% (simulated health improvement).
patients['cholesterol'] = patients['cholesterol'] * 0.95

# Raise every BP reading by 2 (measurement correction).
patients['bp'] = patients['bp'] + 2

print("\n=== After General Broadcasting Operations ===")
print(patients)

# ==============================================================
# Step 3: Conditional update with np.where
# ==============================================================
# Patients older than 50 get a further 10% cholesterol reduction; everyone
# else keeps the current value.
over_50 = patients['age'] > 50
reduced_cholesterol = patients['cholesterol'] * 0.9
patients['cholesterol'] = np.where(
    over_50,
    reduced_cholesterol,      # picked where the age condition holds
    patients['cholesterol'],  # picked everywhere else (unchanged)
)

print("\n=== After Conditional Broadcasting (Age > 50) ===")
print(patients)

# ==============================================================
# Step 4: Summary statistics
# ==============================================================
# Mean of each measurement field, plus the cholesterol extremes, all taken
# after the adjustments above.
avg_bp = patients['bp'].mean()
avg_chol = patients['cholesterol'].mean()
max_chol = patients['cholesterol'].max()
min_chol = patients['cholesterol'].min()

print(f"\nAverage Blood Pressure: {avg_bp:.2f}")
print(f"Average Cholesterol Level: {avg_chol:.2f}")
print(f"Highest Cholesterol Level: {max_chol}")
print(f"Lowest Cholesterol Level: {min_chol}")

# ==============================================================
# Step 5: Broadcasting Explanation
# ==============================================================
# Section header followed by the four numbered takeaways; printed one
# entry per line.
explanation_lines = (
    "\n--- Broadcasting Explanation ---",
    "1. Broadcasting allowed operations (like *0.95 or +2) to be applied to entire columns.",
    "2. NumPy automatically expanded scalars to match the array shape, eliminating loops.",
    "3. Conditional broadcasting (np.where) applied extra reductions only where 'age > 50'.",
    "4. This improves performance and readability for data analysis tasks.",
)
print(*explanation_lines, sep="\n")

# ==============================================================
# Step 6: Reflection Questions
# ==============================================================
# Each entry pairs a question (with its leading blank line) with the
# lines of its answer, in the order they are printed.
reflection_qa = (
    # Q1: how broadcasting simplifies arithmetic
    (
        "\nQ1. How does broadcasting simplify arithmetic operations in NumPy?",
        (
            "A1. Broadcasting automatically applies arithmetic operations across arrays or scalars",
            "    without writing explicit loops, making numerical code concise and efficient.",
        ),
    ),
    # Q2: structured arrays compared with lists / dictionaries
    (
        "\nQ2. What are the advantages of structured arrays over lists or dictionaries?",
        (
            "A2. Structured arrays allow multiple data types (string, int, float) in one array,",
            "    provide fast vectorized operations, and make data manipulation easier than",
            "    using Python lists or dictionaries, which require manual looping.",
        ),
    ),
    # Q3: when broadcasting raises an error
    (
        "\nQ3. When does broadcasting lead to an error?",
        (
            "A3. Broadcasting fails when array shapes are incompatible. For example,",
            "    adding arrays of shape (3,) and (2,) causes an error because NumPy",
            "    cannot align their dimensions for element-wise operations.",
        ),
    ),
)
for question, answer_lines in reflection_qa:
    print(question)
    print(*answer_lines, sep="\n")


=== Original Patient Dataset ===
[('Preethi', 28, 120., 190.) ('Arun', 45, 135., 210.)
 ('Meena', 32, 128., 180.) ('Ravi', 55, 145., 230.)
 ('Sneha', 40, 130., 200.)]

=== After General Broadcasting Operations ===
[('Preethi', 28, 122., 180.5) ('Arun', 45, 137., 199.5)
 ('Meena', 32, 130., 171. ) ('Ravi', 55, 147., 218.5)
 ('Sneha', 40, 132., 190. )]

=== After Conditional Broadcasting (Age > 50) ===
[('Preethi', 28, 122., 180.5 ) ('Arun', 45, 137., 199.5 )
 ('Meena', 32, 130., 171.  ) ('Ravi', 55, 147., 196.65)
 ('Sneha', 40, 132., 190.  )]

Average Blood Pressure: 133.60
Average Cholesterol Level: 187.53
Highest Cholesterol Level: 199.5
Lowest Cholesterol Level: 171.0

--- Broadcasting Explanation ---
1. Broadcasting allowed operations (like *0.95 or +2) to be applied to entire columns.
2. NumPy automatically expanded scalars to match the array shape, eliminating loops.
3. Conditional broadcasting (np.where) applied extra reductions only where 'age > 50'.
4. This improves performance