# 19 - Masked Arrays

This notebook covers NumPy's masked arrays for handling invalid or missing data.

## What You'll Learn
- Creating masked arrays
- Operations with masked arrays
- Masking conditions
- Practical applications

In [None]:
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

## Creating Masked Arrays

In [None]:
# Pair a plain data array with an explicit boolean mask.
# In the mask, True marks an element as masked (hidden); False keeps it visible.
data = np.arange(1, 6)
mask = np.zeros(data.shape, dtype=bool)
mask[2] = True  # hide the third element (value 3)

masked_arr = ma.array(data, mask=mask)

# Show the inputs next to the result; the masked slot is rendered as "--".
for line in (
    f"Data: {data}",
    f"Mask: {mask}",
    f"Masked array: {masked_arr}",
):
    print(line)

In [None]:
# Hide a sentinel value: -999 stands in for "no valid value" in this data.
SENTINEL = -999
data = np.array([1, SENTINEL, 3, SENTINEL, 5])

# masked_equal masks every element that equals the given value.
masked_arr = ma.masked_equal(data, SENTINEL)

print(
    f"Original: {data}",
    f"Masked (-999): {masked_arr}",
    sep="\n",
)

In [None]:
# Threshold-based masking: each helper returns a masked array in which the
# elements that satisfy the stated condition are hidden.
data = np.arange(1, 11)  # the integers 1..10

masked_range = ma.masked_outside(data, 3, 7)  # hide values < 3 or > 7
masked_less = ma.masked_less(data, 3)         # hide values strictly below 3
masked_greater = ma.masked_greater(data, 7)   # hide values strictly above 7

print(
    f"Original: {data}",
    f"Masked > 7: {masked_greater}",
    f"Masked < 3: {masked_less}",
    f"Masked outside 3-7: {masked_range}",
    sep="\n",
)

## Operations with Masked Arrays

In [None]:
# Reductions on a masked array use only the unmasked entries, so the two
# -999 placeholders below do not contribute to any of the statistics.
data = np.array([1, 2, -999, 4, 5, -999, 7, 8, 9, 10])
masked_arr = ma.masked_equal(data, -999)

print(
    f"Masked array: {masked_arr}",
    f"\nSum: {masked_arr.sum()}",
    f"Mean: {masked_arr.mean()}",
    f"Std: {masked_arr.std():.4f}",
    f"Count (valid): {masked_arr.count()}",  # count() = number of unmasked entries
    sep="\n",
)

In [None]:
# Element-wise arithmetic propagates masks: a result slot is masked whenever
# the matching slot is masked in either operand.
a = ma.array([1, 2, 3, 4], mask=[False, False, True, False])
b = ma.array([5, 6, 7, 8], mask=[False, True, False, False])

print(
    f"a: {a}",
    f"b: {b}",
    f"a + b: {a + b}",
    sep="\n",
)

## Filling Masked Values

In [None]:
# Integer masked array with the 3rd and 5th entries masked.
data = ma.array([1, 2, 3, 4, 5], mask=[0, 0, 1, 0, 1])
print(f"Masked array: {data}")

# Fill with specific value
# filled() returns a plain ndarray with each masked slot replaced.
filled = data.filled(0)
print(f"Filled with 0: {filled}")

# Fill with mean
# filled() casts the fill value to the array's dtype. `data` holds integers,
# so filling it directly would truncate the mean of the valid entries
# (7/3 ≈ 2.33) to 2. Converting to float first keeps the fractional part.
filled_mean = data.astype(float).filled(data.mean())
print(f"Filled with mean: {filled_mean}")

## Practical Example: Temperature Data

In [None]:
# Ten daily temperature readings; -999 marks a sensor error.
days = np.arange(1, 11)
temps = np.array([22, 24, -999, 25, 23, -999, 26, 27, 25, 24])

# Hide the error readings so they are left out of the statistics below.
masked_temps = ma.masked_equal(temps, -999)

print(
    f"Raw data: {temps}",
    f"Masked data: {masked_temps}",
    f"\nAverage temperature: {masked_temps.mean():.1f}°C",
    f"Valid readings: {masked_temps.count()} out of {len(temps)}",
    sep="\n",
)

# Plot the readings using the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10, 4))
mean_temp = masked_temps.mean()  # mean over the unmasked readings only

# Blue circles joined by a line for the readings, dashed red line for the mean.
ax.plot(days, masked_temps, 'bo-', label='Temperature')
ax.axhline(mean_temp, color='r', linestyle='--', label=f'Mean: {mean_temp:.1f}°C')

ax.set_xlabel('Day')
ax.set_ylabel('Temperature (°C)')
ax.set_title('Temperature Data with Masked Invalid Readings')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Summary

Key functions:
- `ma.array()` - Create masked array
- `ma.masked_equal()`, `ma.masked_greater()`, etc. - Mask by condition
- Reductions (`.sum()`, `.mean()`, `.std()`, `.count()`) skip masked values; element-wise arithmetic propagates the mask
- `.filled()` - Replace masked values

## Exercises

1. Create a masked array that masks all negative values
2. Calculate statistics ignoring masked values
3. Fill masked values with the median of valid values
4. Create a 2D masked array and compute row means

In [None]:
# Your exercises here
