In [1]:
import numpy as np

# Set seed for reproducibility
np.random.seed(42)

# Substation IDs
substations = np.array(['S1', 'S2', 'S3', 'S4', 'S5'])

# Base load in MW (random between 50 and 100)
base_load = np.random.randint(50, 101, size=5)

# Voltage magnitudes (in per unit, realistic range)
voltages = np.round(np.random.uniform(0.95, 1.05, size=5), 2)

# Annual growth rate (between 3% and 6%)
growth_rate = np.round(np.random.uniform(0.03, 0.06, size=5), 3)

# Peak season multiplier (between 1.1 and 1.25)
peak_multiplier = np.round(np.random.uniform(1.1, 1.25, size=5), 2)

# Display synthetic data
for i in range(5):
    print(f"{substations[i]} → Load: {base_load[i]} MW | Voltage: {voltages[i]} p.u. | Growth: {growth_rate[i]*100:.1f}% | Peak x{peak_multiplier[i]}")

S1 → Load: 88 MW | Voltage: 1.01 p.u. | Growth: 4.8% | Peak x1.13
S2 → Load: 78 MW | Voltage: 0.97 p.u. | Growth: 5.1% | Peak x1.13
S3 → Load: 64 MW | Voltage: 0.97 p.u. | Growth: 3.1% | Peak x1.13
S4 → Load: 92 MW | Voltage: 0.96 p.u. | Growth: 5.9% | Peak x1.15
S5 → Load: 57 MW | Voltage: 1.04 p.u. | Growth: 5.5% | Peak x1.18


# Category 1: Regenerated Arithmetic Operations Exercises (Set 1 of 7)

In [23]:
# Exercise 1: Projected Load After 5 Years
years = 5
future_load = base_load * (1 + growth_rate) ** years
future_load

array([111.24719909, 100.02491312,  74.5544036 , 122.53710844,
        74.49672037])

In [24]:
# Exercise 2: Load Increase Over 5 Years
load_increase = future_load - base_load
load_increase

array([23.24719909, 22.02491312, 10.5544036 , 30.53710844, 17.49672037])

In [25]:
# Exercise 3: Peak Season Load
peak_load = base_load * peak_multiplier
peak_load

array([ 99.44,  88.14,  72.32, 105.8 ,  67.26])

In [26]:
# Exercise 4: Load Normalization
normalized_load = (base_load - base_load.min()) / (base_load.max() - base_load.min())
normalized_load

array([0.88571429, 0.6       , 0.2       , 1.        , 0.        ])

In [27]:
# Exercise 5: Load-to-Voltage Ratio
lv_ratio = base_load / voltages
lv_ratio

array([87.12871287, 80.41237113, 65.97938144, 95.83333333, 54.80769231])

# Category 2: Array Manipulation (Set 2 of 7)

In [28]:
# Exercise 1: Combine Substation Data
# Combine substations, base load, and voltages into a structured array
grid_info = np.column_stack((substations, base_load, voltages))
grid_info

array([['S1', '88', '1.01'],
       ['S2', '78', '0.97'],
       ['S3', '64', '0.97'],
       ['S4', '92', '0.96'],
       ['S5', '57', '1.04']], dtype='<U32')

In [29]:
# Exercise 2: Add a New Substation
# New substation data
new_data = np.array([['S6', 85, 1.01]])

# Append to existing grid
grid_info = np.vstack((grid_info, new_data))
grid_info

array([['S1', '88', '1.01'],
       ['S2', '78', '0.97'],
       ['S3', '64', '0.97'],
       ['S4', '92', '0.96'],
       ['S5', '57', '1.04'],
       ['S6', '85', '1.01']], dtype='<U32')

In [30]:
# Exercise 3: Remove a Faulty Substation
# Remove substation at index 2 (S3)
grid_info = np.delete(grid_info, 2, axis=0)
grid_info

array([['S1', '88', '1.01'],
       ['S2', '78', '0.97'],
       ['S4', '92', '0.96'],
       ['S5', '57', '1.04'],
       ['S6', '85', '1.01']], dtype='<U32')

In [31]:
# Exercise 4: Flatten Load Data
# Flatten base load for ML input
flat_load = base_load.flatten()
flat_load

array([88, 78, 64, 92, 57])

In [32]:
# Exercise 5: Transpose Grid Matrix
# Transpose for column-wise operations
transposed = grid_info.T
transposed

array([['S1', 'S2', 'S4', 'S5', 'S6'],
       ['88', '78', '92', '57', '85'],
       ['1.01', '0.97', '0.96', '1.04', '1.01']], dtype='<U32')

# Category 3: Broadcasting (Set 3 of 7)

In [33]:
# Exercise 1: Add Reserve Margin to All Loads
reserve_margin = 10  # MW
adjusted_load = base_load + reserve_margin
adjusted_load

array([ 98,  88,  74, 102,  67])

In [34]:
# Exercise 2: Normalize Load by Voltage
normalized = base_load / voltages
normalized

array([87.12871287, 80.41237113, 65.97938144, 95.83333333, 54.80769231])

In [35]:
# Exercise 3: Subtract Regional Average Load
regional_avg = np.mean(base_load)
load_deviation = base_load - regional_avg
load_deviation

array([ 12.2,   2.2, -11.8,  16.2, -18.8])

In [36]:
# Exercise 4: Multiply Load by Seasonal Factors
seasonal_factors = np.array([1.1, 1.2, 1.15, 1.05, 1.25])
seasonal_load = base_load * seasonal_factors
seasonal_load

array([96.8 , 93.6 , 73.6 , 96.6 , 71.25])

In [42]:

# Exercise 5: Broadcast 1D Array to 2D Matrix
# Create 2D matrix of 5 days of load
daily_variation = np.array([0.95, 1.0, 1.05, 1.1, 0.9])
load_matrix = base_load[:, np.newaxis] * daily_variation
load_matrix

array([[ 83.6 ,  88.  ,  92.4 ,  96.8 ,  79.2 ],
       [ 74.1 ,  78.  ,  81.9 ,  85.8 ,  70.2 ],
       [ 60.8 ,  64.  ,  67.2 ,  70.4 ,  57.6 ],
       [ 87.4 ,  92.  ,  96.6 , 101.2 ,  82.8 ],
       [ 54.15,  57.  ,  59.85,  62.7 ,  51.3 ]])

# Category 4: Broadcasting with Different Shapes (Set 4 of 7)

In [43]:
# Exercise 1: Weekly Load Profile (5 substations × 7 days)
# Daily variation factors (Mon–Sun)
daily_factors = np.array([0.95, 1.0, 1.05, 1.1, 1.2, 1.15, 0.9])  # shape (7,)

# Broadcast base_load (5,) to (5, 7)
weekly_load = base_load[:, np.newaxis] * daily_factors
weekly_load

array([[ 83.6 ,  88.  ,  92.4 ,  96.8 , 105.6 , 101.2 ,  79.2 ],
       [ 74.1 ,  78.  ,  81.9 ,  85.8 ,  93.6 ,  89.7 ,  70.2 ],
       [ 60.8 ,  64.  ,  67.2 ,  70.4 ,  76.8 ,  73.6 ,  57.6 ],
       [ 87.4 ,  92.  ,  96.6 , 101.2 , 110.4 , 105.8 ,  82.8 ],
       [ 54.15,  57.  ,  59.85,  62.7 ,  68.4 ,  65.55,  51.3 ]])

In [44]:
# Exercise 2: Hourly Load Curve for One Substation
# Hourly profile (normalized)
hourly_profile = np.linspace(0.6, 1.2, 24)  # shape (24,)

# Load for substation S1 across 24 hours
hourly_load_S1 = base_load[0] * hourly_profile
hourly_load_S1

array([ 52.8       ,  55.09565217,  57.39130435,  59.68695652,
        61.9826087 ,  64.27826087,  66.57391304,  68.86956522,
        71.16521739,  73.46086957,  75.75652174,  78.05217391,
        80.34782609,  82.64347826,  84.93913043,  87.23478261,
        89.53043478,  91.82608696,  94.12173913,  96.4173913 ,
        98.71304348, 101.00869565, 103.30434783, 105.6       ])

In [46]:
# Exercise 3: Full Grid Hourly Load Matrix (5 substations × 24 hours)
# Reshape base_load to (5, 1)
substation_matrix = base_load[:, np.newaxis]  # shape (5, 1)

# Broadcast with hourly_profile (24,) → result: (5, 24)
hourly_matrix = substation_matrix * hourly_profile
hourly_matrix

array([[ 52.8       ,  55.09565217,  57.39130435,  59.68695652,
         61.9826087 ,  64.27826087,  66.57391304,  68.86956522,
         71.16521739,  73.46086957,  75.75652174,  78.05217391,
         80.34782609,  82.64347826,  84.93913043,  87.23478261,
         89.53043478,  91.82608696,  94.12173913,  96.4173913 ,
         98.71304348, 101.00869565, 103.30434783, 105.6       ],
       [ 46.8       ,  48.83478261,  50.86956522,  52.90434783,
         54.93913043,  56.97391304,  59.00869565,  61.04347826,
         63.07826087,  65.11304348,  67.14782609,  69.1826087 ,
         71.2173913 ,  73.25217391,  75.28695652,  77.32173913,
         79.35652174,  81.39130435,  83.42608696,  85.46086957,
         87.49565217,  89.53043478,  91.56521739,  93.6       ],
       [ 38.4       ,  40.06956522,  41.73913043,  43.40869565,
         45.07826087,  46.74782609,  48.4173913 ,  50.08695652,
         51.75652174,  53.42608696,  55.09565217,  56.76521739,
         58.43478261,  60.10434783,  6

In [47]:
# Exercise 4: Subtract Regional Mean from Each Substation (Centering)
regional_mean = np.mean(base_load)  # scalar
centered_load = base_load - regional_mean
centered_load

array([ 12.2,   2.2, -11.8,  16.2, -18.8])

In [48]:
# Exercise 5: Add Load Adjustment Matrix (5×1) to Weekly Load (5×7)
# Adjustment per substation (e.g., due to DERs or losses)
adjustment = np.array([[2], [-1], [0], [3], [-2]])  # shape (5, 1)

# Apply to weekly load
adjusted_weekly = weekly_load + adjustment
adjusted_weekly

array([[ 85.6 ,  90.  ,  94.4 ,  98.8 , 107.6 , 103.2 ,  81.2 ],
       [ 73.1 ,  77.  ,  80.9 ,  84.8 ,  92.6 ,  88.7 ,  69.2 ],
       [ 60.8 ,  64.  ,  67.2 ,  70.4 ,  76.8 ,  73.6 ,  57.6 ],
       [ 90.4 ,  95.  ,  99.6 , 104.2 , 113.4 , 108.8 ,  85.8 ],
       [ 52.15,  55.  ,  57.85,  60.7 ,  66.4 ,  63.55,  49.3 ]])

# Category 5: Indexing and Slicing (Set 5 of 7)

In [50]:
import numpy as np


# Substation IDs

substations = np.array(['S1', 'S2', 'S3', 'S4', 'S5'])


# Base load in MW

base_load = np.array([72, 95, 60, 88, 77])


# Voltage magnitudes (in p.u.)

voltages = np.array([1.01, 0.98, 1.00, 1.02, 0.99])


# Combine into a 2D array: [Substation, Load, Voltage]

grid_matrix = np.column_stack((substations, base_load, voltages))
grid_matrix

array([['S1', '72', '1.01'],
       ['S2', '95', '0.98'],
       ['S3', '60', '1.0'],
       ['S4', '88', '1.02'],
       ['S5', '77', '0.99']], dtype='<U32')

In [62]:
# Exercise 1: Slice First Three Substations
# Explanation: Extracts data for S1, S2, and S3. Useful for regional filtering.

sliced_array = grid_matrix[:3]
sliced_array

array([['S1', '72', '1.01'],
       ['S2', '95', '0.98'],
       ['S3', '60', '1.0']], dtype='<U32')

In [64]:
# Exercise 2: Access Load of Substation S4
# Explanation: Direct indexing to retrieve a specific value.

accessing_load = grid_matrix[3, 1]
accessing_load

'88'

In [66]:
# Exercise 3: Get All Voltage Values
# Explanation: Extracts the entire voltage column. .astype(float) ensures numeric type.

accessing_voltage = grid_matrix[:,2].astype(float)
accessing_voltage

array([1.01, 0.98, 1.  , 1.02, 0.99])

In [72]:
# Exercise 4: Reverse Substation Order
# Explanation: Slices with negative step to reverse the array. Useful for chronological or priority-based sorting.

reverse_substation = grid_matrix[::-1]
reverse_substation

array([['S5', '77', '0.99'],
       ['S4', '88', '1.02'],
       ['S3', '60', '1.0'],
       ['S2', '95', '0.98'],
       ['S1', '72', '1.01']], dtype='<U32')

In [74]:
# Exercise 5: Select Alternate Substations
# Explanation: Selects S1, S3, S5. Great for sampling or load balancing simulations.

alternate_substations = grid_matrix[::2]
alternate_substations

array([['S1', '72', '1.01'],
       ['S3', '60', '1.0'],
       ['S5', '77', '0.99']], dtype='<U32')

# Category 6: Reshaping Arrays (Set 6 of 7)

In [75]:
import numpy as np


# Base load for 5 substations over 7 days (MW)

np.random.seed(42)

base_load = np.random.randint(60, 100, size=(5, 7)) # shape (5, 7)

In [77]:
# Exercise 1: Flatten Weekly Load to 1D
# Explanation: Converts (5, 7) to (35,) Use case: Feeding into ML models or plotting time series

flatten_load = base_load.flatten()
flatten_load.shape

(35,)

In [79]:
# Exercise 2: Reshape to 7×5 (Days × Substations)
# Explanation: Transposes logic without .T Use case: Day-wise analysis across substations

transposed_matrix = base_load.reshape((7, 5))
transposed_matrix.shape

(7, 5)

In [82]:
# Exercise 3: Add Hourly Dimension (5 substations × 7 days × 1 hour)
# Explanation: Adds a third dimension Use case: Prepping for hourly expansion via broadcasting

hourly_dimension = base_load.reshape((5,7,1))
hourly_dimension.shape

(5, 7, 1)

In [83]:
# Exercise 4: Reshape to 1D and Back to 2D
# Explanation: Demonstrates reversible reshaping Use case: Data compression and restoration

reshaping = flatten_load.reshape((5,7))
reshaping.shape

(5, 7)

In [87]:
# Exercise 5: Reshape with Unknown Dimension
# Explanation: NumPy infers second dimension (35 columns) Use case: Useful when only one dimension is known

unknown_dimension = base_load.reshape(1,-1)
unknown_dimension.shape

(1, 35)

# Category 7: Data Cleaning (Set 7 of 7)

In [88]:
# Exercise 1: Identify Negative Voltage Readings
# Explanation: Creates a boolean mask for invalid (negative) voltage values.

voltages = np.array([1.01, 0.98, -1.02, 1.00, -0.95])
faulty_mask = voltages < 0

In [96]:
# Exercise 2: Replace Negative Voltages with NaN
# Explanation: Marks faulty readings as missing using np.nan.

replacing = np.where(faulty_mask, np.nan, voltages)
replacing

array([1.01, 0.98,  nan, 1.  ,  nan])

In [97]:
# Exercise 3: Remove Substations with Faulty Load
# Explanation: Filters out negative load values (e.g., due to sensor glitches).

filtered_values = voltages[voltages > 0]
filtered_values

array([1.01, 0.98, 1.  ])

In [98]:
# Exercise 4: Clip Voltage to Acceptable Range
# Explanation: Ensures all voltages stay within operational bounds.

clipped_voltage = np.clip(voltages, 0.90,1.00)
clipped_voltage

array([1.  , 0.98, 0.9 , 1.  , 0.9 ])

In [107]:
# Exercise 5: Detect Outliers Using Z-Score
# Explanation: Flags substations with anomalous load values.

mean_data = np.mean(voltages)
std_deviation = np.std(voltages)

z_score = (voltages - mean_data) / std_deviation
outliers = z_score > 0.80
outliers

array([ True, False, False,  True, False])

# Category 8: Missing Value Replacement (Set 8 of 8)

In [110]:
# Exercise 1: Replace NaNs with Mean Voltage
# Explanation: Imputes missing values using the column mean.

mean_voltage = np.nanmean(replacing)
voltages_filled = np.where(np.isnan(replacing), mean_voltage, replacing)
voltages_filled

array([1.01      , 0.98      , 0.99666667, 1.        , 0.99666667])

In [112]:
# Exercise 2: Forward Fill Missing Load (1D)
# Explanation: Fills missing values with the last known good value.

for i in range(1, len(replacing)):
    if np.isnan(replacing[i]):
        replacing[i] = replacing[i - 1]
replacing

array([1.01, 0.98, 0.98, 1.  , 1.  ])

In [None]:
# Exercise 3: Interpolate Missing Voltages
# Explanation: Uses linear interpolation to estimate missing values.



In [113]:
# Exercise 4: Replace NaNs with Median
# Explanation: Median is more robust to outliers than mean.

median_voltage = np.nanmedian(replacing)
voltages_filled1 = np.where(np.isnan(replacing), median_voltage, replacing)
voltages_filled1

array([1.01, 0.98, 0.98, 1.  , 1.  ])

In [118]:
# Exercise 5: Replace Missing with Substation-Specific Default
# Explanation: Uses predefined fallback values per substation.

default = np.array([1.01, 0.46, 1.2, 1.10, 0.85])
replacing = np.array([1.26, np.nan, 0.48, 1.25, np.nan])
filled_custom = np.where(np.isnan(replacing), default, replacing)
filled_custom

array([1.26, 0.46, 0.48, 1.25, 0.85])