# NumPy Programming Assignment

Part 1: Smart Array Factory
Task: Create a function that allows students to generate specific array types quickly using built-in NumPy methods.


In [4]:
import numpy as np

def array_factory(mode, shape, value=None):
    """
    Generate a NumPy array of a requested kind using built-in constructors.

    Args:
        mode (str): The type of array to generate.
                    Options: 'zeros', 'ones', 'full', 'identity'.
        shape (int or tuple): The dimensions of the array (e.g., (3,3) or 5).
                    For 'identity' this must describe a square matrix:
                    an int n, a 1-element sequence (n,), or (n, n).
        value (float, optional): The fill value required only if mode is 'full'.

    Returns:
        numpy.ndarray: The generated NumPy array.

    Raises:
        ValueError: If mode is 'full' but no value is provided, if mode is
                    'identity' and the shape is not square, or if mode is unknown.
    """
    if mode == 'zeros':
        return np.zeros(shape)

    if mode == 'ones':
        return np.ones(shape)

    if mode == 'full':
        # None doubles as the "not supplied" marker, so it cannot be a fill value.
        if value is None:
            raise ValueError("Mode 'full' requires the 'value' parameter to be specified.")
        return np.full(shape, value)

    if mode == 'identity':
        if isinstance(shape, (tuple, list)):
            # An identity matrix is square by definition. Reject shapes like
            # (2, 3) or () instead of silently using only the first entry.
            if len(shape) not in (1, 2) or shape[0] != shape[-1]:
                raise ValueError(
                    f"Mode 'identity' requires a square shape such as 3 or (3, 3); got {shape!r}."
                )
            n = shape[0]
        else:
            n = shape
        return np.eye(n)

    raise ValueError(f"The mode '{mode}' is not recognized.")

# --- Verification ---
if __name__ == "__main__":
    # Smoke-check two modes of array_factory and show the results.
    try:
        # A 3x3 identity matrix requested with a plain integer size
        identity_3x3 = array_factory('identity', 3)
        print("Identity Matrix (3x3):\n", identity_3x3)
        # A 2x3 block of zeros requested with a tuple shape
        zeros_2x3 = array_factory('zeros', (2, 3))
        print("\nZeros Array (2x3):\n", zeros_2x3)
    except NameError:
        # Raised when a name used above (e.g. np) has not been defined yet
        print("Please ensure NumPy is installed and the kernel is set to (.venv).")

Identity Matrix (3x3):
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

Zeros Array (2x3):
 [[0. 0. 0.]
 [0. 0. 0.]]


Part 2: The secure_reshape_and_stack Function.

This function demonstrates how to handle data integration by transforming a flat data structure into a matrix and then combining it with an existing dataset.

In [5]:
import numpy as np

def secure_reshape_and_stack(flat_data, target_shape, existing_dataset):
    """
    Reshape flat data into a matrix and append it below an existing dataset.

    Args:
        flat_data (array-like): The data to be transformed; an ndarray or
            anything np.asarray accepts (e.g. a list).
        target_shape (tuple): The desired (rows, cols) for the new matrix.
        existing_dataset (array-like): The dataset to which the new matrix will
            be added. Its rows come first in the result.

    Returns:
        np.ndarray: The combined dataset (existing rows followed by new rows).

    Raises:
        ValueError: If the flat_data cannot be reshaped to target_shape,
                    or if the new matrix dimensions don't match the
                    existing_dataset for stacking. The original NumPy error is
                    attached as the exception's cause.
    """
    try:
        # np.asarray lets plain sequences through without copying real ndarrays.
        reshaped_matrix = np.asarray(flat_data).reshape(target_shape)

        # Vertical stacking needs both operands to agree on the column count.
        return np.vstack((existing_dataset, reshaped_matrix))
    except ValueError as e:
        # Keep the descriptive prefix and chain the cause for easier debugging.
        raise ValueError(f"Data integration failed: {e}") from e

# --- Example Usage ---
if __name__ == "__main__":
    # Existing 2x3 dataset that the new rows will be appended to
    base_data = np.array([
        [10, 20, 30],
        [40, 50, 60],
    ])

    # Six flat values, enough to fill a 2x3 matrix
    new_flat = np.array([70, 80, 90, 100, 110, 120])

    # Turn the flat values into a 2x3 matrix and place it under base_data
    result = secure_reshape_and_stack(new_flat, (2, 3), base_data)

    print("Combined Dataset:\n", result)

Combined Dataset:
 [[ 10  20  30]
 [ 40  50  60]
 [ 70  80  90]
 [100 110 120]]


Part 3: The apply_threshold Function.

This function demonstrates Conditional Access and Modification, which are primarily used in data cleaning and processing. It follows the logic of identifying specific elements based on a boolean condition and replacing them efficiently.

In [6]:
import numpy as np

def apply_threshold(data, threshold, replacement_value):
    """
    Replace every element strictly greater than a threshold.

    Args:
        data (array-like): The input to be processed; an ndarray or anything
            np.array accepts (e.g. a list). It is never modified.
        threshold (float): The value above which replacements occur. Elements
            equal to the threshold are left unchanged.
        replacement_value (float): The value to assign to elements exceeding
            the threshold.

    Returns:
        np.ndarray: A copy of the data with values modified. The copy keeps
        the dtype of the input, so replacement_value is cast to that dtype
        when it is assigned.
    """
    # Always work on a fresh copy so the caller's data stays intact.
    # subok=True keeps ndarray subclasses as they are.
    processed_data = np.array(data, copy=True, subok=True)

    # The boolean mask selects every element above the threshold,
    # and the assignment overwrites all of them in one step.
    processed_data[processed_data > threshold] = replacement_value

    return processed_data

# --- Example Usage ---
if __name__ == "__main__":
    # Sample data (e.g., sensor readings with some noise/outliers)
    sensor_readings = np.array([1.2, 4.5, 10.1, 3.8, 15.2, 2.1])
    
    # Cap any reading above 10.0 to exactly 10.0
    cleaned_data = apply_threshold(sensor_readings, threshold=10.0, replacement_value=10.0)
    
    print("Original Readings:", sensor_readings)
    print("Cleaned Data:    ", cleaned_data)

Original Readings: [ 1.2  4.5 10.1  3.8 15.2  2.1]
Cleaned Data:     [ 1.2  4.5 10.   3.8 10.   2.1]


Test Case

In [7]:
import numpy as np

# Branch A arrives as a flat run of 8 sales figures:
# 4 months x 2 products, ordered [P1_M1, P2_M1, P1_M2, P2_M2, ...]
branch_a_flat = np.array([120, 150, 130, 170, 450, 160, 140, 180])

# Branch B is already laid out as a 4x2 matrix (months as rows, products as columns)
branch_b_report = np.array([
    [110, 140],
    [125, 155],
    [135, 165],
    [145, 175],
])

# --- STEP 1: integrate both branches ---
# Branch A is reshaped to Branch B's layout (4x2) and stacked underneath it,
# which yields an 8x2 matrix.
integrated_report = secure_reshape_and_stack(
    branch_a_flat, branch_b_report.shape, branch_b_report
)

print("Integrated Report (Before Cleaning):\n", integrated_report)

# --- STEP 2: clean outliers ---
# Any sales value above 400 is treated as an input error and replaced
# with the standard maximum of 200.
final_cleaned_report = apply_threshold(
    integrated_report, threshold=400, replacement_value=200
)

print("\nFinal Cleaned Report (After Thresholding):\n", final_cleaned_report)
print("\nFinal Shape:", final_cleaned_report.shape)  # Expected Output: (8, 2)

Integrated Report (Before Cleaning):
 [[110 140]
 [125 155]
 [135 165]
 [145 175]
 [120 150]
 [130 170]
 [450 160]
 [140 180]]

Final Cleaned Report (After Thresholding):
 [[110 140]
 [125 155]
 [135 165]
 [145 175]
 [120 150]
 [130 170]
 [200 160]
 [140 180]]

Final Shape: (8, 2)


In [4]:
# Quick check: only values strictly above 2 are replaced with -20
v = np.array([1, 2, 3])
apply_threshold(v, 2, -20)


array([  1,   2, -20])