### Assignment: Conditional Access and Modification

This assignment covers conditional access and modification,
techniques that are primarily used in data cleaning and
processing. It follows the logic of identifying specific elements
based on a boolean condition and replacing them efficiently.

Key steps implemented:

Conversion to NumPy array: arr = np.array(arr) converts any iterable input into a NumPy ndarray.

Boolean condition: condition = arr >= threshold creates a boolean array where True indicates elements ≥ threshold.

Conditional replacement: np.where(condition, replacement_value, arr) replaces elements where condition is True with replacement_value, and keeps original values where condition is False.

The function correctly replaces values ≥ 80 with -1, as shown in the test case.


In [1]:
import numpy as np

def apply_threshold(arr, threshold, replacement_value=-1):
    """Replace every element at or above a threshold with a fixed value.

    Args:
        arr: Array-like input; it is converted to a NumPy array first.
        threshold: Elements greater than or equal to this value are replaced.
        replacement_value: Value substituted for the matching elements
            (defaults to -1).

    Returns:
        A NumPy array in which matching elements hold ``replacement_value``
        and every other element keeps its original value.
    """
    values = np.array(arr)

    # Boolean mask: True where the element meets or exceeds the threshold.
    at_or_above = values >= threshold

    # np.where picks replacement_value where the mask is True, else the
    # original element.
    return np.where(at_or_above, replacement_value, values)

# --- Student Self-Test ---
# Sample sensor readings; anything at or above 80 should be flagged as -1.
readings = [25, 30, 85, 22, 90]
result = apply_threshold(readings, 80, -1)
print(result)
# Expected output (NumPy arrays print without commas): [25 30 -1 22 -1]

[25 30 -1 22 -1]


Key implementation details:

Strategy options: The function handles the four strategies described in the requirements: "drop", "fill_mean", "fill_median", and "fill_mode". (The parameter names in the assignment text appear scrambled, e.g. "naidem_llm", "naem_llm", "swor_pord"; they most likely stand for "fill_median", "fill_mean", and "drop_rows".)

Numerical columns only for mean/median: Uses select_dtypes(include=[np.number]) to identify numerical columns before applying mean or median fill.

Mode calculation: Uses .mode().iloc[0] to get the most frequent value for each column.

DataFrame copy: Creates a copy to avoid modifying the original DataFrame.

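Expected behavior:

"drop": Removes the rows with Transaction_ID 3 and 8 (index 2 and 7), which have NaN in Customer_Age

"fill_mean": Fills NaN in Customer_Age with 33.125 (mean of the eight known ages)

"fill_median": Fills NaN in Customer_Age with 32.5 (median of the eight known ages)

"fill_mode": Fills NaN in Customer_Age with 23 (every known age is unique, so .mode() returns them all in sorted order and the first, smallest value is used)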



In [4]:
import pandas as pd
import numpy as np

# Sample retail transactions used to exercise the null-handling strategies.
data = {
    "Transaction_ID": range(1, 11),
    "Product_Category": [
        "Electronics", "Home", "Electronics", "Sports", "Home",
        "Electronics", "Home", "Sports", "Electronics", "Electronics",
    ],
    # The 1000 entry is a deliberate outlier.
    "Sales_Amount": [150, 200, 155, 300, 210, 180, 205, 1000, 190, 160],
    # Two ages are missing (NaN).
    "Customer_Age": [25, 34, np.nan, 45, 23, 31, 29, np.nan, 38, 40],
    "Rating": [5, 4, 3, 5, 2, 4, 5, 2, 4, 3],
}

df = pd.DataFrame(data)

def _fill_numeric_nulls(frame, statistic):
    """Fill NaNs in the numeric columns of ``frame`` with a per-column statistic.

    ``statistic`` is the name of a DataFrame reduction method, e.g. "mean"
    or "median". Non-numeric columns are left untouched.
    """
    numeric = frame.select_dtypes(include=[np.number])
    if numeric.shape[1] == 0:
        # Nothing to compute a statistic on; return the frame as-is.
        return frame
    fill_values = getattr(numeric, statistic)()
    # fillna with a Series only fills the columns named in its index, so
    # non-numeric columns keep their nulls.
    return frame.fillna(fill_values)


def null_handling_strategy(df, strategy):
    """Return a cleaned copy of ``df`` with missing (NaN) values resolved.

    The input DataFrame is never modified; all work happens on a copy.

    Parameters:
    df: pandas DataFrame
    strategy: string - one of:
        - "drop": Remove rows with any null values
        - "fill_mean": Fill numerical columns with their mean (only for numerical columns)
        - "fill_median": Fill numerical columns with their median (only for numerical columns)
        - "fill_mode": Fill all columns with their mode (most frequent value);
          when several values tie, the first one returned by ``.mode()`` is used

    Returns:
    A new pandas DataFrame with the chosen strategy applied.

    Raises:
    ValueError: if ``strategy`` is not one of the four supported names.
    """
    # Work on a copy so the caller's DataFrame is left untouched.
    df_clean = df.copy()

    if strategy == "drop":
        return df_clean.dropna()

    if strategy == "fill_mean":
        return _fill_numeric_nulls(df_clean, "mean")

    if strategy == "fill_median":
        return _fill_numeric_nulls(df_clean, "median")

    if strategy == "fill_mode":
        modes = df_clean.mode()
        # .mode() yields zero rows for an empty frame or when every column
        # is entirely NaN; indexing row 0 would raise IndexError, and there
        # is nothing to fill with anyway.
        if modes.empty:
            return df_clean
        # Row 0 holds the first mode of each column; columns without any
        # mode show up as NaN there and are dropped from the fill values.
        return df_clean.fillna(modes.iloc[0].dropna())

    raise ValueError(f"Unknown strategy: {strategy}. Use 'drop', 'fill_mean', 'fill_median', or 'fill_mode'")

# Display the mean-filled frame as the cell's output.
null_handling_strategy(df, strategy="fill_mean")

Unnamed: 0,Transaction_ID,Product_Category,Sales_Amount,Customer_Age,Rating
0,1,Electronics,150,25.0,5
1,2,Home,200,34.0,4
2,3,Electronics,155,33.125,3
3,4,Sports,300,45.0,5
4,5,Home,210,23.0,2
5,6,Electronics,180,31.0,4
6,7,Home,205,29.0,5
7,8,Sports,1000,33.125,2
8,9,Electronics,190,38.0,4
9,10,Electronics,160,40.0,3


In [None]:
# Show the raw data and the number of missing values per column.
print("Original DataFrame:", df, sep="\n")
print("\nNull count in original data:", df.isna().sum(), sep="\n")

# Run each supported strategy against the same, unmodified DataFrame.
print("\n--- Testing 'drop' strategy ---")
result_drop = null_handling_strategy(df, "drop")
print(result_drop)

print("\n--- Testing 'fill_mean' strategy ---")
result_mean = null_handling_strategy(df, "fill_mean")
print(result_mean)
print(
    "Note: Only Customer_Age (numerical) filled with mean:",
    df['Customer_Age'].mean(),
)

print("\n--- Testing 'fill_median' strategy ---")
result_median = null_handling_strategy(df, "fill_median")
print(result_median)
print(
    "Note: Only Customer_Age (numerical) filled with median:",
    df['Customer_Age'].median(),
)

print("\n--- Testing 'fill_mode' strategy ---")
result_mode = null_handling_strategy(df, "fill_mode")
print(result_mode)
print("Note: All columns filled with their mode values")