In [1]:
def remove_outliers(df, threshold=1.5):
    """
    Remove outlier rows from a DataFrame using the IQR method.

    For every numeric column, the quartiles Q1 and Q3 are computed on the
    original data and a row is kept only if its value lies within
    [Q1 - threshold * IQR, Q3 + threshold * IQR] (bounds inclusive). A row
    is dropped if it is an outlier in at least one numeric column.

    Missing values are not treated as outliers: a row whose value in a
    numeric column is NaN/NA is retained (as far as that column is
    concerned). Non-numeric columns are ignored when filtering.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the data.
        threshold (float): The multiplier for the IQR to define outlier boundaries.

    Returns:
        pd.DataFrame: A new DataFrame with outliers removed. The input
        DataFrame is never modified.
    """
    # Automatically select numeric columns
    numeric_cols = df.select_dtypes(include=['number']).columns

    # Accumulate one boolean "keep this row" mask across all numeric columns
    # and filter once at the end instead of re-slicing the frame per column.
    keep_mask = None

    for col in numeric_cols:
        values = df[col]

        # Quartiles are always taken from the original, unfiltered column so
        # that the bounds do not depend on the order the columns are visited.
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1

        lower_bound = q1 - threshold * iqr
        upper_bound = q3 + threshold * iqr

        # A comparison against NaN is False, so without the explicit isna()
        # term every row with a missing value (or every row at all, when the
        # column is entirely NaN and the bounds are NaN) would be discarded.
        col_ok = values.between(lower_bound, upper_bound) | values.isna()

        keep_mask = col_ok if keep_mask is None else keep_mask & col_ok

    if keep_mask is None:
        # No numeric columns: nothing to filter, but still return a copy.
        return df.copy()

    return df[keep_mask].copy()

# Filter `df` with the IQR rule and report the row/column counts before and
# after. `df` must already exist in the session; this cell does not create it.
df_cleaned = remove_outliers(df)
print(
    f"Original DataFrame shape: {df.shape}",
    f"Cleaned DataFrame shape: {df_cleaned.shape}",
    sep="\n",
)

NameError: name 'df' is not defined