Task: Complete Pipeline for a Dataset
1. Objective: Build a complex preprocessing pipeline that chains multiple transformations.
2. Steps:
    - Load a sample dataset.
    - Define a transformation pipeline that applies both imputation and scaling.

In [14]:
import pytest
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Test for Imputation
def test_imputation():
    """Check that mean imputation removes every NaN and fills in the column mean.

    Builds a two-column frame with one missing value per column, runs it
    through ``SimpleImputer(strategy='mean')`` and verifies three things:
    no NaN is left, the shape is unchanged, and every value (imputed or
    originally present) matches an independent pandas computation.
    """
    # Sample data with missing values
    data = {'A': [1, 2, np.nan, 4], 'B': [5, np.nan, 7, 8]}
    df = pd.DataFrame(data)

    imputer = SimpleImputer(strategy='mean')
    imputed_data = imputer.fit_transform(df)

    # Assert that missing values are replaced
    assert not np.any(np.isnan(imputed_data)), "Missing values exist after imputation!"

    # Imputation must neither drop nor add rows/columns.
    assert imputed_data.shape == df.shape, "Imputation changed the shape of the data!"

    # Merely "no NaN" would also pass for a wrong fill value, so compare with
    # the column means computed independently by pandas (NaN is skipped by
    # DataFrame.mean by default). Observed values must come through untouched.
    expected = df.fillna(df.mean()).to_numpy()
    assert np.allclose(imputed_data, expected), "Imputed values do not match the column means!"

# Test for Scaling
def test_scaling():
    """Check that StandardScaler yields zero mean and unit std for every column.

    Fits the scaler on a small fully-observed frame and asserts, column by
    column, that the transformed data is centred on 0 with a standard
    deviation of 1.
    """
    # Sample data
    data = {'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}
    df = pd.DataFrame(data)

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df)

    # StandardScaler normalises with the population standard deviation
    # (ddof=0), which is also np.std's default, so the statistics should match
    # up to floating-point rounding. A loose tolerance such as 0.1 would let a
    # visibly wrong scaling slip through.
    tolerance = 1e-9
    column_means = np.mean(scaled_data, axis=0)
    column_stds = np.std(scaled_data, axis=0)

    # Assert that the scaled data has mean=0 and std=1 for each column
    for position, column in enumerate(df.columns):
        assert np.abs(column_means[position]) < tolerance, f"Column {column} mean is not close to 0"
    for position, column in enumerate(df.columns):
        assert np.abs(column_stds[position] - 1) < tolerance, f"Column {column} std is not close to 1"
