## Creating Tests for a Simple Pipeline

In [1]:
import pytest

def extract(filepath):
    """
    Read a single integer from a text file and wrap it in a record.

    The file contents are stripped of surrounding whitespace and parsed
    with int(), so non-integer contents raise ValueError.
    Returns a dictionary of the form {'value': <int>}.
    """
    with open(filepath, 'r') as source:
        raw_text = source.read()

    parsed_value = int(raw_text.strip())
    return {'value': parsed_value}


def transform(input_data):
    """
    Double the 'value' entry of a record.

    Expects input_data to be a dictionary with a 'value' key and returns
    a new dictionary holding the doubled value; the input is not modified.
    Negative values are rejected with a ValueError (zero is accepted).
    """
    original = input_data['value']

    # guard clause: refuse negative inputs before doing any work
    if original < 0:
        raise ValueError('Value must be positive.')

    return {'value': original * 2}


def load(output_data, database):
    """
    Store the transformed value in the target database.

    Expects database to be a dictionary and output_data to contain a
    'value' key. The database is updated in place; nothing is returned.
    """
    transformed_value = output_data['value']
    database['value'] = transformed_value

### Unit Testing

In [None]:
def test_transform_unittest():
    """Unit test: transform doubles the value of a single valid record."""

    # define input and expected output data formats
    input_data = {'value': 5}
    expected_output = {'value': 10}

    # exercise
    result = transform(input_data)

    # verify
    assert result == expected_output

### Validation Testing

In [None]:
def test_transform_validation():
    """Validation test: transform rejects a negative value with ValueError."""

    # define input data format
    input_data = {'value': -5}

    # define data condition for expectations
    with pytest.raises(ValueError) as excinfo:
        transform(input_data)

    # the message must match the one raised by transform
    assert str(excinfo.value) == 'Value must be positive.'

### Integration Testing 

In [None]:
def test_load_transform_integration():
    """Integration test: transform output is stored correctly by load."""

    # define input and expected output data formats
    input_data = {'value': 5}
    expected_database = {'value': 10}
    database = {}  # a fresh in-memory database for this test

    # add transform and load
    transformed_data = transform(input_data)
    load(transformed_data, database)

    # verify
    assert database == expected_database

### End-to-End Testing

In [None]:
def test_pipeline_end_to_end():
    """End-to-end test: a value written to a file flows through
    extract, transform and load and ends up doubled in the database."""
    # imported locally so the test does not depend on file-level imports
    import os
    import tempfile

    # define input and expected output data formats
    expected_database = {'value': 20}
    database = {}

    # a temporary directory keeps the test from leaving files behind
    with tempfile.TemporaryDirectory() as tmp_dir:
        test_input_file = os.path.join(tmp_dir, 'test_input.txt')

        # add open file to the test with value 10
        with open(test_input_file, 'w') as file:
            file.write('10')

        # add extract, transform, load
        input_data = extract(test_input_file)
        transformed_data = transform(input_data)
        load(transformed_data, database)

    # verify
    assert database == expected_database

### Performance Testing

In [None]:
import time

In [None]:
def test_transform_performance():
    """Performance test: time transform over one million records and
    print the elapsed time. Nothing is asserted about the duration."""

    # define input data formats
    input_data = [{'value': i} for i in range(1_000_000)]

    # define start time (perf_counter is monotonic, so the interval
    # is not affected by system clock adjustments)
    start_time = time.perf_counter()

    # iterate through the transform step
    for data in input_data:
        transform(data)

    # define the end time
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"Elapsed time for processing 1 million data points was {elapsed_time} seconds.")

### Resilience Testing

In [None]:
import random

In [None]:
def test_transform_resilience_timeout(input_data):
    """
    Flaky transformation function that doubles the input value.

    Assumes input_data is a dictionary with 'value' key. Roughly one call
    in ten raises TimeoutError instead of returning, to simulate a
    temporary outage; otherwise returns {'value': <doubled value>}.

    NOTE(review): despite the test_ prefix this is a helper that needs an
    argument; pytest would treat input_data as a fixture name when
    collecting it - confirm whether the name is intentional.
    """

    # includes a random chance to raise a TimeoutError
    if random.random() < 0.1:  # 10% chance to raise an error
        raise TimeoutError('Temporary network outage.')

    # no simulated outage this time: perform the actual transformation
    output_data = {'value': input_data['value'] * 2}

    return output_data

In [None]:
def test_transform_resilience_timeout_retry5():
    """Resilience test: retry transform up to five times on TimeoutError.

    The last TimeoutError is re-raised if every attempt fails.

    NOTE(review): transform as defined in this file never raises
    TimeoutError, so the retry path only runs if transform is replaced
    by a flaky variant - confirm which function is meant to be exercised.
    """

    # Setup
    input_data = {'value': 5}
    expected_output = {'value': 10}
    max_attempts = 5

    # Exercise
    for attempt in range(1, max_attempts + 1):
        try:
            result = transform(input_data)
            break
        except TimeoutError:
            if attempt == max_attempts:  # We've reached our maximum attempts
                raise  # Re-raise the last exception

    # Verify
    assert result == expected_output