<a href="https://colab.research.google.com/github/DOJO-Smart-Ways/DOJO-Beam-Transforms/blob/pbi-footprint/unit_test_pipeline_components/GenericArithmeticOperationTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install git+https://github.com/DOJO-Smart-Ways/DOJO-Beam-Transforms.git@main#egg=dojo-beam-transforms

In [7]:
import unittest
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
import apache_beam as beam

from pipeline_components import data_enrichment as de

In [8]:
from apache_beam import Create, ParDo


In [9]:

class GenericArithmeticOperationTest(unittest.TestCase):
    def test_basic_arithmetic_operation(self):
        with TestPipeline() as p:
            input_data = [
                {'COLUMN_1': 10, 'COLUMN_2': 20, 'COLUMN_3': 2},
                {'COLUMN_1': 30, 'COLUMN_2': 10, 'COLUMN_3': 4}
            ]
            expected_output = [
                {'COLUMN_1': 10, 'COLUMN_2': 20, 'COLUMN_3': 2, 'COLUMN_4': 15},
                {'COLUMN_1': 30, 'COLUMN_2': 10, 'COLUMN_3': 4, 'COLUMN_4': 10}
            ]

            # Definindo as operações
            operations = [
                {
                    'operands': ['COLUMN_1', 'COLUMN_2', 'COLUMN_3'],
                    'result_column': 'COLUMN_4',
                    'formula': lambda c1, c2, c3: (c1 + c2) / c3 if c3 else 0
                }
            ]

            result = (
                p
                | Create(input_data)
                | ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))

    def test_handling_missing_values(self):
        with TestPipeline() as p:
            input_data = [
                {'COLUMN_1': 10, 'COLUMN_2': 20},  # COLUMN_3 está ausente
                {'COLUMN_1': 30, 'COLUMN_2': 10, 'COLUMN_3': 0}  # COLUMN_3 presente, mas é 0
            ]
            expected_output = [
                {'COLUMN_1': 10, 'COLUMN_2': 20, 'COLUMN_4': 0},  # Resultado devido à ausência de COLUMN_3
                {'COLUMN_1': 30, 'COLUMN_2': 10, 'COLUMN_3': 0, 'COLUMN_4': 0}  # Resultado devido a COLUMN_3 ser 0
            ]

            operations = [
                {
                    'operands': ['COLUMN_1', 'COLUMN_2', 'COLUMN_3'],
                    'result_column': 'COLUMN_4',
                    'formula': lambda c1, c2, c3: (c1 + c2) / c3 if c3 else 0
                }
            ]

            result = (
                p
                | "CreateInputDataForMissingValues" >> Create(input_data)
                | "ApplyArithmeticOperationForMissingValues" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))


    def test_operation_error_handling(self):
        with TestPipeline() as p:
            input_data = [
                {'COLUMN_1': 10, 'COLUMN_2': 20, 'COLUMN_3': 0},  # Espera-se uma divisão por zero
            ]
            expected_output = [
                {'COLUMN_1': 10, 'COLUMN_2': 20, 'COLUMN_3': 0, 'COLUMN_4': None}  # Tratamento do erro com None
            ]

            # Nota: Esta configuração intencionalmente não trata divisão por zero dentro da lambda
            operations = [
                {
                    'operands': ['COLUMN_1', 'COLUMN_2', 'COLUMN_3'],
                    'result_column': 'COLUMN_4',
                    'formula': lambda c1, c2, c3: (c1 + c2) / c3  # Sem tratamento de erro aqui
                }
            ]

            result = (
                p
                | "CreateInputDataForErrorHandling" >> Create(input_data)
                | "ApplyArithmeticOperationWithError" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))


    def test_float_arithmetic_operations(self):
        with TestPipeline() as p:
            input_data = [
                {'COLUMN_1': 10.5, 'COLUMN_2': 2.5, 'COLUMN_3': 2.0},
            ]
            expected_output = [
                {'COLUMN_1': 10.5, 'COLUMN_2': 2.5, 'COLUMN_3': 2.0, 'COLUMN_4': 11.0},  # 10.5 + 2.5 - 2.0
            ]

            operations = [
                {
                    'operands': ['COLUMN_1', 'COLUMN_2', 'COLUMN_3'],
                    'result_column': 'COLUMN_4',
                    'formula': lambda c1, c2, c3: c1 + c2 - c3
                }
            ]

            result = (
                p
                | "CreateInputDataForFloatOperations" >> Create(input_data)
                | "ApplyFloatArithmeticOperation" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))

    def test_tripled_multiplication_operations(self):
        with TestPipeline() as p:
            input_data = [
                {'A': 2, 'B': 3, 'C': 4},
            ]
            expected_output = [
                {'A': 2, 'B': 3, 'C': 4, 'RESULT': 24},  # 2 * 3 * 4
            ]

            operations = [
                {
                    'operands': ['A', 'B', 'C'],
                    'result_column': 'RESULT',
                    'formula': lambda a, b, c: a * b * c
                }
            ]

            result = (
                p
                | "CreateInputForTripledMultiplication" >> Create(input_data)
                | "ApplyTripledMultiplication" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))

    def test_combined_sum_and_subtraction_with_ints_and_floats(self):
        with TestPipeline() as p:
            input_data = [
                {'D': 100, 'E': 50.5, 'F': 25.25},
            ]
            expected_output = [
                {'D': 100, 'E': 50.5, 'F': 25.25, 'FINAL_RESULT': 74.75},  # 100 - 50.5 + 25.25
            ]

            operations = [
                {
                    'operands': ['D', 'E', 'F'],
                    'result_column': 'FINAL_RESULT',
                    'formula': lambda d, e, f: d - e + f
                }
            ]

            result = (
                p
                | "CreateInputForSumAndSubtraction" >> Create(input_data)
                | "ApplySumAndSubtraction" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))


    def test_data_type_preservation(self):
        with TestPipeline() as p:
            input_data = [
                {'G': 1.5, 'H': 2, 'I': 3.0},  # Mistura de float e int
            ]
            expected_output = [
                {'G': 1.5, 'H': 2, 'I': 3.0, 'MIXED_RESULT': 9.0},  # (1.5 * 2) * 3.0 = 9.0 (float)
            ]

            operations = [
                {
                    'operands': ['G', 'H', 'I'],
                    'result_column': 'MIXED_RESULT',
                    'formula': lambda g, h, i: (g * h) * i
                }
            ]

            result = (
                p
                | "CreateInputForDataTypePreservation" >> Create(input_data)
                | "ApplyDataTypePreservation" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))

    def test_negative_results_operations(self):
        with TestPipeline() as p:
            input_data = [
                {'J': -10, 'K': 5, 'L': -2},  # Operação resultará em valor negativo
            ]
            expected_output = [
                {'J': -10, 'K': 5, 'L': -2, 'NEGATIVE_RESULT': -20},  # -10 + (5 * -2) = -20
            ]

            operations = [
                {
                    'operands': ['J', 'K', 'L'],
                    'result_column': 'NEGATIVE_RESULT',
                    'formula': lambda j, k, l: j + (k * l)
                }
            ]

            result = (
                p
                | "CreateInputForNegativeResults" >> Create(input_data)
                | "ApplyNegativeResultsOperation" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))

    def test_complex_chain_of_operations(self):
        with TestPipeline() as p:
            input_data = [
                {'M': 4, 'N': 8, 'O': 2, 'P': 5},  # Complex chain calculation
            ]
            expected_output = [
                {'M': 4, 'N': 8, 'O': 2, 'P': 5, 'COMPLEX_RESULT': 19},  # (((4 + 8) * 2) - 5) = 19
            ]

            operations = [
                {
                    'operands': ['M', 'N', 'O', 'P'],
                    'result_column': 'COMPLEX_RESULT',
                    'formula': lambda m, n, o, p: (((m + n) * o) - p)
                }
            ]

            result = (
                p
                | "CreateInputForComplexChain" >> Create(input_data)
                | "ApplyComplexChainOperation" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))

    def test_complex_chain_of_operations_racional_number(self):
        with TestPipeline() as p:
            input_data = [
                {'M': 4.12341, 'N': 8.000123, 'O': 2.765, 'P': -5.32775},  # Complex chain calculation
            ]
            expected_output = [
                {'M': 4.12341, 'N': 8.000123, 'O': 2.765, 'P': -5.32775, 'COMPLEX_RESULT': 40.40542},  #
            ]

            operations = [
                {
                    'operands': ['M', 'N', 'O', 'P'],
                    'result_column': 'COMPLEX_RESULT',
                    'formula': lambda m, n, o, p: round((((m + n)*n / o) - p),5)
                }
            ]

            result = (
                p
                | "CreateInputForComplexChain" >> Create(input_data)
                | "ApplyComplexChainOperation" >> ParDo(de.GenericArithmeticOperation(operations))
            )

            assert_that(result, equal_to(expected_output))


    def test_operations_racional_number(self):
        with TestPipeline() as p:
            input_data = [
                {'M': 4.12341, 'N': 8.000123, 'O': 2.765, 'P': -5.32775},  # Complex chain calculation
            ]
            expected_output = [
                {'M': 4.12341, 'N': 8.000123, 'O': 2.765, 'P': -5.32775, 'COMPLEX_RESULT': 0.72765},  #
            ]

            operations = [
                {
                    'operands': ['M', 'N', 'O', 'P'],
                    'result_column': 'COMPLEX_RESULT',
                    'formula': lambda m, n, o, p: round(((m - n) / p),5)
                }
            ]

            result = (
                p
                | "CreateInputForComplexChain" >> Create(input_data)
                | "ApplyComplexChainOperation" >> ParDo(de.GenericArithmeticOperation(operations))
            )
            assert_that(result, equal_to(expected_output))

In [10]:
def run_tests():
    """Run every test of ``GenericArithmeticOperationTest`` with a text runner.

    Returns:
        The result object returned by ``unittest.TextTestRunner.run``; its
        ``wasSuccessful()`` method tells whether all tests passed.
    """
    suite = unittest.TestSuite()
    loader = unittest.TestLoader()
    suite.addTests(loader.loadTestsFromTestCase(GenericArithmeticOperationTest))

    runner = unittest.TextTestRunner()
    # Hand the outcome back instead of discarding it so callers can act on it.
    return runner.run(suite)


# Fail the cell when any test fails, so a "Run All" or automated execution of
# the notebook cannot finish cleanly with a broken suite.
test_result = run_tests()
assert test_result.wasSuccessful(), "GenericArithmeticOperationTest has failing tests"

...........
----------------------------------------------------------------------
Ran 11 tests in 7.497s

OK
