<a href="https://colab.research.google.com/github/DOJO-Smart-Ways/DOJO-Beam-Transforms/blob/main/unit_test_pipeline_components/RenameColumns.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install apache_beam

Collecting apache_beam
  Downloading apache_beam-2.54.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting crcmod<2.0,>=1.7 (from apache_beam)
  Downloading crcmod-1.7.tar.gz (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.7/89.7 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting orjson<4,>=3.9.7 (from apache_beam)
  Downloading orjson-3.9.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.5/138.5 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dill<0.3.2,>=0.3.1.1 (from apache_beam)
  Downloading dill-0.3.1.1.tar.gz (151 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.0/152.0 kB[0m [31m20.2 MB/s[0m eta [36m0:00

In [29]:
import unittest
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
import apache_beam as beam
from datetime import datetime, timedelta
import pandas as pd

class RenameColumns(beam.DoFn):
    """Beam ``DoFn`` that renames the keys of each dict-like element.

    Keys found in ``column_mapping`` are replaced by their mapped name;
    keys absent from the mapping are kept unchanged. Values are passed
    through untouched.
    """

    def __init__(self, column_mapping):
        """Store the ``{old_name: new_name}`` mapping used by ``process``."""
        self.column_mapping = column_mapping

    def process(self, element):
        """Yield a new dict built from ``element`` with its keys renamed.

        The input element is not modified. If two source keys resolve to
        the same output name, the value of the later key wins.
        """
        renamed = {}
        for old_name, value in element.items():
            # Fall back to the original name when no rename is configured.
            new_name = self.column_mapping.get(old_name, old_name)
            renamed[new_name] = value
        yield renamed

class TestRenameColumns(unittest.TestCase):
    """Pipeline-level tests for the ``RenameColumns`` DoFn."""

    def _assert_renamed(self, input_data, column_mapping, expected_data,
                        expected_type_names=None):
        """Run ``RenameColumns`` in a test pipeline and check its output.

        Args:
            input_data: List of dict elements fed into the pipeline.
            column_mapping: ``{old_name: new_name}`` mapping under test.
            expected_data: List of dict elements the pipeline must emit.
            expected_type_names: Optional list of ``{key: type name}`` dicts,
                one per expected element. When given, the exact class name
                of every output value is asserted as well.
        """
        with TestPipeline() as p:
            output = (
                p
                | "CreateInput" >> beam.Create(input_data)
                | "RenameColumns" >> beam.ParDo(RenameColumns(column_mapping))
            )

            assert_that(output, equal_to(expected_data), label="CheckValues")

            if expected_type_names is not None:
                # Value equality alone cannot detect a type change:
                # pd.Timestamp compares equal to a datetime holding the same
                # instant. Compare the concrete class names explicitly.
                type_names = output | "ExtractTypeNames" >> beam.Map(
                    lambda row: {
                        key: type(value).__name__ for key, value in row.items()
                    }
                )
                # Each assert_that in one pipeline needs its own label.
                assert_that(
                    type_names,
                    equal_to(expected_type_names),
                    label="CheckTypes",
                )

    def test_rename_columns_full_mapping(self):
        """Every key present in the mapping is renamed."""
        self._assert_renamed(
            input_data=[{'a': 1, 'b': 2}],
            column_mapping={'a': 'x', 'b': 'y'},
            expected_data=[{'x': 1, 'y': 2}],
        )

    def test_rename_columns_partial_mapping(self):
        """Keys missing from the mapping keep their original name."""
        self._assert_renamed(
            input_data=[{'a': 1, 'b': 2}],
            column_mapping={'a': 'x'},
            expected_data=[{'x': 1, 'b': 2}],
        )

    def test_rename_columns_no_mapping(self):
        """An empty mapping leaves the element unchanged."""
        self._assert_renamed(
            input_data=[{'a': 1, 'b': 2}],
            column_mapping={},
            expected_data=[{'a': 1, 'b': 2}],
        )

    def test_rename_columns_dtype_consistency_with_timestamp(self):
        """Renaming keeps both the values and the value types intact."""
        # Fixed instants keep the test deterministic and reproducible.
        plain_datetime = datetime(2024, 1, 15, 12, 30, 45, 123456)
        timestamp = pd.to_datetime(datetime(2024, 1, 15, 12, 30, 46, 654321))

        self._assert_renamed(
            input_data=[{'datetime': plain_datetime, 'timestamp': timestamp}],
            column_mapping={
                'datetime': 'new_datetime',
                'timestamp': 'new_timestamp',
            },
            expected_data=[
                {'new_datetime': plain_datetime, 'new_timestamp': timestamp}
            ],
            expected_type_names=[
                {'new_datetime': 'datetime', 'new_timestamp': 'Timestamp'}
            ],
        )




def run_tests():
    """Load every test in ``TestRenameColumns`` and run it with a text runner.

    Results are reported by ``unittest.TextTestRunner`` on its default
    stream; nothing is returned.
    """
    case_tests = unittest.TestLoader().loadTestsFromTestCase(TestRenameColumns)

    all_tests = unittest.TestSuite()
    all_tests.addTests(case_tests)

    unittest.TextTestRunner().run(all_tests)

# Invoke the test-runner function so the tests execute when this cell runs
run_tests()

....
----------------------------------------------------------------------
Ran 4 tests in 2.399s

OK
