In [13]:
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
import os
import psycopg2 as pg

# --- Credentials -----------------------------------------------------------
# Point the Google client libraries at the service-account key file.
SERVICE_ACCOUNT = "curso-apache-beam-gcp.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT

# --- Cloud Storage locations -----------------------------------------------
CLOUD_STORAGE = "gs://curso-apache-beam-gcp-n"
CLOUD_STORAGE_TEMP = CLOUD_STORAGE + "/temp"
CLOUD_STORAGE_TEMPLATE_FILE = (
    CLOUD_STORAGE + "/template/curso-apache-beam-postgres-bq-local-teste"
)

# --- BigQuery destination, written as "project:dataset.table" ---------------
BIG_QUERY_TABLE = "curso-apache-beam-gcp:cursoapachebeamgcpdataset.users"

# --- Pipeline options -------------------------------------------------------
# Staging and temp files share the same bucket folder.
# NOTE(review): 'us' does not look like a concrete Dataflow region
# (e.g. 'us-central1') -- confirm the intended value.
pipelines_options = PipelineOptions.from_dictionary(dict(
    project="curso-apache-beam-gcp",
    runner="DataflowRunner",
    region="us",
    staging_location=CLOUD_STORAGE_TEMP,
    temp_location=CLOUD_STORAGE_TEMP,
    template_location=CLOUD_STORAGE_TEMPLATE_FILE,
    save_main_session=True,
))

class ReadUsersFromPostgres(beam.DoFn):
    """DoFn that emits every row of the ``users`` table as a tuple.

    The database connection is opened in ``setup()`` and released in
    ``teardown()``; ``__init__`` only stores plain connection settings.
    """

    def __init__(self, *unused_args, **unused_kwargs):
        super().__init__(*unused_args, **unused_kwargs)
        # NOTE(review): "localhost" is resolved on whichever machine runs this
        # DoFn; with a remote runner that is presumably the worker, not the
        # notebook host -- confirm the database is reachable from there.
        self.db_config = {
            "host": "localhost",
            "port": 5432,
            "database": "postgres",
            "user": "postgres",
        }
        # Real handles are created in setup(). Starting from None lets
        # teardown() run safely even when setup() failed or never ran.
        self.conn = None
        self.cursor = None

    def setup(self):
        """Open the database connection and a cursor on it."""
        self.conn = pg.connect(**self.db_config)
        self.cursor = self.conn.cursor()

    def process(self, element):
        """Run the query and yield each resulting row.

        Args:
            element: Ignored; it only triggers the read, so the query runs
                once per input element.

        Yields:
            One tuple per row returned by ``SELECT * FROM users``.
        """
        # NOTE(review): ``SELECT *`` makes the tuple width depend on the table
        # definition; a downstream step that unpacks a fixed number of fields
        # will break if the table has a different column count -- confirm.
        self.cursor.execute("SELECT * FROM users")
        yield from self.cursor.fetchall()

    def teardown(self):
        """Close the cursor and the connection, tolerating a partial setup."""
        try:
            if self.cursor is not None:
                self.cursor.close()
        finally:
            # Release the connection even if closing the cursor raised.
            if self.conn is not None:
                self.conn.close()
            self.cursor = None
            self.conn = None


class FormatUsersToBq(beam.DoFn):
    """DoFn that converts a ``(name, email, age)`` row into a dict.

    Fields that fail validation are replaced by a placeholder instead of
    dropping the row.
    """

    def __init__(self, *unused_args, **unused_kwargs):
        super().__init__(*unused_args, **unused_kwargs)

    def process(self, element):
        """Validate one row and yield it as a dict.

        Args:
            element: An iterable of exactly three items, unpacked as
                name, email and age.

        Yields:
            A dict with the keys ``"Name"``, ``"Email"`` and ``"Age"``.
        """
        raw_name, raw_email, raw_age = element

        # A name must be a string with at least one non-whitespace character.
        name_ok = isinstance(raw_name, str) and bool(raw_name.strip())
        # An email must be a string containing both "@" and ".".
        email_ok = (
            isinstance(raw_email, str)
            and "@" in raw_email
            and "." in raw_email
        )
        # An age must be a strictly positive int.
        age_ok = isinstance(raw_age, int) and raw_age > 0

        yield {
            "Name": raw_name if name_ok else "Unknown",
            "Email": raw_email if email_ok else "invalid@example.com",
            # None is emitted for an invalid age (intended as NULL in BigQuery).
            "Age": raw_age if age_ok else None,
        }


# Build the pipeline: one seed element triggers the Postgres read, each row is
# formatted into a dict and the dicts are appended to the BigQuery table.
# NOTE(review): `template_location` is set in the options, so leaving this
# block presumably stages a template instead of launching a job -- confirm.
with beam.Pipeline(options=pipelines_options) as p:
    postgres_rows = (
        p
        # A single dummy element so the reader DoFn runs exactly once.
        | "Create Input" >> beam.Create([None])
        | "Lendo Bando de Dados" >> beam.ParDo(ReadUsersFromPostgres())
        | "Formatando dados para o Big Query" >> beam.ParDo(FormatUsersToBq())
        # NOTE(review): no schema is passed although the table may be created
        # on demand; presumably the table already exists -- confirm.
        | "Enviando para o Big Query" >> beam.io.WriteToBigQuery(
            table=BIG_QUERY_TABLE,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
            custom_gcs_temp_location=CLOUD_STORAGE_TEMP,
        )
    )





In [20]:
from google.cloud.secretmanager import GetSecretRequest

def get_secret(project_id: str, secret_id: str) -> "secretmanager.Secret":
    """
    Get information about the given secret. This only returns metadata about
    the secret container, not any secret material.

    Args:
        project_id: Project that owns the secret.
        secret_id: Identifier of the secret inside that project.

    Returns:
        The response of ``SecretManagerServiceClient.get_secret`` for the
        requested secret.

    Raises:
        Exception: If the replication policy in the response is neither
            "automatic" nor "user_managed".
    """

    # Import the Secret Manager client library.
    from google.cloud import secretmanager

    # Create the Secret Manager client.
    client = secretmanager.SecretManagerServiceClient()

    # Build the resource name of the secret.
    name = client.secret_path(project_id, secret_id)

    # Get the secret.
    response = client.get_secret(request={"name": name})

    # Get the replication policy.
    if "automatic" in response.replication:
        replication = "AUTOMATIC"
    elif "user_managed" in response.replication:
        replication = "MANAGED"
    else:
        raise Exception(f"Unknown replication {response.replication}")

    # Print data about the secret.
    print(f"Got secret {response.name} with replication policy {replication}")

    # Hand the metadata back so callers can use it; previously the function
    # ended here and implicitly returned None despite its return annotation.
    return response

# Look up the metadata of the "coiso" secret in the course project.
get_secret(project_id="curso-apache-beam-gcp", secret_id="coiso")

Got secret projects/223295794346/secrets/coiso with replication policy AUTOMATIC
