# In [None]:
from google.cloud import dataproc_v1
from google.protobuf.duration_pb2 import Duration

# Function to submit a PySpark job to the Dataproc cluster
def submit_pyspark_job(
    cluster_name: str,
    project_id: str,
    region: str,
    gcs_bucket: str,
    input_path: str,
    output_path: str,
    database: str,
    username: str,
    password: str,
):
    """Submit ``pyspark_job.py`` from a GCS bucket to a Dataproc cluster and wait for it.

    The job's main file and its JDBC / auth helper JARs are all read from the
    root of ``gs://{gcs_bucket}``. The call blocks until the submitted
    operation finishes or the 600-second wait expires.

    Args:
        cluster_name: Name of the Dataproc cluster the job is placed on.
        project_id: Google Cloud project that owns the cluster.
        region: Dataproc region; also selects the regional API endpoint.
        gcs_bucket: Bucket name (without the ``gs://`` prefix) that holds
            ``pyspark_job.py`` and the JAR files.
        input_path: First positional argument forwarded to the PySpark script.
        output_path: Second positional argument forwarded to the PySpark script.
        database: Third positional argument forwarded to the PySpark script.
        username: Fourth positional argument forwarded to the PySpark script.
        password: Fifth positional argument forwarded to the PySpark script.

    Returns:
        Whatever ``operation.result()`` yields for the submitted job
        (presumably the finished Dataproc ``Job`` resource; confirm against
        the client library documentation).

    Raises:
        Any exception raised by the Dataproc client or by
        ``operation.result`` (e.g. when the 600-second wait expires).
    """
    # The endpoint host is derived from the region so the request goes to the
    # regional Dataproc service.
    # NOTE(review): `credentials` is not a parameter of this function; it must
    # already exist at module scope (defined outside the code visible here).
    job_client = dataproc_v1.JobControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"},
        credentials=credentials,
    )

    bucket_uri = f"gs://{gcs_bucket}"

    # Define the PySpark job. All values come from this function's parameters;
    # there is no enclosing object, so nothing may be read through `self`.
    job = {
        "placement": {"cluster_name": cluster_name},
        "pyspark_job": {
            "main_python_file_uri": f"{bucket_uri}/pyspark_job.py",
            "jar_file_uris": [
                f"{bucket_uri}/postgresql-42.3.1.jar",
                f"{bucket_uri}/postgres-socket-factory-1.20.1.jar",
                f"{bucket_uri}/jdbc-socket-factory-core-1.20.1.jar",
                f"{bucket_uri}/google-auth-library-credentials-1.24.1.jar",
                f"{bucket_uri}/google-auth-library-oauth2-http-1.24.1.jar",
            ],
            # NOTE(review): the password travels as a plain job argument and is
            # likely visible in the job's recorded configuration; consider a
            # secret store instead. Left unchanged to keep the script's
            # argument contract.
            "args": [input_path, output_path, database, username, password],
        },
    }

    # Submit the job to Dataproc as a long-running operation.
    operation = job_client.submit_job_as_operation(
        request={"project_id": project_id, "region": region, "job": job}
    )

    # Block until the job completes, waiting at most 600 seconds.
    return operation.result(timeout=600)
