In [2]:
import os

# Base directory that holds the connector JARs
jars_directory = "/usr/local/spark/jars/"

# Filenames (relative to jars_directory) of every JAR to load
jar_files = [
    "commons-pool2-2.12.1.jar",
    "kafka-clients-4.0.0.jar",
    "spark-sql-kafka-0-10_2.13-4.0.0.jar",
    "spark-token-provider-kafka-0-10_2.13-4.0.0.jar",
    "mongodb-driver-core-5.5.1.jar",
    "mongodb-driver-sync-5.5.1.jar",
    "mongo-spark-connector_2.13-10.5.0.jar",
    "bson-5.5.1.jar",
]

# Single comma-separated string of full JAR paths, in the order listed above
dependencies = ",".join(
    os.path.join(jars_directory, filename) for filename in jar_files
)

from pyspark.sql import SparkSession

# Create the Spark session with the MongoDB connector.
#
# The connection URI is read from the MONGODB_URI environment variable when it
# is set (and non-empty), so credentials do not have to be edited in the
# source. The previously hard-coded URI is kept as the fallback so existing
# setups behave exactly as before.
mongo_uri = (
    os.environ.get("MONGODB_URI")
    or "mongodb://admin:12345678@host.docker.internal:27017"
)

spark = (
    SparkSession.builder
    .appName("MongoDBSparkConnector")
    .config("spark.jars", dependencies)
    # The same URI is used for both the read and the write side.
    .config("spark.mongodb.read.connection.uri", mongo_uri)
    .config("spark.mongodb.write.connection.uri", mongo_uri)
    .getOrCreate()
)

# Read data from MongoDB (database: spark, collection: users)
mongo_reader = spark.read.format("mongodb")
mongo_reader = mongo_reader.option("database", "spark")
mongo_reader = mongo_reader.option("collection", "users")
df = mongo_reader.load()

# Display the rows that were read
df.show()

# Build a small sample DataFrame and append it to MongoDB
# (database: spark, collection: users)
columns = ["Name", "age"]
data = [("Alice", 29), ("Bob", 35), ("Cathy", 21)]
df_to_write = spark.createDataFrame(data, columns)

# "append" mode is used for the save
(
    df_to_write.write
    .format("mongodb")
    .option("database", "spark")
    .option("collection", "users")
    .mode("append")
    .save()
)

print("Datos escritos en la colección 'users'")

+-----+--------------------+---+
| Name|                 _id|age|
+-----+--------------------+---+
|  Bob|68b1c037cbed2d4c7...| 35|
|Cathy|68b1c037cbed2d4c7...| 21|
|Alice|68b1c037cbed2d4c7...| 29|
+-----+--------------------+---+

Datos escritos en la colección 'users'
