# **Configuração Inicial**

## Importação das bibliotecas

In [66]:
# Importação das bibliotecas e funções a serem utilizadas no códigos.
from pyspark.sql import SparkSession
from datetime import datetime

## Criando a sessão do Spark

In [74]:
# Criando a sessão do Spark.
spark = SparkSession.builder \
    .master("local") \
    .appName("MongoDB") \
    .config("spark.mongodb.read.connection.uri", "mongodb://localhost:27017/ProjetoPMD") \
    .config("spark.mongodb.write.connection.uri", "mongodb://localhost:27017/ProjetoPMD") \
    .getOrCreate()
    

# **Testes de Conexão**

## Testando Operação de Leitura

In [75]:
# Lendo o banco de dados no mongoDB em um dataframe.
df = spark.read.format("mongodb")\
    .option("uri", "mongodb://localhost:27017/")\
    .option("database", "ProjetoPMD")\
    .option("collection", "itineraries")\
    .load()

In [76]:
# Visualização do dataframe carregado a partir do mongoDB.
df.show()

+--------------------+--------+------------------+-----------+-------------+-------------------+--------------+---------+------------+--------------------+-------------------+--------------+-------------------+--------------------+--------------------------+-------------------------------+----------------------+-----------------+----------------------------+---------------------------------+------------------------+----------------+-------------------------+----------------------------+---------------+---------+-------------------+--------------+
|                 _id|baseFare|destinationAirport|elapsedDays|fareBasisCode|         flightDate|isBasicEconomy|isNonStop|isRefundable|               legId|         searchDate|seatsRemaining|segmentsAirlineCode| segmentsAirlineName|segmentsArrivalAirportCode|segmentsArrivalTimeEpochSeconds|segmentsArrivalTimeRaw|segmentsCabinCode|segmentsDepartureAirportCode|segmentsDepartureTimeEpochSeconds|segmentsDepartureTimeRaw|segmentsDistance|segmentsDur

## Testando Operação de Escrita

In [71]:
# Definindo o schema do novo registro.
schema = df.schema

# Definindo os dados para o novo registro.
data = [
    {
        "_id": "a1",
        "baseFare": 260.47,
        "destinationAirport": "BOS",
        "elapsedDays": 0,
        "fareBasisCode": "L0AJZNN1",
        "flightDate": datetime.fromisoformat("2022-04-17T00:00:00"),
        "isBasicEconomy": False,
        "isNonStop": False,
        "isRefundable": False,
        "legId": "721d9a2f66fe479e7c17b13e7ae0bb15",
        "searchDate": datetime.fromisoformat("2022-04-16T00:00:00"),
        "seatsRemaining": 1,
        "segmentsAirlineCode": "AA||AA",
        "segmentsAirlineName": "American Airlines||American Airlines",
        "segmentsArrivalAirportCode": "CLT||BOS",
        "segmentsArrivalTimeEpochSeconds": "1650212880||1650224280",
        "segmentsArrivalTimeRaw": "2022-04-17 12:28:00||2022-04-17 15:38:00",
        "segmentsCabinCode": None,
        "segmentsDepartureAirportCode": "ATL||CLT",
        "segmentsDepartureTimeEpochSeconds": "1650207720||1650216420",
        "segmentsDepartureTimeRaw": "2022-04-17 11:02:00||2022-04-17 13:27:00",
        "segmentsDistance": None,
        "segmentsDurationInSeconds": None,
        "segmentsEquipmentDescription": "Canadair Regional Jet 900||Airbus A321",
        "startingAirport": "ATL",
        "totalFare": 302.11,
        "totalTravelDistance": 956,
        "travelDuration": "PT4H36M"
    }
]

In [72]:
# Criando um dataframe do spark dos dados criados anteriormente.
new_df = spark.createDataFrame(data, schema=schema)

# Adicionando esse registro no database.
new_df.write.format("mongodb") \
    .mode("append") \
    .option("database", "ProjetoPMD") \
    .option("collection", "itineraries") \
    .save()

In [73]:
# Para remoção do registro adicionado anteriormente, porém, o uso deve ser no terminal mongosh do próprio mongoDB.
%use ProjetoPMD
%db.itineraries.deleteOne({ _id: "a1" })

UsageError: Line magic function `%use` not found.


In [None]:
# Para finalizar a sessão do spark
spark.stop()