In [None]:
# Smoke test: check that PySpark can start a session

from pyspark.sql import SparkSession

# Build the Spark session (getOrCreate reuses an already-active session if
# there is one) with the driver memory setting at 2g.
spark = (
    SparkSession.builder
    .appName("ETL_Test")
    .config("spark.driver.memory", "2g")
    .getOrCreate()
)

# Report success and the Spark version of the running session
print("✅ PySpark inicializado com sucesso!")
print(f"Spark versão: {spark.version}")


In [None]:
# Read example.csv into a Spark DataFrame

csv_path = "../input/csv/example.csv"

try:
    # header=True: column names come from the first row.
    # inferSchema=True: Spark inspects the data to choose column types.
    df = spark.read.csv(csv_path, header=True, inferSchema=True)
    print("✅ CSV lido com sucesso!")
    df.show(5)
except Exception as e:
    print("❌ Erro ao ler o CSV:", e)
    # Re-raise so execution stops at this cell. Later cells need `df`; if the
    # error were swallowed they would fail with an unrelated NameError, or
    # silently reuse a stale `df` left in the kernel by a previous run.
    raise


In [None]:
# Clean the data and save the result

import os
from utils.utils import create_logger

# Make sure the output directory exists before writing into it
output_path = "../output/clean"
os.makedirs(output_path, exist_ok=True)

# Project helper; the object it returns is called like a function below
log = create_logger()

# Example treatment: drop duplicate rows, then drop rows containing nulls
df_clean = df.dropDuplicates().dropna()

# coalesce(1) reduces the data to one partition before writing.
# Note: Spark's CSV writer creates a *directory* at this path holding the
# part file(s); "example-clean.csv" is a folder name, not a single file.
output_file = os.path.join(output_path, "example-clean.csv")
df_clean.coalesce(1).write.csv(output_file, mode="overwrite", header=True)

log("Arquivo tratado salvo em example-clean.csv")


In [None]:
# Check that the .env file is picked up and show the variables read from it

import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment
load_dotenv()

# Only the URL and user are echoed; a missing variable prints as None
print("DB_URL:", os.environ.get("DB_URL"))
print("DB_USER:", os.environ.get("DB_USER"))

In [None]:
# Test the SQL Server connection over JDBC

from utils.db_connection import get_spark_session, get_jdbc_params
from utils.utils import create_logger

log = create_logger()
# NOTE(review): this rebinds `spark`. Whether a new session is created or the
# existing one is reused depends on get_spark_session — confirm the JDBC
# driver is available to the session it returns.
spark = get_spark_session("ETL_SQL_TEST")
jdbc_params = get_jdbc_params()

try:
    # Read a metadata table as the connectivity probe; credentials and driver
    # class are passed through as JDBC connection properties.
    df_sql = spark.read.jdbc(
        url=jdbc_params["url"],
        table="INFORMATION_SCHEMA.TABLES",
        properties={key: jdbc_params[key] for key in ("user", "password", "driver")},
    )
    log("Conexão SQL Server bem-sucedida!")
    df_sql.show(5)
except Exception as e:
    # Any failure in the read or in show() is logged, not raised
    log(f"Erro na conexão: {e}")

In [None]:
# Example load step: write the cleaned DataFrame to the database
# Uses `df_clean`, `jdbc_params` and `log` defined in earlier cells.

try:
    # "overwrite" mode replaces any existing data in the target table
    df_clean.write.mode("overwrite").jdbc(
        url=jdbc_params["url"],
        table="example_clean",
        properties={key: jdbc_params[key] for key in ("user", "password", "driver")},
    )
    log("Tabela 'example_clean' enviada com sucesso para o SQL Server.")
except Exception as e:
    # Failures are logged, not raised
    log(f"Erro ao salvar no banco: {e}")