# Configuration for local database


In [7]:
""" Notebook: 00_config_spark.ipynb """

# Importing libraries
from pyspark.sql import SparkSession
import os

# Define the catalog name
catalog_name = "toll_reconciliation_tool"

# Define the layer names
bronze_layer = "bronze"
silver_layer = "silver"
gold_layer = "gold"

# Define the desired warehouse location explicitly
project_dir = "C:/Users/renat/Documents/imgPdados-finance-uber/toll-reconciliation-tool/spark-warehouse"
warehouse_location = "file:///" + os.path.abspath(project_dir)
print(f"Setting Spark warehouse to: {warehouse_location}")

# Change the current working directory
os.chdir(project_dir)
print(f"Changed current working directory to: {os.getcwd()}")


Setting Spark warehouse to: file:///C:\Users\renat\Documents\imgPdados-finance-uber\toll-reconciliation-tool\spark-warehouse
Changed current working directory to: C:\Users\renat\Documents\imgPdados-finance-uber\toll-reconciliation-tool\spark-warehouse


In [8]:
"""# Initialize Spark session with the specified warehouse directory
spark = SparkSession.builder \
    .appName(catalog_name) \
    .config("spark.sql.warehouse.dir", warehouse_location) \
    .config("hive.metastore.warehouse.dir", warehouse_location) \
    .enableHiveSupport() \
    .getOrCreate()"""

# Initialize Spark session with the specified warehouse directory
spark = SparkSession.builder \
    .appName(catalog_name) \
    .config("spark.sql.warehouse.dir", warehouse_location) \
    .enableHiveSupport() \
    .getOrCreate()


In [3]:

# Create the bronze database (if it doesn't exist)
spark.sql(f"CREATE DATABASE IF NOT EXISTS {bronze_layer}")

# Create the silver database (if it doesn't exist)
spark.sql(f"CREATE DATABASE IF NOT EXISTS {silver_layer}")

# Create the gold database (if it doesn't exist)
spark.sql(f"CREATE DATABASE IF NOT EXISTS {gold_layer}")

print(f"Spark session initialized with app name: {spark.sparkContext.appName}")
print(f"Databases '{bronze_layer}', '{silver_layer}', and '{gold_layer}' created (if they didn't exist) in: {warehouse_location}")


Spark session initialized with app name: toll_reconciliation_tool
Databases 'bronze', 'silver', and 'gold' created (if they didn't exist) in: file:///C:\Users\renat\Documents\imgPdados-finance-uber\toll-reconciliation-tool\spark-warehouse


In [None]:
# Print Metastore URI for debugging
print(f"Metastore URI: {spark.conf.get('hive.metastore.uris', 'default')}")  # Get with default to avoid error

# Check if the 'bronze' schema exists
result = spark.sql("SHOW SCHEMAS").collect()
schemas = [row[0] for row in result]
print(schemas)



Metastore URI: default
['bronze', 'default', 'gold', 'silver']
The 'bronze' schema already exists.


In [10]:
try:
    # Attempt a basic Hive operation
    spark.sql("SHOW DATABASES").show()

except Exception as e:
    print(f"Error: {e}")

finally:
    spark.stop()
    

+---------+
|namespace|
+---------+
|   bronze|
|  default|
|     gold|
|   silver|
+---------+



In [11]:
# You can stop the Spark session here
spark.stop()