In [0]:
# Fetch the Postgres user name and password from the "devDolphin" secret scope.
pg_user, pg_pass = (
    dbutils.secrets.get(scope="devDolphin", key=secret_key)
    for secret_key in ("kushagra", "pg-password")
)

In [0]:
# Credentials are kept in the "devDolphin" secret scope rather than in the notebook.
pg_user, pg_pass = (
    dbutils.secrets.get(scope="devDolphin", key=secret_key)
    for secret_key in ("kushagra", "pg-password")
)

# Target server and database (replace "postgres" with your actual DB name if needed).
hostname = "devdolphinpostgresdb.postgres.database.azure.com"
database = "postgres"

# JDBC URL: port 5432, SSL required.
jdbc_url = "jdbc:postgresql://{}:5432/{}?sslmode=require".format(hostname, database)

# JDBC properties shared by the reads and writes in the cells below.
connection_properties = dict(
    user=pg_user,
    password=pg_pass,
    driver="org.postgresql.Driver",
)

# ✅ Connectivity check: read information_schema.tables and display five rows.
df = (
    spark.read.format("jdbc")
    .options(url=jdbc_url, dbtable="information_schema.tables", **connection_properties)
    .load()
)

df.show(5)


In [0]:
from pyspark.sql import Row

# 1️⃣ Create a tiny test DataFrame
test_data = [
    Row(id=1, name="Alice"),
    Row(id=2, name="Bob")
]

test_df = spark.createDataFrame(test_data)

# 2️⃣ Write to PostgreSQL
test_table = "test_write_permissions"

test_df.write.jdbc(
    url=jdbc_url,
    table=test_table,
    mode="overwrite",  # replace the table if it already exists
    properties=connection_properties
)

print(f"✅ Successfully wrote to table '{test_table}'")

# 3️⃣ Read it back to verify
verify_df = spark.read.jdbc(
    url=jdbc_url,
    table=test_table,
    properties=connection_properties
)

verify_df.show()

# 4️⃣ Clean up: drop the test table in Postgres.
# spark.sql("DROP TABLE ...") runs against the Spark catalog, not the Postgres
# server, so it would leave the table written above in place. Issue the DDL over
# a JDBC connection opened through the driver JVM instead.
# NOTE(review): spark._jvm is a private attribute and needs direct JVM access
# (not available on Spark Connect sessions) — confirm for your cluster type.
pg_jdbc_conn = spark._jvm.java.sql.DriverManager.getConnection(
    jdbc_url,
    connection_properties["user"],
    connection_properties["password"],
)
try:
    pg_jdbc_stmt = pg_jdbc_conn.createStatement()
    try:
        pg_jdbc_stmt.executeUpdate(f"DROP TABLE IF EXISTS {test_table}")
    finally:
        pg_jdbc_stmt.close()
finally:
    # Always release the JDBC connection, even if the DROP fails.
    pg_jdbc_conn.close()


In [0]:
# 1️⃣ Create table schema in Spark
empty_df = spark.createDataFrame([], schema="merchant STRING, total_txn LONG")

# 2️⃣ One-off overwrite of the empty frame so merchant_txn_state exists in Postgres
(
    empty_df.write
    .format("jdbc")
    .options(
        url=jdbc_url,
        dbtable="merchant_txn_state",
        user=pg_user,
        password=pg_pass,
        driver="org.postgresql.Driver",
    )
    .mode("overwrite")
    .save()
)

print("✅ Table created in Postgres!")


In [0]:
# Example: Write a mini DataFrame for testing
from pyspark.sql import Row

# Two sample (merchant, total_txn) rows used as the staging data
data = [
    Row(merchant=merchant_id, total_txn=txn_count)
    for merchant_id, txn_count in (('M001', 100), ('M002', 250))
]
df = spark.createDataFrame(data)

# Overwrite the staging table temp_merchant_txn in Postgres with these rows
(
    df.write
    .format("jdbc")
    .options(
        url=jdbc_url,
        dbtable="temp_merchant_txn",
        user=pg_user,
        password=pg_pass,
        driver="org.postgresql.Driver",
    )
    .mode("overwrite")
    .save()
)

print("✅ Temp table written to Postgres")


In [0]:
%pip install psycopg2-binary


In [0]:
import psycopg2

# Open a direct psycopg2 connection (SSL required) to run DDL on the server.
conn = psycopg2.connect(
    host=hostname,
    database=database,
    user=pg_user,
    password=pg_pass,
    sslmode="require"
)

try:
    # The cursor context manager closes the cursor even if execute() raises.
    with conn.cursor() as cur:
        # Drop if exists and recreate with PRIMARY KEY
        # (destructive: any existing rows in merchant_txn_state are lost).
        cur.execute("""
    DROP TABLE IF EXISTS merchant_txn_state;
    CREATE TABLE merchant_txn_state (
        merchant TEXT PRIMARY KEY,
        total_txn BIGINT
    );
""")
    conn.commit()
finally:
    # Always release the connection; an uncommitted transaction is discarded on close.
    conn.close()

print("✅ merchant_txn_state created with PRIMARY KEY")


In [0]:
import psycopg2

# Upsert: copy every row from temp_merchant_txn into merchant_txn_state; when the
# merchant key already exists, replace its total_txn with the incoming value.
merge_sql = """
INSERT INTO merchant_txn_state (merchant, total_txn)
SELECT merchant, total_txn FROM temp_merchant_txn
ON CONFLICT (merchant)
DO UPDATE SET total_txn = EXCLUDED.total_txn;
"""

conn = psycopg2.connect(
    host=hostname,
    database=database,
    user=pg_user,
    password=pg_pass,
    sslmode="require"
)

try:
    # The cursor context manager closes the cursor even if execute() raises.
    with conn.cursor() as cur:
        cur.execute(merge_sql)
    conn.commit()
finally:
    # Always release the connection; an uncommitted transaction is discarded on close.
    conn.close()

print("✅ Upsert done via psycopg2")


In [0]:
# Load merchant_txn_state over JDBC to inspect the rows after the upsert
df = (
    spark.read.format("jdbc")
    .options(url=jdbc_url, dbtable="merchant_txn_state", **connection_properties)
    .load()
)

df.show()


In [0]:
import psycopg2

# Fetch up to 20 rows of merchant_txn_state directly through psycopg2.
query_sql = "SELECT * FROM merchant_txn_state LIMIT 20;"

conn = psycopg2.connect(
    host=hostname,
    database=database,
    user=pg_user,
    password=pg_pass,
    sslmode="require"
)

try:
    # The cursor context manager closes the cursor even if the query raises.
    with conn.cursor() as cur:
        cur.execute(query_sql)
        rows = cur.fetchall()
finally:
    # Always release the connection, whether or not the query succeeded.
    conn.close()

# Each row is printed as a tuple of column values.
for row in rows:
    print(row)


In [0]:
from pyspark.sql import SparkSession

# Credentials come from the "devDolphin" secret scope.
pg_user, pg_pass = (
    dbutils.secrets.get(scope="devDolphin", key=secret_key)
    for secret_key in ("kushagra", "pg-password")
)

# Target server and database (replace "postgres" with your actual DB name if needed).
hostname = "devdolphinpostgresdb.postgres.database.azure.com"
database = "postgres"

# JDBC URL: port 5432, SSL required.
jdbc_url = "jdbc:postgresql://{}:5432/{}?sslmode=require".format(hostname, database)

# JDBC properties shared by both writes below.
connection_properties = dict(
    user=pg_user,
    password=pg_pass,
    driver="org.postgresql.Driver",
)

# === 1) Build empty DataFrames carrying the required column layouts ===

gender_state_schema = "merchant STRING, customer STRING, gender STRING"
pattern3_schema = """
  YStartTime TIMESTAMP,
  detectionTime TIMESTAMP,
  patternId STRING,
  ActionType STRING,
  customerName STRING,
  MerchantId STRING
"""

empty_gender_state = spark.createDataFrame([], gender_state_schema)
empty_pattern3 = spark.createDataFrame([], pattern3_schema)

# === 2) Write each empty frame with mode "overwrite" ===
# This creates the table when it is missing and REPLACES it (dropping its rows)
# when it already exists, so run this cell only for initial setup or a reset.

empty_gender_state.write.jdbc(
    url=jdbc_url,
    table="gender_summary_state",
    mode="overwrite",
    properties=connection_properties,
)

empty_pattern3.write.jdbc(
    url=jdbc_url,
    table="pattern3_detections",
    mode="overwrite",
    properties=connection_properties,
)

print("✅ Both Postgres tables created or overwritten.")


In [0]:
import psycopg2

# Fetch every row of gender_summary_state directly through psycopg2.
query_sql = "SELECT * FROM gender_summary_state ;"

conn = psycopg2.connect(
    host=hostname,
    database=database,
    user=pg_user,
    password=pg_pass,
    sslmode="require"
)

try:
    # The cursor context manager closes the cursor even if the query raises.
    with conn.cursor() as cur:
        cur.execute(query_sql)
        rows = cur.fetchall()
finally:
    # Always release the connection, whether or not the query succeeded.
    conn.close()

# Each row is printed as a tuple of column values.
for row in rows:
    print(row)
