In [None]:
# %% [markdown]
# # Spark Batch Consumer (Test in Jupyter)
# 
# Requires: `pip install pyspark findspark`

# %%
import findspark
findspark.init()  # Only needed if Spark is locally installed

from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col
import json

# %% [markdown]
# ## 1. Initialize Spark Session (Local Mode)

# %%
# Build (or reuse) a local two-thread session. The Kafka source connector is
# requested through spark.jars.packages so the "kafka" format is available.
spark = (
    SparkSession.builder
    .appName("RedpandaTest")
    .master("local[2]")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0",
    )
    .getOrCreate()
)

# %% [markdown]
# ## 2. Test Connection to Redpanda

# %%
# Batch-read the CDC topic starting at the earliest offset and keep at most
# five rows; this doubles as a connectivity smoke test against the broker.
test_df = (
    spark.read
    .format("kafka")
    .option("kafka.bootstrap.servers", "localhost:9092")
    .option("subscribe", "pg_cdc.public.employee_activities")
    .option("startingOffsets", "earliest")
    .load()
    .limit(5)
)

# truncate=False prints full cell contents instead of clipping long values.
test_df.show(truncate=False)

# %% [markdown]
# ## 3. Process Sample Batch

# %%
# Mock schema - replace with your actual schema.
# When the schema is passed to from_json as a string it is expected to be a
# DDL-formatted column list. The previous JSON document was not a valid Spark
# schema ("int" is not a JSON type name and the fields lacked "nullable" /
# "metadata"), so it is expressed as DDL instead.
sample_schema = "id INT, user_id INT, activity STRING"

# Parse messages: the Kafka `value` column is binary, so cast it to a string
# before decoding the JSON, then flatten the resulting struct into columns.
# NOTE(review): the Slack notifier cell reads record['after']['activity'], so
# real payloads are probably wrapped in a CDC envelope; if so, nest these
# fields under `after` in the schema -- TODO confirm against actual messages.
parsed = test_df.select(
    from_json(col("value").cast("string"), sample_schema).alias("data")
).select("data.*")

parsed.show()

# %% [markdown]
# ## 4. Mock Upload Function
# %%
def mock_upload(df):
    """Pretend to upload ``df`` to a data warehouse.

    Prints a banner, displays the frame through ``df.show()``, then reports
    the number of records obtained from ``df.count()``. Nothing is written
    anywhere and nothing is returned.
    """
    print("=== Simulating upload to data warehouse ===")
    df.show()
    # Count after showing so the output order stays banner -> rows -> summary.
    record_total = df.count()
    print(f"Would upload {record_total} records")
    
# Dry run: feed the parsed sample batch through the mock uploader.
mock_upload(parsed)

KeyboardInterrupt: 

In [4]:
# %% [markdown]
# # Slack Notifier Test (Jupyter)
# 
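# Requires: `pip install python-dotenv kafka-python requests`
# 
# Note: `ModuleNotFoundError: No module named 'kafka.vendor.six.moves'` on import means the installed `kafka-python` release is too old for this Python version (seen with kafka-python 2.0.2 on Python 3.12); upgrade with `pip install -U kafka-python`.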

# %%
from kafka import KafkaConsumer
import requests
import json
from IPython.display import display, Markdown
import time

# %% [markdown]
# ## 1. Mock Slack Webhook

# %%
class MockSlack:
    """In-memory stand-in for a Slack webhook client.

    Each posted text is stored in ``messages`` and echoed into the notebook
    output; no network request is made.
    """

    def __init__(self):
        # Texts received so far, oldest first.
        self.messages = []

    def post(self, url, json):
        """Record ``json['text']`` and render it as Markdown.

        ``url`` is accepted but never used. The ``json`` parameter keeps the
        same name as the keyword used by ``requests.post``. Returns an object
        whose ``status_code`` attribute is 200. Raises ``KeyError`` when the
        payload has no ``'text'`` entry.
        """
        text = json['text']
        self.messages.append(text)
        display(Markdown(f"**Mock Slack:** {text}"))
        # A throwaway class object plays the role of the HTTP response.
        return type('obj', (object,), dict(status_code=200))


mock_slack = MockSlack()

# %% [markdown]
# ## 2. Test Consumer (Read First 5 Messages)

# %%
MAX_TEST_MESSAGES = 5  # Limit to 5 messages for testing


def _decode_json(raw):
    """Decode a UTF-8 JSON message value.

    Returns None for null-valued records (e.g. delete tombstones on a CDC
    topic) instead of failing on ``None.decode``.
    """
    if raw is None:
        return None
    return json.loads(raw.decode('utf-8'))


# The consumer is created, drained and closed in the same cell so that the
# cell can be re-run on its own without hitting an already-closed consumer.
consumer = KafkaConsumer(
    "pg_cdc.public.employee_activities",
    bootstrap_servers="localhost:9092",
    auto_offset_reset="earliest",  # Read from beginning for test
    value_deserializer=_decode_json,
    consumer_timeout_ms=5000  # Stop after 5 sec inactivity
)

print("Listening for messages...")
try:
    for seen, message in enumerate(consumer, start=1):
        record = message.value
        # Only object payloads flagged as creates ('c') trigger a notification;
        # null values and non-object JSON are skipped rather than crashing.
        if isinstance(record, dict) and record.get('op') == 'c':
            msg = f"New activity: {record['after']['activity']}"  # Adjust fields
            mock_slack.post("mock_url", {"text": msg})

        # Stop right after the last wanted message instead of fetching (or
        # waiting out the timeout for) one more before breaking.
        if seen >= MAX_TEST_MESSAGES:
            break
finally:
    # Release the broker connection even if processing fails.
    consumer.close()

print("Test complete!")

ModuleNotFoundError: No module named 'kafka.vendor.six.moves'