### Data Generation
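#### The first cell below defines a function that generates one simulated price point for a stock (IBM in this notebook); the producer cell further down calls it once every 5 seconds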

In [0]:
from random import random
from datetime import datetime
import random
import math


# Function to generate time series data with a pattern
def generate_market_data(
    max_event_counts,
    ticker,
    base_value=100,
    amplitude=20,
    frequency=0.1,
    noise_amplitude=5,
):
    """Build one synthetic price point that follows a noisy sine wave.

    The price is ``base_value + amplitude * sin(frequency * max_event_counts)``
    plus uniform random noise, rounded to two decimals.

    Args:
        max_event_counts: Position on the sine wave. Despite the name it is
            used as the index of the current event (the producer loop passes
            its running message counter).
        ticker: Ticker symbol copied verbatim into the record.
        base_value: Price level the wave oscillates around.
        amplitude: Peak deviation of the wave from ``base_value``.
        frequency: Multiplier applied to ``max_event_counts``; determines the
            cycle length.
        noise_amplitude: Noise is drawn uniformly from
            ``[-noise_amplitude, noise_amplitude]``.

    Returns:
        dict with keys ``"timestamp"`` (current local time formatted as
        ``%Y-%m-%dT%H:%M:%S``, without timezone information), ``"ticker"``
        and ``"price"``.
    """
    # Draw the noise first so that seeding `random` gives reproducible prices.
    noise = random.uniform(-noise_amplitude, noise_amplitude)
    value = base_value + amplitude * math.sin(frequency * max_event_counts) + noise
    data = {
        "timestamp": datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
        "ticker": ticker,
        "price": round(value, 2),
    }
    return data


In [0]:
#Installation de l'api de confluent kafka 
pip install confluent_kafka

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
Collecting confluent_kafka
  Downloading confluent_kafka-2.7.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.0/4.0 MB 14.2 MB/s eta 0:00:00
Installing collected packages: confluent_kafka
Successfully installed confluent_kafka-2.7.0
[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m


#### The cell below sends the raw data generated above to the Kafka topic "market_data" hosted on Confluent Cloud

In [0]:
import json
import os
import time
from confluent_kafka import Producer
import os

# Connection settings are read from the environment. Non-secret settings fall
# back to the defaults below when the variable is not already set, so an
# externally supplied configuration takes precedence over the notebook.
os.environ.setdefault("KAFKA_BROKER", "pkc-921jm.us-east-2.aws.confluent.cloud:9092")
os.environ.setdefault("KAFKA_TOPIC", "market_data")
os.environ.setdefault("KAFKA_TOPIC_PROCESSED", "processed_data")

# SECURITY: the API key and secret must never be written into the notebook.
# Provide them through the environment (cluster environment variables, a
# secrets manager, ...) before running this cell.
# NOTE(review): any key pair that was ever committed in this notebook must be
# treated as leaked and rotated in Confluent Cloud.
_missing_credentials = [
    name for name in ("KAFKA_API_KEY", "KAFKA_API_SECRET") if not os.getenv(name)
]
if _missing_credentials:
    raise RuntimeError(
        "Missing Kafka credentials: set the environment variable(s) "
        + ", ".join(_missing_credentials)
        + " before running this cell."
    )

# Kafka producer configuration
config = {
    "bootstrap.servers": os.getenv("KAFKA_BROKER"),
    # SASL/PLAIN over TLS, authenticated with the API key pair.
    "security.protocol": "SASL_SSL",
    "sasl.mechanisms": "PLAIN",
    "sasl.username": os.getenv("KAFKA_API_KEY"),
    "sasl.password": os.getenv("KAFKA_API_SECRET"),
    "client.id": "transaction-producer",
    "acks": "all",
    "retries": 5,
    "batch.size": 16384,
    "linger.ms": 5,
    "compression.type": "gzip",
}

# Initialize the Kafka producer
producer = Producer(config)

# Define the topic to send data to
topic = os.getenv("KAFKA_TOPIC")


# Delivery-report callback: the producer invokes it once per produced message.
def delivery_report(err, msg):
    """Print the outcome of a single message delivery.

    Args:
        err: ``None`` when the delivery succeeded, otherwise the error to report.
        msg: The message the report is about; it must expose ``topic()``,
            ``partition()`` and ``offset()``.
    """
    if err is None:
        print(
            f"Message delivered to {msg.topic()} [Partition: {msg.partition()}] at Offset: {msg.offset()}"
        )
        return

    print(f"Message delivery failed: {err}")


def publish_messages(max_messages, ticker_name, message_interval=5):
    """Publish simulated price points for one ticker to the Kafka topic.

    Exactly ``max_messages`` records are produced (event indices
    ``0 .. max_messages - 1``), each keyed by the ticker name and serialized
    as JSON. The loop can be stopped early with a keyboard interrupt.

    Args:
        max_messages: Number of messages to publish.
        ticker_name: Ticker symbol; used both as the message key and inside
            the generated record.
        message_interval: Seconds to wait between two consecutive messages.
    """
    print(f"Publishing time series data to Kafka topic '{topic}'...")
    try:
        # range() stops at max_messages - 1, so the message count matches the
        # argument (a zero-based counter tested with `<=` sends one extra).
        for counter in range(max_messages):
            value = json.dumps(generate_market_data(counter, ticker_name))
            producer.produce(
                topic=topic, key=ticker_name, value=value, callback=delivery_report
            )
            print(f"Published: {value}")

            # Poll to trigger the delivery report callback
            producer.poll(0)

            # No need to wait once the last message has been handed over.
            if counter < max_messages - 1:
                time.sleep(message_interval)

    except KeyboardInterrupt:
        print("Stopped publishing.")

    finally:
        # produce() only enqueues; flush so pending messages are sent and
        # their delivery reports fire before returning. Wait at most 10 s.
        remaining = producer.flush(10)
        if remaining:
            print(f"{remaining} message(s) were still undelivered after flushing.")


# Entry point: start publishing IBM price points when this cell is executed
# as the main module.
if __name__ == "__main__":
    publish_messages(max_messages=1000, ticker_name="IBM")


Publishing time series data to Kafka topic 'market_data'...
Published: {"timestamp": "2025-01-07T15:11:58", "ticker": "IBM", "price": 101.09}
Published: {"timestamp": "2025-01-07T15:12:03", "ticker": "IBM", "price": 104.82}
Message delivered to market_data [Partition: 5] at Offset: 1659
Published: {"timestamp": "2025-01-07T15:12:08", "ticker": "IBM", "price": 104.04}
Message delivered to market_data [Partition: 5] at Offset: 1660
Published: {"timestamp": "2025-01-07T15:12:13", "ticker": "IBM", "price": 104.15}
Message delivered to market_data [Partition: 5] at Offset: 1661
Published: {"timestamp": "2025-01-07T15:12:18", "ticker": "IBM", "price": 107.18}
Message delivered to market_data [Partition: 5] at Offset: 1662
Published: {"timestamp": "2025-01-07T15:12:24", "ticker": "IBM", "price": 114.53}
Message delivered to market_data [Partition: 5] at Offset: 1663
Published: {"timestamp": "2025-01-07T15:12:29", "ticker": "IBM", "price": 110.88}
Message delivered to market_data [Partition: 5