In [1]:
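# Dependencies used by this notebook (install once into the kernel's environment):
# pip install wfdb pandas scikit-learn joblib kafka-python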

In [2]:
import wfdb
import pandas as pd
import os

# Location of the record handed to wfdb.rdrecord (built with os.path.join so
# the value is the same one the reader call has always received).
# Adjust this single variable if the record files live somewhere else.
data_path = os.path.join("Database", "16265")

# Number of leading samples to read from the record (passed as `sampto`).
N_SAMPLES = 30000

# Read the first N_SAMPLES samples of the record.
record = wfdb.rdrecord(data_path, sampto=N_SAMPLES)

# Build a DataFrame from the signal matrix; the two signal columns are
# labelled ECG1 and ECG2.
df = pd.DataFrame(record.p_signal, columns=["ECG1", "ECG2"])
df["timestamp"] = df.index / record.fs  # convert sample index to seconds

# Put the timestamp first so the CSV reads timestamp, ECG1, ECG2.
df = df[["timestamp", "ECG1", "ECG2"]]

# Save to CSV (without the integer index) and show a short preview.
df.to_csv("ecg_stream_sample.csv", index=False)
print("ECG CSV saved as ecg_stream_sample.csv")
print(df.head())

ECG CSV saved as ecg_stream_sample.csv
   timestamp   ECG1   ECG2
0   0.000000 -0.165 -0.325
1   0.007812 -0.155 -0.325
2   0.015625 -0.195 -0.305
3   0.023438 -0.205 -0.305
4   0.031250 -0.185 -0.295


In [3]:
import pandas as pd
from sklearn.ensemble import IsolationForest
import joblib
import os

# Destination of the serialized model; make sure its folder is present
# before anything is written there.
model_path = "models/model_v1.pkl"
os.makedirs("models", exist_ok=True)

# Reload the exported ECG samples and keep only the two signal columns as
# model features (the timestamp column is left out).
df = pd.read_csv("ecg_stream_sample.csv")
X = df.loc[:, ["ECG1", "ECG2"]]

# Fit the Isolation Forest; random_state is fixed so reruns build the same
# model, and contamination is set to 0.01.
print("Training Isolation Forest model...")
model = IsolationForest(contamination=0.01, random_state=42).fit(X)

# Persist the fitted estimator with joblib.
joblib.dump(model, model_path)
print(f"Model saved to: {model_path}")

Training Isolation Forest model...
Model saved to: models/model_v1.pkl


In [6]:
import pandas as pd
from kafka import KafkaProducer
import json
import time

# Load the ECG samples exported earlier (columns: timestamp, ECG1, ECG2).
df = pd.read_csv("ecg_stream_sample.csv")

# Kafka producer that serializes every message value as UTF-8 encoded JSON.
producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

print("Streaming ECG data to Kafka topic: 'ecg_stream'...")

try:
    # Stream each row as a JSON message
    for _, row in df.iterrows():
        message = {
            "timestamp": row["timestamp"],
            "ECG1": row["ECG1"],
            "ECG2": row["ECG2"]
        }
        producer.send("ecg_stream", value=message)
        time.sleep(0.05)  # pause 50 ms between sends (~20 messages per second)

    # send() only queues the record for a background sender; wait until every
    # queued message has actually been handed to the broker before reporting
    # success.
    producer.flush()
finally:
    # Always release the producer's sockets and background thread, even if
    # streaming is interrupted or fails part-way through.
    producer.close()

print("Streaming complete.")

Streaming ECG data to Kafka topic: 'ecg_stream'...
Streaming complete.
