In [None]:
import time
import json
import requests
from kafka import KafkaProducer

def _encode_event(payload):
    """Serialize an event payload to UTF-8 encoded JSON bytes for Kafka."""
    return json.dumps(payload).encode('utf-8')


# Kafka producer whose message values are JSON-encoded by _encode_event.
producer = KafkaProducer(bootstrap_servers='localhost:9092', value_serializer=_encode_event)

# Endpoint polled for the current station status feed.
API_URL = "https://gbfs.citibikenyc.com/gbfs/en/station_status.json"

# Tunables for the polling loop (seconds).
REQUEST_TIMEOUT_SECONDS = 10  # bound the HTTP call so a stalled connection cannot hang the loop
POLL_INTERVAL_SECONDS = 60    # pause after a successful poll
ERROR_BACKOFF_SECONDS = 10    # pause after a failed poll before retrying

# Poll the station-status endpoint forever and publish one Kafka message per
# station to the 'bike_station_status' topic. Any failure in a poll is printed
# and retried after a short back-off (best-effort loop; interrupt the kernel
# to stop it).
while True:
    try:
        response = requests.get(API_URL, timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail fast on HTTP 4xx/5xx instead of trying to parse an error body.
        response.raise_for_status()
        data = response.json()
        stations = data['data']['stations']

        # One timestamp (local wall-clock time) per poll, so every event from
        # the same snapshot carries the same event_time.
        event_time = time.strftime('%Y-%m-%d %H:%M:%S')

        for station in stations:
            event = {
                "station_id": station['station_id'],
                "num_bikes_available": station['num_bikes_available'],
                "num_docks_available": station['num_docks_available'],
                "is_renting": station['is_renting'],
                "last_reported": station['last_reported'],
                "event_time": event_time
            }
            producer.send('bike_station_status', event)

        # send() only enqueues records; block until the buffered batch has
        # been handed to the broker before reporting it as sent.
        producer.flush()

        print(f"Sent {len(stations)} events to Kafka")
        time.sleep(POLL_INTERVAL_SECONDS)
    except Exception as e:
        print("Error:", e)
        time.sleep(ERROR_BACKOFF_SECONDS)


In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, TimestampType
from pyspark.sql.functions import from_json, col, to_timestamp

# SparkSession with the Kafka connector package configured.
# getOrCreate() reuses an already-running session if one exists.
spark = (
    SparkSession.builder
    .appName("BikeStreaming")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.13:4.1.0",
    )
    .getOrCreate()
)

# Read the stream from Kafka: subscribe to the 'bike_station_status' topic,
# starting from the latest offsets.
kafka_source_options = {
    "kafka.bootstrap.servers": "localhost:9092",
    "subscribe": "bike_station_status",
    "startingOffsets": "latest",
}
df = spark.readStream.format("kafka").options(**kafka_source_options).load()

# Schema used to parse the JSON carried in each Kafka message value.
# event_time is declared as a string here and converted to a timestamp
# after parsing.
_schema_fields = [
    ("station_id", StringType()),
    ("num_bikes_available", IntegerType()),
    ("num_docks_available", IntegerType()),
    ("is_renting", IntegerType()),
    ("last_reported", LongType()),
    ("event_time", StringType()),
]
schema = StructType(
    [StructField(field_name, field_type) for field_name, field_type in _schema_fields]
)

# Parse the JSON payload: cast the Kafka value to a string, decode it with
# the schema, flatten the struct into top-level columns, then convert
# event_time from string to timestamp.
raw_json_df = df.selectExpr("CAST(value AS STRING)")
decoded_df = raw_json_df.select(from_json(col("value"), schema).alias("data"))
parsed_df = decoded_df.select("data.*").withColumn(
    "event_time", to_timestamp(col("event_time"))
)

# Print the parsed rows to the console for testing (append output mode).
console_writer = parsed_df.writeStream.format("console").outputMode("append")
query = console_writer.start()

# Block this cell until the streaming query terminates.
query.awaitTermination()
