In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, avg, sum as _sum, window
from pyspark.sql.types import StructType, StringType, IntegerType, DoubleType, TimestampType

# Spark session for the streaming job; only WARN-and-above log lines are kept.
spark = SparkSession.builder.appName("RealTimeTrafficAnalytics").getOrCreate()
spark.sparkContext.setLogLevel("WARN")

# Kafka connection settings.
KAFKA_BROKER = "kafka:9092"
TOPIC_NAME = "traffic_data"

# Streaming source: subscribe to the topic with startingOffsets set to "latest".
raw_df = (
    spark.readStream.format("kafka")
    .option("kafka.bootstrap.servers", KAFKA_BROKER)
    .option("subscribe", TOPIC_NAME)
    .option("startingOffsets", "latest")
    .load()
)

# Kafka delivers the payload in the `value` column; cast it to a string so it
# can be parsed as JSON below.
string_df = raw_df.selectExpr("CAST(value AS STRING)")

# Fields expected in each JSON message. `timestamp` is read as a string here
# and cast to a real timestamp after parsing.
schema = (
    StructType()
    .add("timestamp", StringType())
    .add("location", StringType())
    .add("vehicle_count", IntegerType())
    .add("avg_speed", DoubleType())
)

# Parse the JSON payload, flatten the struct into top-level columns, and
# convert the event time to TimestampType.
parsed_df = (
    string_df.select(from_json(col("value"), schema).alias("data"))
    .select("data.*")
    .withColumn("timestamp", col("timestamp").cast(TimestampType()))
)

# Per-location statistics over 1-minute event-time windows, with a 1-minute
# watermark on the event time.
one_minute_window = window(col("timestamp"), "1 minute")
aggregated_df = (
    parsed_df.withWatermark("timestamp", "1 minute")
    .groupBy(one_minute_window, col("location"))
    .agg(
        avg("avg_speed").alias("mean_speed"),
        _sum("vehicle_count").alias("total_vehicles"),
    )
    .select(
        col("window.start").alias("window_start"),
        col("window.end").alias("window_end"),
        col("location"),
        col("mean_speed"),
        col("total_vehicles"),
    )
)

# Sink: append results as CSV files, triggering a micro-batch every 60 seconds.
query = (
    aggregated_df.writeStream.outputMode("append")
    .format("csv")
    .option("path", "./traffic_analytics/")
    .option("checkpointLocation", "./analytics_checkpoints/")
    .trigger(processingTime="60 seconds")
    .start()
)

# Block this cell until the streaming query terminates.
query.awaitTermination()

In [None]:
import zipfile
import os
import shutil

# Core project sources to archive: the main code folder and the notebook.
project_code = ['work', 'traffic_stream_analysis.ipynb']
output_zip = 'traffic_project.zip'

# Path of the final ZIP file, created in the current working directory.
output_zip_path = os.path.join(os.getcwd(), output_zip)


def zip_project_files(items, zip_path, base_dir=None):
    """Archive files and directory trees into a deflate-compressed ZIP file.

    Directories are walked recursively and their files are stored under a
    path relative to ``base_dir``; a plain file is stored under its base name.
    Archiving is best-effort: an entry that cannot be added is reported with
    a "Skipped ..." line and the remaining entries are still processed.

    Args:
        items: Iterable of file or directory paths to archive.
        zip_path: Path of the ZIP file to create (overwritten if it exists).
        base_dir: Directory that archive names of walked files are made
            relative to. Defaults to the current working directory.

    Returns:
        A ``(added_count, skipped_paths)`` tuple: the number of files written
        to the archive and the list of paths that could not be added.
    """
    if base_dir is None:
        base_dir = os.getcwd()
    # Resolved once so the archive can be recognised if a walk reaches it.
    archive_real = os.path.realpath(zip_path)
    added_count = 0
    skipped_paths = []

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for item in items:
            from_directory = os.path.isdir(item)
            if from_directory:
                candidates = [
                    os.path.join(root, name)
                    for root, _dirs, names in os.walk(item)
                    for name in names
                ]
            else:
                candidates = [item]

            for filepath in candidates:
                # Never add the archive to itself: it is being written while
                # the walk runs, so reading it back would be meaningless.
                if os.path.realpath(filepath) == archive_real:
                    continue
                try:
                    if from_directory:
                        arcname = os.path.relpath(filepath, base_dir)
                    else:
                        arcname = os.path.basename(filepath)
                    # Try to add the file; if it is in use or otherwise
                    # problematic, report it and carry on with the rest.
                    zipf.write(filepath, arcname)
                except Exception as e:
                    print(f"Skipped {filepath} -> {e}")
                    skipped_paths.append(filepath)
                else:
                    added_count += 1

    return added_count, skipped_paths


added_count, skipped_paths = zip_project_files(project_code, output_zip_path)

# Report what actually happened instead of always claiming success.
if added_count == 0:
    print(f"No files were added; archive is empty: {output_zip_path}")
elif skipped_paths:
    print(f"Project zipped with {len(skipped_paths)} skipped item(s): {output_zip_path}")
else:
    print(f"Project zipped successfully: {output_zip_path}")