In [0]:
# Databricks Part 2 – Simulated Streaming with Model

In [0]:
#### Load the saved model
from pyspark.ml.pipeline import PipelineModel

# Location of the persisted pipeline (presumably written by the Part 1
# training notebook -- confirm against that notebook's save path).
MODEL_PATH = "/dbfs/FileStore/models/fake_news_best_model"

# Restore the pipeline so it can be applied to new messages below.
model = PipelineModel.load(MODEL_PATH)
print("** Model loaded.")

** Model loaded.


In [0]:
# Sanity check: show the class of the object returned by the load step.
loaded_model_type = type(model)
print(loaded_model_type)

<class 'pyspark.ml.pipeline.PipelineModel'>


In [0]:
#### Load the streaming data (new messages)
STREAM_CSV_PATH = "/FileStore/tables/stream1.csv"

# Read the batch of incoming messages; the first row is the header and
# column types are inferred from the data.
raw_messages = spark.read.options(header=True, inferSchema=True).csv(STREAM_CSV_PATH)

# Discard rows containing null values before scoring.
df_stream = raw_messages.na.drop()
display(df_stream)

text
Government confirms economic growth for next quarter.
BREAKING: Aliens have landed in Times Square!
Study shows coffee reduces risk of heart disease.
President announces new education reform policy.
Scientists discover cure for common cold in backyard.
"NASA finds water on Mars, possible signs of life."
Click here to win a free iPhone in 5 minutes!
World leaders meet to discuss climate change solutions.
This one weird trick will make you rich overnight!
Experts warn about fake news spreading on social media.


In [0]:
#### Apply the model
from datetime import datetime, timezone

# current_timestamp stays imported in case other cells rely on the name.
from pyspark.sql.functions import current_timestamp, lit

# Capture the scoring time ONCE on the driver. `predictions` is a lazy plan
# that is re-executed by every action (the display below, the CSV write, the
# SQL query), and current_timestamp() is re-evaluated at the start of each of
# those queries -- so the timestamp shown here would not match the one that
# gets persisted. A literal pins a single value for the whole batch.
batch_timestamp = datetime.now(timezone.utc)

# Score the incoming messages and tag every row with the batch timestamp.
predictions = model.transform(df_stream).withColumn("timestamp", lit(batch_timestamp))
display(predictions.select("text", "prediction", "timestamp"))

text,prediction,timestamp
Government confirms economic growth for next quarter.,0.0,2025-05-26T21:04:56.717+0000
BREAKING: Aliens have landed in Times Square!,0.0,2025-05-26T21:04:56.717+0000
Study shows coffee reduces risk of heart disease.,0.0,2025-05-26T21:04:56.717+0000
President announces new education reform policy.,0.0,2025-05-26T21:04:56.717+0000
Scientists discover cure for common cold in backyard.,0.0,2025-05-26T21:04:56.717+0000
"NASA finds water on Mars, possible signs of life.",0.0,2025-05-26T21:04:56.717+0000
Click here to win a free iPhone in 5 minutes!,0.0,2025-05-26T21:04:56.717+0000
World leaders meet to discuss climate change solutions.,0.0,2025-05-26T21:04:56.717+0000
This one weird trick will make you rich overnight!,0.0,2025-05-26T21:04:56.717+0000
Experts warn about fake news spreading on social media.,0.0,2025-05-26T21:04:56.717+0000


In [0]:
#### Save predictions to persistent storage
RESULTS_DIR = "/FileStore/tables/stream_results"

# Keep only the columns worth persisting, then append this batch to the
# existing CSV output (with a header row in each written file).
batch_output = predictions.select("text", "prediction", "timestamp")
(
    batch_output.write
    .mode("append")
    .option("header", True)
    .csv(RESULTS_DIR)
)

In [0]:
#### Query predictions with Spark SQL
# Expose the current batch of predictions to SQL under a temporary view name.
predictions.createOrReplaceTempView("stream_results")

# Count how many messages received each predicted label.
count_by_prediction_sql = "SELECT prediction, COUNT(*) as total FROM stream_results GROUP BY prediction"
prediction_counts = spark.sql(count_by_prediction_sql)
prediction_counts.show()

+----------+-----+
|prediction|total|
+----------+-----+
|       0.0|   10|
+----------+-----+

