In [8]:
# Load a bounded sample of the latest-news table from the lakehouse and
# render it so the input columns can be inspected before scoring.
source_query = "SELECT * FROM bing_lakehouse.world_latest_news LIMIT 1000"
df = spark.sql(source_query)
display(df)

StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 10, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 29aa2f39-1ab7-4501-a92f-6bcf2ef2c79c)

In [9]:
import synapse.ml.core
from synapse.ml.services import AnalyzeText

# Configure the SynapseML AnalyzeText transformer: it reads the
# "description" column, runs the "SentimentAnalysis" kind, and is told to
# write its result to "response" and any per-row error to "error".
model = (
    AnalyzeText()
    .setTextCol("description")
    .setKind("SentimentAnalysis")
    .setOutputCol("response")
    .setErrorCol("error")
)

# Apply the transformer to the loaded news rows and show the raw output.
df_output = model.transform(df)
display(df_output)

StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 11, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, d79662a7-bd3d-43d2-b91f-b039785a1533)

In [10]:
from pyspark.sql.functions import col

# Surface the nested sentiment value from the service response as a
# top-level "sentiment" column.
# NOTE(review): the original author assumed one document per row under
# "response.documents" -- confirm against the actual response schema.
sentiment_path = "response.documents.sentiment"
sentiment_df = df_output.withColumn("sentiment", col(sentiment_path))

StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 12, Finished, Available, Finished)

In [11]:
# The extracted "sentiment" column is all that is kept from the service
# call; remove the raw "response" payload and the "error" column.
service_columns = ("error", "response")
sentiment_df_final = sentiment_df.drop(*service_columns)
display(sentiment_df_final)

StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 13, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 77ca3e1d-3ac2-400c-9477-c1a4fcd3f4ff)

In [12]:
from pyspark.sql.functions import col, to_date

# Replace "datePublished" with the result of to_date() using the
# "dd-MMM-yyyy" pattern. This re-binds sentiment_df_final, so later cells
# see the converted column.
published_date = to_date(col("datePublished"), "dd-MMM-yyyy")
sentiment_df_final = sentiment_df_final.withColumn("datePublished", published_date)


StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 14, Finished, Available, Finished)

In [13]:
from pyspark.sql.utils import AnalysisException

# Persist the scored news rows to a Delta table: create the table on the
# first run, otherwise upsert into it keyed on "url".
table_name = "bing_lakehouse.news_sentiment"

# Test for the table explicitly instead of catching AnalysisException from
# saveAsTable(): that exception type is not specific to "table already
# exists", so catching it would misreport unrelated analysis failures and
# send them into the MERGE below.
if not spark.catalog.tableExists(table_name):
    sentiment_df_final.write.format("delta").saveAsTable(table_name)
else:
    print("Table Already Exists")

    # Expose the DataFrame to SQL so it can be used as the MERGE source.
    sentiment_df_final.createOrReplaceTempView("vw_sentiment_df_final")

    # Change detection uses the null-safe equality operator (<=>). With a
    # plain <>, a comparison involving NULL evaluates to NULL, so a field
    # that went from NULL to a value (or back) would never trigger the
    # UPDATE. Rows are updated when at least one tracked field differs.
    # NOTE(review): "sentiment" is not part of the change check, matching
    # the original statement -- confirm that is intended.
    spark.sql(f"""
        MERGE INTO {table_name} target_table
        USING vw_sentiment_df_final source_view
        ON source_view.url = target_table.url

        WHEN MATCHED AND NOT (
            source_view.title <=> target_table.title AND
            source_view.description <=> target_table.description AND
            source_view.category <=> target_table.category AND
            source_view.image <=> target_table.image AND
            source_view.provider <=> target_table.provider AND
            source_view.datePublished <=> target_table.datePublished
        ) THEN UPDATE SET *

        WHEN NOT MATCHED THEN INSERT *
    """)

StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 15, Finished, Available, Finished)

In [14]:
# Read back a bounded sample of the persisted table and render it.
news_sentiment_preview = spark.sql("SELECT * FROM bing_lakehouse.news_sentiment LIMIT 1000")
display(news_sentiment_preview)

StatementMeta(, 943fa4bb-f660-41dc-a41b-6bf20f3e9b05, 16, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 757a8add-70ea-4ff2-a0c2-2524a6206a89)