# Read bing_latest_news table as a DataFrame

In [23]:
# Load every column of the latest-news table into a Spark DataFrame,
# then render it so the input rows can be inspected before scoring.
news_query = "SELECT * FROM bing_LH.dbo.bing_latest_news"
df = spark.sql(news_query)
display(df)

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 25, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, bc532618-357a-4016-a5a9-48fe120027b8)

# Machine Learning Model

In [24]:
import synapse.ml.core
from synapse.ml.services import AnalyzeText

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 26, Finished, Available, Finished)

In [25]:
# Configure the AnalyzeText transformer for sentiment analysis:
#   - input text is read from the "description" column
#   - the output column is named "response", the error column "error"
model = (
    AnalyzeText()
    .setKind("SentimentAnalysis")
    .setTextCol("description")
    .setErrorCol("error")
    .setOutputCol("response")
)

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 27, Finished, Available, Finished)

In [26]:
# Apply the configured AnalyzeText model to the news DataFrame; the returned
# DataFrame carries the "response" and "error" columns named in the model setup.
result = model.transform(df)

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 28, Finished, Available, Finished)

In [27]:
# Preview the transformed DataFrame, including the raw "response" column.
display(result)

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 29, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, ebca5829-e6d8-4f2b-ac9b-f7c330621e4a)

# Select sentiment column

In [28]:
# Import only the function that is used. A wildcard import of
# pyspark.sql.functions would shadow Python builtins such as sum, min, max and
# round for every later cell and hide where `col` comes from.
from pyspark.sql.functions import col

# Copy the nested response.documents.sentiment field into a top-level
# "sentiment" column so it survives once the raw response is dropped.
sentiment_df = result.withColumn("sentiment", col("response.documents.sentiment"))

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 30, Finished, Available, Finished)

In [29]:
# Remove the raw "response" and "error" columns now that the sentiment value
# has been copied into its own column.
# NOTE(review): dropping "error" also discards any per-row failure details;
# presumably failed rows end up with a null sentiment — confirm before relying on it.
sentiment_final_df = sentiment_df.drop("response", "error")

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 31, Finished, Available, Finished)

In [30]:
# Preview the final DataFrame: the original news columns plus "sentiment".
display(sentiment_final_df)

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 32, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 58d0aa83-9189-4176-88f8-8d343e0a0ac9)

## Incremental loading logic using MERGE SQL statement

In [31]:
from pyspark.sql.utils import AnalysisException

# Fully qualified name of the Delta table that stores the scored news rows.
table_name = 'Bing_Senti_Ana_Dev.bing_LH.dbo.Senti_Analysis_Bing'

try:
    # First run: create the table from the current batch. saveAsTable uses the
    # default "error if exists" save mode, so it raises when the table is
    # already present instead of overwriting it.
    sentiment_final_df.write.format("delta").saveAsTable(table_name)

except AnalysisException:
    # The table already exists, so upsert the current batch into it.
    print("Table Already Exists")

    # Expose the current batch to Spark SQL so it can be the MERGE source.
    sentiment_final_df.createOrReplaceTempView("VM_sentiment_final_df")

    # Upsert keyed on url:
    #   * matched rows are updated only when a tracked column changed; the
    #     null-safe operator <=> is used because `a <> b` evaluates to NULL
    #     (not TRUE) when either side is NULL, which would miss such changes;
    #   * sentiment is refreshed together with the text it was derived from;
    #   * rows whose url is not in the table yet are inserted, which is what
    #     makes the load incremental.
    spark.sql(f"""
        MERGE INTO {table_name} AS target_table
        USING VM_sentiment_final_df AS source_view
        ON source_view.url = target_table.url
        WHEN MATCHED AND
        (NOT (source_view.title <=> target_table.title) OR
         NOT (source_view.description <=> target_table.description) OR
         NOT (source_view.category <=> target_table.category) OR
         NOT (source_view.image <=> target_table.image) OR
         NOT (source_view.provider_type <=> target_table.provider_type) OR
         NOT (source_view.provider_image <=> target_table.provider_image) OR
         NOT (source_view.datePublished <=> target_table.datePublished))
        THEN UPDATE SET
            target_table.title = source_view.title,
            target_table.description = source_view.description,
            target_table.category = source_view.category,
            target_table.image = source_view.image,
            target_table.provider_type = source_view.provider_type,
            target_table.provider_image = source_view.provider_image,
            target_table.datePublished = source_view.datePublished,
            target_table.sentiment = source_view.sentiment
        WHEN NOT MATCHED THEN INSERT *
    """)

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 33, Finished, Available, Finished)

In [32]:
%%sql
-- Show all rows and columns of the bing_latest_news table.
-- NOTE(review): the preceding cell writes to Senti_Analysis_Bing, but this query
-- reads bing_latest_news; confirm which table was meant to be inspected here.
SELECT * FROM Bing_Senti_Ana_Dev.bing_LH.dbo.bing_latest_news

StatementMeta(, dea2c0da-61de-4814-ad7f-b1b5e75a17c6, 34, Finished, Available, Finished)

<Spark SQL result set with 94 rows and 8 fields>