# Create the ML model

In [None]:
# Load every row of the latest-news table; this DataFrame is the model input.
latest_news_query = "SELECT * FROM bing_lake_db.tbl_latest_news"
df = spark.sql(latest_news_query)

In [None]:
import synapse.ml.core
from synapse.ml.services import AnalyzeText

In [None]:
# Configure the SynapseML AnalyzeText transformer step by step:
#   - text to analyse is read from the 'description' column
#   - the analysis kind is "SentimentAnalysis"
#   - the result is written to 'response', errors to 'error'
sentiment_analyzer = AnalyzeText()
sentiment_analyzer = sentiment_analyzer.setTextCol("description")
sentiment_analyzer = sentiment_analyzer.setKind("SentimentAnalysis")
sentiment_analyzer = sentiment_analyzer.setOutputCol("response")
sentiment_analyzer = sentiment_analyzer.setErrorCol("error")
model = sentiment_analyzer

# Apply the model

In [None]:
# Apply the model to our DataFrame. The configured output column ("response")
# and error column ("error") are added alongside the original news columns.
result = model.transform(df)

# Cleanup

In [None]:
# Expose the sentiment field nested under response.documents as a top-level
# "sentiment" column.
from pyspark.sql.functions import col

sentiment_value = col("response.documents.sentiment")
sentiment_df = result.withColumn("sentiment", sentiment_value)

In [None]:
# The model's helper columns are no longer needed once "sentiment" has been
# extracted, so remove them before persisting.
model_helper_columns = ("error", "response")
sentiment_df_final = sentiment_df.drop(*model_helper_columns)

# Mode 1 Merge (Incremental Load type 1)

In [None]:
from pyspark.sql.utils import AnalysisException

# Target Delta table. Defined outside the try block because both the except
# handler below and a later cell rely on this name.
table_name = "bing_lake_db.tbl_sentiment_analysis"

try:
    # First run: create the table from the current batch. With the default
    # save mode this raises AnalysisException if the table already exists.
    sentiment_df_final.write.format("delta").saveAsTable(table_name)

except AnalysisException:
    # AnalysisException is also raised for unrelated problems (unknown
    # database, unresolved columns, ...). Only fall through to the merge when
    # the table really exists; otherwise surface the original error.
    if not spark.catalog.tableExists(table_name):
        raise

    print ("Table already exists!")

    sentiment_df_final.createOrReplaceTempView("vw_sentiment_df_final")
    # Type 1 incremental load keyed on url:
    #   - url matches and any tracked column differs -> overwrite the whole row
    #   - url not present in the target              -> insert the whole row
    # The comparisons use the null-safe operator (<=>): a plain <> evaluates to
    # NULL when either side is NULL, which would silently skip rows whose
    # value changed from or to NULL.
    # NOTE(review): a later cell rewrites this table with datePublished passed
    # through to_date(); confirm the incoming datePublished value still
    # compares and casts correctly against that column on subsequent runs.
    spark.sql(f"""MERGE INTO {table_name} target_table
                     USING vw_sentiment_df_final source_view
                     ON source_view.url = target_table.url

                     WHEN MATCHED AND
                     (NOT (source_view.title <=> target_table.title) OR
                     NOT (source_view.description <=> target_table.description) OR
                     NOT (source_view.category <=> target_table.category) OR
                     NOT (source_view.image <=> target_table.image) OR
                     NOT (source_view.provider <=> target_table.provider) OR
                     NOT (source_view.datePublished <=> target_table.datePublished))

                     THEN UPDATE SET *
                     WHEN NOT MATCHED THEN INSERT *

                """)


In [None]:
from pyspark.sql.functions import col, to_date

# Read the persisted sentiment table back and replace datePublished with a
# date parsed using the "dd-MM-yyyy" pattern.
sentiment_table = spark.sql("SELECT * FROM bing_lake_db.tbl_sentiment_analysis")
published_as_date = to_date(col("datePublished"), "dd-MM-yyyy")
df = sentiment_table.withColumn("datePublished", published_as_date)

In [44]:
# Replace the table contents with the converted DataFrame. overwriteSchema is
# set so the changed datePublished column type can replace the stored schema.
delta_writer = df.write.format('delta')
delta_writer = delta_writer.mode("overwrite")
delta_writer = delta_writer.option("overwriteSchema", "True")
delta_writer.saveAsTable(table_name)

StatementMeta(, dd763a5d-7a64-4d97-9118-5460b4ab5ec4, 46, Finished, Available)