

![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://githubtocolab.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/CLASSIFICATION_TR_SENTIMENT.ipynb)




# **Turkish Sentiment**

## 0. Colab Setup


In [1]:
# Install PySpark and Spark NLP
! pip install -q pyspark==3.1.2 spark-nlp

## 1. Start Spark Session

In [2]:
import pandas as pd
import numpy as np
import os
import json
from pyspark.ml import Pipeline
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from sparknlp.annotator import *
from sparknlp.base import *
import sparknlp
from sparknlp.pretrained import PretrainedPipeline

# Obtain the Spark session through Spark NLP's start helper.
spark = sparknlp.start()

# Print the library versions in use so the run can be compared with the
# recorded output.
for label, value in (
    ("Spark NLP version", sparknlp.version()),
    ("Apache Spark version:", spark.version),
):
    print(label, value)

Spark NLP version 3.4.0
Apache Spark version: 3.1.2


## 2. Preparing Input

In [3]:
# Example Turkish sentences that will be scored by the sentiment pipeline.
text_list = [
    "Bu sıralar moralim bozuk.",
    "Sınavımı geçtiğimi öğrenince derin bir nefes aldım.",
    "Hizmet kalite çok güzel teşekkürler",
    "Meydana gelen kazada 1 kisi hayatini kaybetti.",
    "Ocak ayinda deprem bekleniyor",
    "Gun batimi izlemeyi cok severim.",
]

# One synthetic file name per sentence, numbered from 1: "1.txt", "2.txt", ...
files = [f"{position}.txt" for position, _ in enumerate(text_list, start=1)]

# Build the Spark DataFrame (columns: text, file) from a pandas frame.
df = spark.createDataFrame(
    pd.DataFrame({'text': text_list, 'file': files})
)

## 3. Define Pipeline

In [4]:
# Stage 1: turn the raw "text" column into the "document" annotation column.
document = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Stage 2: multilingual ("xx") Universal Sentence Encoder, reading "document"
# and writing "sentence_embeddings".
embeddings = UniversalSentenceEncoder.pretrained("tfhub_use_multi", "xx") \
    .setInputCols("document") \
    .setOutputCol("sentence_embeddings")

# Stage 3: pretrained Turkish ("tr") sentiment classifier, writing "class".
# ClassifierDLModel consumes only sentence embeddings, so that is the single
# input column passed here. Supplying "document" as well is unnecessary, and
# Spark NLP releases that validate the number of input columns reject it.
sentimentClassifier = ClassifierDLModel.pretrained("classifierdl_use_sentiment", "tr") \
    .setInputCols(["sentence_embeddings"]) \
    .setOutputCol("class")

sentiment_pipeline = Pipeline(stages=[document, embeddings, sentimentClassifier])

# fit() is called on a one-row frame holding an empty string purely to obtain
# a PipelineModel that can transform real input afterwards.
tr_sentiment_pipeline = sentiment_pipeline.fit(spark.createDataFrame([['']]).toDF("text"))

tfhub_use_multi download started this may take some time.
Approximate size to download 247.6 MB
[OK!]
classifierdl_use_sentiment download started this may take some time.
Approximate size to download 21.4 MB
[OK!]


## 4. Predictions

In [5]:
# Run the fitted pipeline over the input sentences and collect the result
# into a pandas DataFrame for inspection.
res = (
    tr_sentiment_pipeline
    .transform(df)
    .toPandas()
)

In [6]:
# Lift the column-width limit so the annotation structs are not truncated
# when the frame is rendered.
pd.options.display.max_colwidth = None

# Preview the input text next to the raw classifier annotations.
res.loc[:, ['text', 'class']].head()

Unnamed: 0,text,class
0,Bu sıralar moralim bozuk.,"[(category, 0, 24, NEGATIVE, {'sentence': '0', 'POSITIVE': '6.861473E-38', 'NEGATIVE': '1.0'}, [])]"
1,Sınavımı geçtiğimi öğrenince derin bir nefes aldım.,"[(category, 0, 50, POSITIVE, {'sentence': '0', 'POSITIVE': '1.0', 'NEGATIVE': '2.031546E-12'}, [])]"
2,Hizmet kalite çok güzel teşekkürler,"[(category, 0, 34, POSITIVE, {'sentence': '0', 'POSITIVE': '1.0', 'NEGATIVE': '0.0'}, [])]"
3,Meydana gelen kazada 1 kisi hayatini kaybetti.,"[(category, 0, 45, NEGATIVE, {'sentence': '0', 'POSITIVE': '6.323716E-30', 'NEGATIVE': '1.0'}, [])]"
4,Ocak ayinda deprem bekleniyor,"[(category, 0, 28, NEGATIVE, {'sentence': '0', 'POSITIVE': '7.328405E-27', 'NEGATIVE': '1.0'}, [])]"


In [7]:
# Flatten the classifier output into one row per annotation and pull the
# predicted label and its confidence out into plain columns.
#
# Each annotation is tuple-like: item 3 is the predicted label and item 4 is a
# metadata dict mapping every label to its probability as a string (see the
# `class` column displayed by the previous cell).
#
# NOTE: this cell rebinds `res` to the flattened frame, so it is meant to run
# once, right after the prediction cell.
res = (
    res[['text', 'class']]
    .explode('class')
    # explode() turns an empty annotation list into NaN; drop those rows so the
    # lookups below never index into a float.
    .dropna(subset=['class'])
    # explode() repeats the parent index for every annotation; renumber rows.
    .reset_index(drop=True)
)

res['prediction'] = res['class'].apply(lambda row: row[3])
# Confidence of the predicted label as a percentage, rounded to 3 decimals.
res['score'] = res['class'].apply(lambda row: round(float(row[4][row[3].strip()])*100, 3))

res.head()

Unnamed: 0,text,class,prediction,score
0,Bu sıralar moralim bozuk.,"(category, 0, 24, NEGATIVE, {'sentence': '0', 'POSITIVE': '6.861473E-38', 'NEGATIVE': '1.0'}, [])",NEGATIVE,100.0
1,Sınavımı geçtiğimi öğrenince derin bir nefes aldım.,"(category, 0, 50, POSITIVE, {'sentence': '0', 'POSITIVE': '1.0', 'NEGATIVE': '2.031546E-12'}, [])",POSITIVE,100.0
2,Hizmet kalite çok güzel teşekkürler,"(category, 0, 34, POSITIVE, {'sentence': '0', 'POSITIVE': '1.0', 'NEGATIVE': '0.0'}, [])",POSITIVE,100.0
3,Meydana gelen kazada 1 kisi hayatini kaybetti.,"(category, 0, 45, NEGATIVE, {'sentence': '0', 'POSITIVE': '6.323716E-30', 'NEGATIVE': '1.0'}, [])",NEGATIVE,100.0
4,Ocak ayinda deprem bekleniyor,"(category, 0, 28, NEGATIVE, {'sentence': '0', 'POSITIVE': '7.328405E-27', 'NEGATIVE': '1.0'}, [])",NEGATIVE,100.0
