# Task 4

### Find emoticons in posts and post comments (negative, positive, neutral). You may use external libraries or predefined emoticon lists. Use a Spark UDF and the broadcast function for the emoticons.

In [9]:
import pyspark
import pyspark.sql.functions
import pyspark.sql.types
import emojis
import os

In [10]:
# Configuration of SPARK_HOME system variable.
# setdefault keeps a SPARK_HOME that is already exported in the environment and
# only falls back to the local Windows install path when none is set, so the
# notebook does not clobber a working configuration on another machine.
os.environ.setdefault("SPARK_HOME", "C:/spark")

In [4]:
# pyspark session and SQL context creation.
# getOrCreate reuses an already running SparkContext / SparkSession, so this
# cell can be re-run in the same kernel without raising
# "Cannot run multiple SparkContexts at once". When a context already exists
# it is returned as-is and `configs` is not re-applied to it.
configs = pyspark.SparkConf().setAppName("task4").setMaster("local")
spark_context = pyspark.SparkContext.getOrCreate(conf=configs)
spark = pyspark.sql.SparkSession.builder.config(conf=configs).getOrCreate()
# Bind the SQL context to the session created above so both share one session.
sql_context = pyspark.sql.SQLContext(spark_context, sparkSession=spark)

In [50]:
# Emoji extraction UDF definition
def _extract_emojis(text):
    """Return the distinct emojis found in ``text`` as a sorted list.

    Args:
        text: The string to scan, or ``None`` for a null column value.

    Returns:
        A list of emoji strings; empty when ``text`` is ``None`` or holds no
        emojis.
    """
    # A null value would otherwise raise inside the Python worker.
    if text is None:
        return []
    # emojis.get returns the unique emojis of the string (a set per the library
    # docs), whose iteration order is arbitrary; sorting makes the resulting
    # array order well-defined and reproducible.
    return sorted(emojis.get(text))


# Spark UDF wrapper: string column -> array<string> of emojis.
emojis_extraction = pyspark.sql.functions.udf(
    _extract_emojis,
    pyspark.sql.types.ArrayType(pyspark.sql.types.StringType()),
)

In [52]:
# Load every posts JSON file and keep only the rows that have a "text" value
data_posts = (
    sql_context.read
    .json("posts_api.json/*.json")
    .filter(pyspark.sql.functions.col("text").isNotNull())
)

# Run the emoji UDF on each post, drop posts without emojis, order by key
has_emojis = pyspark.sql.functions.size(pyspark.sql.functions.col("emojis")) > 0
data_emojis = (
    data_posts
    .select("key", emojis_extraction("text").alias("emojis"))
    .where(has_emojis)
    .sort("key")
)

# Preview the result
data_emojis.show()

# Collapse to a single partition and write it out as JSON, replacing any
# output left by a previous run
(
    data_emojis
    .coalesce(1)
    .write
    .format("json")
    .mode("overwrite")
    .save("task4_output.json")
)

+---------+------------+
|      key|      emojis|
+---------+------------+
|-94_14693|         [❗]|
|-94_15804|      [🇮🇹]|
|-94_16596|        [🎈]|
|-94_17167|        [🎄]|
|-94_17381|        [🎄]|
|-94_17386|        [😺]|
|-94_17432|    [🎄, 💌]|
|-94_17447|        [🎓]|
|-94_17483|        [👆]|
|-94_17530|         [✨]|
|-94_17584|        [👪]|
|-94_17835|[🎨, 💐, 🌸]|
|-94_18861|        [🎈]|
|-94_20299|        [😊]|
|-94_23588|        [😜]|
|-94_23605|        [😉]|
|-94_23722|        [😉]|
|-94_26766|    [😃, 🚴]|
|-94_26776|        [📌]|
|-94_26794|        [🔎]|
+---------+------------+
only showing top 20 rows

