In [None]:
!pip install google_trans_new

In [1]:
import findspark
findspark.init()
import pyspark
import traceback
import json
from pyspark.sql.session import SparkSession
import os
from google_trans_new import google_translator  
import datetime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from pyspark.ml import PipelineModel
from pyspark.ml.classification import LogisticRegressionModel

# The SQLite JDBC driver jar has to be on both the driver and the executor
# classpath, because the notebook reads from and writes to SQLite over JDBC.
sqlite_jdbc_jar = os.path.join(os.getcwd(), 'extras/sqlite-jdbc-3.34.0.jar')
conf = (
    pyspark.SparkConf()
    .set("spark.executor.extraClassPath", sqlite_jdbc_jar)
    .set("spark.driver.extraClassPath", sqlite_jdbc_jar)
)

# One SparkContext / SparkSession pair shared by every later cell.
sc = pyspark.SparkContext(appName="TwitterStreamApp", conf=conf)
spark = SparkSession(sc)

# Translation client from google_trans_new, used when processing tweets.
translator = google_translator()

In [2]:
# JDBC URL of the SQLite database file located in the current working directory.
db_path = os.path.join(os.getcwd(), 'database.sqlite')
url = 'jdbc:sqlite:' + db_path

# Read the existing 'Tweet' table and expose it to Spark SQL under the same name.
df = spark.read.jdbc(url=url, table='Tweet')
df.createOrReplaceTempView('Tweet')

In [3]:
# `!export VAR=...` has no lasting effect: every `!` line runs in its own
# throwaway subshell, so the variable is gone as soon as the line finishes.
# Set the variables on the kernel process itself instead.
# NOTE(review): the SparkContext is created in an earlier cell, so a JVM started
# there will not see these values; move this cell above the Spark setup if Spark
# itself must run on this JDK.
JAVA_HOME = '/usr/lib/jvm/java-8-openjdk-amd64'
os.environ['JAVA_HOME'] = JAVA_HOME

# Prepend the JDK's bin directory only once so re-running the cell is idempotent.
java_bin = os.path.join(JAVA_HOME, 'bin')
current_path = os.environ.get('PATH', '')
if java_bin not in current_path.split(os.pathsep):
    os.environ['PATH'] = java_bin + os.pathsep + current_path if current_path else java_bin

In [4]:
def sentiment_score(tweet, threshold=0.05):
    """Label a piece of text as "Positive", "Negative" or 'Neutral' with VADER.

    Args:
        tweet: Text to score; it is passed unchanged to
            ``SentimentIntensityAnalyzer.polarity_scores``.
        threshold: Magnitude the compound score has to reach before the text is
            labelled Positive or Negative. Defaults to 0.05, the value that was
            previously hard-coded.

    Returns:
        "Positive" when the compound score is >= ``threshold``, "Negative" when
        it is <= ``-threshold``, and 'Neutral' otherwise.
    """
    # Build the analyzer once and reuse it: the function is called for every
    # tweet, and constructing a fresh analyzer per call is wasted work.
    cache = globals()
    if 'sentimentAnalyzerSingletonInstance' not in cache:
        cache['sentimentAnalyzerSingletonInstance'] = SentimentIntensityAnalyzer()
    analyzer = cache['sentimentAnalyzerSingletonInstance']

    compound = analyzer.polarity_scores(tweet)['compound']

    if compound >= threshold:
        return "Positive"
    if compound <= -threshold:
        return "Negative"
    return 'Neutral'

In [5]:
def get_sql_context_instance(spark_context):
    """Return the notebook-wide SQLContext, creating it on first use.

    The instance is stored in the module globals under
    'sqlContextSingletonInstance'; ``spark_context`` is only used the first
    time, when the SQLContext has to be constructed.
    """
    registry = globals()
    if 'sqlContextSingletonInstance' in registry:
        return registry['sqlContextSingletonInstance']

    registry['sqlContextSingletonInstance'] = SQLContext(spark_context)
    return registry['sqlContextSingletonInstance']

def _tweet_text(tweet):
    """Return the tweet's text, using the extended form when it is truncated."""
    return tweet['extended_tweet']['full_text'] if tweet['truncated'] else tweet['text']


def _tweet_to_row(tweet):
    """Translate one parsed tweet (es -> en) and build the Row that gets stored."""
    text = _tweet_text(tweet)
    translated = translator.translate(text, lang_src='es', lang_tgt='en')
    return Row(
        name=tweet['user']['screen_name'],
        date=datetime.datetime.fromtimestamp(int(tweet['timestamp_ms']) / 1000).strftime('%Y-%m-%d %H:%M:%S'),
        text=text,
        transtext=translated,
        sentiment=sentiment_score(translated)
    )


def _load_models():
    """Return (feature pipeline, classifier), loading both from disk only once."""
    if 'tweetModelsSingletonInstance' not in globals():
        globals()['tweetModelsSingletonInstance'] = (
            PipelineModel.load("models/final_idf.model"),
            LogisticRegressionModel.load("models/final_lr.model"),
        )
    return globals()['tweetModelsSingletonInstance']


def process_rdd(time, rdd):
    """Handle one micro-batch: translate, score, classify, show and persist.

    Args:
        time: Batch time supplied by ``foreachRDD``; only printed as a header.
        rdd: RDD of parsed tweet dicts for this batch.

    Returns:
        None. Failures are reported on stderr and never re-raised, so a bad
        batch cannot stop the stream.
    """
    print("----------- %s -----------" % str(time))
    row_rdd = None
    try:
        # Nothing arrived in this interval; schema inference in createDataFrame
        # fails on an empty RDD, so skip the batch explicitly.
        if rdd.isEmpty():
            return

        sql_context = get_sql_context_instance(rdd.context)
        feature_pipeline, classifier = _load_models()

        # Schema inference, show() and the JDBC write each evaluate the RDD.
        # Caching means every tweet is translated once, not once per action.
        row_rdd = rdd.map(_tweet_to_row).cache()
        tweets_df = sql_context.createDataFrame(row_rdd)

        # Keep only the stored columns plus the predicted label.
        prediction = classifier.transform(feature_pipeline.transform(tweets_df)).drop(
            'words', 'tf', 'features', 'rawPrediction', 'probability', 'label', 'tokens')
        prediction.show()
        prediction.write.mode("append").jdbc(url, 'Tweet')
    except Exception as ex:
        # Stay best-effort (the stream keeps running) but make the failure
        # visible instead of silently discarding it.
        traceback.print_exception(type(ex), ex, ex.__traceback__)
    finally:
        if row_rdd is not None:
            row_rdd.unpersist()

In [6]:
from pyspark import SparkConf,SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row,SQLContext
import sys
import requests

In [None]:

# Keep Spark's own logging quiet so the per-batch tables stay readable.
sc.setLogLevel("ERROR")

# Streaming context with a 2-second batch interval and a checkpoint directory.
ssc = StreamingContext(sc, batchDuration=2)
ssc.checkpoint("checkpoint_TwitterApp")

# Each line received on the local socket is one JSON document; parse it and
# hand every micro-batch to process_rdd.
dataStream = ssc.socketTextStream(hostname="localhost", port=9005)
tweets = dataStream.map(json.loads)
tweets.foreachRDD(process_rdd)

# Second handle on the same streaming context.
mySsc = ssc

# Start the stream and block this cell until it is stopped.
ssc.start()
ssc.awaitTermination()


----------- 2021-05-20 14:46:02 -----------
+-------------+-------------------+--------------------+--------------------+---------+----------+
|         name|               date|                text|           transtext|sentiment|prediction|
+-------------+-------------------+--------------------+--------------------+---------+----------+
|macarenagomez|2021-05-20 14:45:56|Después de 14 mes...|After 14 months, ...| Negative|       0.0|
|      baeele_|2021-05-20 14:45:56|RT @andres_alguac...|RT @andres_alguac...|  Neutral|       0.0|
+-------------+-------------------+--------------------+--------------------+---------+----------+

----------- 2021-05-20 14:46:04 -----------
+----------+-------------------+--------------------+--------------------+---------+----------+
|      name|               date|                text|           transtext|sentiment|prediction|
+----------+-------------------+--------------------+--------------------+---------+----------+
|fuentesmzd|2021-05-20 14:45:

In [None]:
# Stop the streaming context only; the underlying SparkContext stays alive.
ssc.stop(stopSparkContext=False)