In [1]:
!pip install plotly --upgrade

Collecting plotly
  Downloading plotly-5.5.0-py2.py3-none-any.whl (26.5 MB)
[K     |████████████████████████████████| 26.5 MB 113.9 MB/s 
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-5.5.0 tenacity-8.0.1


In [2]:
!pip install translate

Collecting translate
  Downloading translate-3.6.1-py2.py3-none-any.whl (12 kB)
Collecting libretranslatepy==2.1.1
  Downloading libretranslatepy-2.1.1-py3-none-any.whl (3.2 kB)
Installing collected packages: libretranslatepy, translate
Successfully installed libretranslatepy-2.1.1 translate-3.6.1


In [3]:
from google.colab import drive 

import nltk
from nltk.stem import SnowballStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('punkt')
nltk.download('vader_lexicon')

from translate import Translator

from textblob import TextBlob

import tweepy
from tweepy import OAuthHandler

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.feature_extraction.text import CountVectorizer

import ipywidgets as widgets
from IPython.display import display, clear_output 

import os
import sys
import re
import string
import itertools
import threading
import time
import random



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [4]:
# Twitter API credentials are read from environment variables so that real keys
# never have to be typed into (and saved with) the notebook. Each value falls
# back to an empty string when its variable is not set.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_secret = os.environ.get('TWITTER_ACCESS_SECRET', '')

In [5]:
# Build the OAuth handler from the application credentials, attach the user's
# access token, and create the API client that get_tweets() queries.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)

In [6]:
def get_tweets(query, max_tweets=10, lang='en'):
    """Collect up to ``max_tweets`` search results for ``query``.

    Results are fetched page by page through the module-level ``api`` client.
    After the first page, each request asks only for tweets older than the
    last one received (``max_id``). Paging stops when enough tweets have been
    collected, when a page comes back empty, or when a request fails.

    Args:
        query: Search string passed to the API as ``q``.
        max_tweets: Upper bound on the number of tweets to collect.
        lang: Language code passed to the API as ``lang``.

    Returns:
        A list of the result objects returned by ``api.search``. It may hold
        fewer than ``max_tweets`` items and is empty when nothing was found or
        the first request failed.
    """
    searched_tweets = []
    last_id = None  # id of the oldest tweet received so far; None before the first page
    while len(searched_tweets) < max_tweets:
        params = {
            'q': query,
            'count': max_tweets - len(searched_tweets),
            'lang': lang,
        }
        # Only constrain the search once a page has actually been received;
        # the first request must not carry a made-up max_id.
        if last_id is not None:
            params['max_id'] = str(last_id - 1)

        try:
            new_tweets = api.search(**params)
        except tweepy.TweepError as e:
            # Keep the best-effort behaviour (return what we have), but tell
            # the user why the result is short instead of failing silently.
            print('Warning: tweet search stopped early: ' + str(e))
            break

        if not new_tweets:
            break
        searched_tweets.extend(new_tweets)
        last_id = new_tweets[-1].id

    return searched_tweets

def percentage(part,whole):
    """Return ``part`` expressed as a percentage of ``whole``.

    Both arguments are converted with ``float()`` first, so numeric strings are
    accepted. A ``whole`` of zero raises ``ZeroDivisionError``.
    """
    scaled_part = 100 * float(part)
    return scaled_part / float(whole)

def count_values_in_column(data,feature):
    """Summarise how often each value occurs in one column of ``data``.

    Missing values are counted as their own category.

    Args:
        data: DataFrame holding the column.
        feature: Label of the column to summarise.

    Returns:
        A DataFrame indexed by the distinct values, with a ``Total`` column
        (absolute counts) and a ``Percentage`` column (share of all rows,
        rounded to two decimals).
    """
    column = data.loc[:, feature]
    counts = column.value_counts(dropna=False)
    shares = column.value_counts(dropna=False, normalize=True) * 100
    shares = shares.round(2)
    return pd.concat([counts, shares], axis=1, keys=['Total', 'Percentage'])

In [9]:
def analyze_sentiments(query, nb_tweets, lang='en'):
    """Fetch tweets for ``query`` and chart how their sentiment is split.

    The tweets are de-duplicated, optionally translated to English, cleaned,
    scored with TextBlob and VADER, labelled positive / negative / neutral by
    comparing VADER's ``pos`` and ``neg`` scores, and summarised in a pie chart.

    Args:
        query: Search string; also shown in the chart title.
        nb_tweets: Maximum number of tweets to fetch.
        lang: Language code of the tweets. Anything other than ``'en'`` is
            translated to English before scoring.

    Returns:
        A plotly pie-chart figure, or ``None`` when no tweets were found or
        the translation step raised an error (a message is printed in both
        cases).
    """
    # Fetch tweets and put them in a DataFrame
    tweets = get_tweets(query, nb_tweets, lang)
    if not tweets:
        # Without this guard the frame below has no column 0 and the function
        # dies with an opaque KeyError.
        print('No tweets found for "' + query + '".')
        return None

    tw_list = pd.DataFrame([tweet.text for tweet in tweets])
    tw_list = tw_list.drop_duplicates()

    # Drop the retweet prefix first; it carries no sentiment in any language.
    texts = tw_list[0].map(lambda x: re.sub(r'RT @\w+: ', " ", x))

    # Translate BEFORE the ASCII-only cleanup below, otherwise accented and
    # non-Latin text is wiped out before the translator ever sees it.
    if lang != 'en':
        translator = Translator(from_lang=lang, to_lang='en')
        try:
            # One request per tweet keeps every translation attached to its row.
            texts = texts.map(translator.translate)
        except Exception:
            print('Error: Could not translate, please stick to english for now!')
            return None

    # Remove mentions, URLs and every character that is not an ASCII letter,
    # digit, space or tab, then lower-case.
    strip_noise = lambda x: re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", x)
    tw_list["text"] = texts.map(strip_noise).str.lower()

    # TextBlob polarity / subjectivity on the cleaned texts
    blob_scores = [TextBlob(t).sentiment for t in tw_list['text']]
    tw_list['polarity'] = [s.polarity for s in blob_scores]
    tw_list['subjectivity'] = [s.subjectivity for s in blob_scores]

    # VADER scores; build the analyzer once instead of once per tweet.
    analyzer = SentimentIntensityAnalyzer()
    vader_scores = [analyzer.polarity_scores(t) for t in tw_list['text']]
    tw_list['sentiment'] = [
        "negative" if s['neg'] > s['pos']
        else "positive" if s['pos'] > s['neg']
        else "neutral"
        for s in vader_scores
    ]
    for column in ('neg', 'neu', 'pos', 'compound'):
        tw_list[column] = [s[column] for s in vader_scores]

    # Make and return the chart. The index is named explicitly so that the
    # column produced by reset_index() has the same name on every pandas version.
    piechart = (
        count_values_in_column(tw_list, "sentiment")
        .rename_axis('sentiment')
        .reset_index()
    )

    fig = px.pie(
        piechart,
        values='Percentage',
        names='sentiment',
        color='sentiment',
        color_discrete_map={
            'positive': 'green',
            'negative': 'red',
            'neutral': 'blue',
        },
        title=("Sentiment Analysis Result for keyword: \"" + query + "\"."),
    )

    return fig

In [10]:
# --- Input controls ---------------------------------------------------------

# Free-text search query.
text = widgets.Text(description='Query:')

# Number of tweets to fetch, restricted to the range 1..2500.
nb = widgets.BoundedIntText(
    description='Nb tweets:',
    min=1,
    max=2500,
    step=1,
    disabled=False,
)

# Free-text language box; created here, but the dropdown below is the control
# that is actually displayed and read.
text2 = widgets.Text(description='Language:', value='en')

# Language selector. Only English is offered for now ('fr' and 'ar' are disabled).
dropdown = widgets.Dropdown(
    description='Language:',
    options=['en'],
    disabled=False,
)

# --- Trigger and result area ------------------------------------------------
button = widgets.Button(description="Analyze")
output = widgets.Output()

def on_button_clicked(b):
    """Run the analysis for the current widget values and show the chart.

    Everything printed or displayed while handling the click is captured by
    the ``output`` widget, which is cleared first so that only the latest
    result is visible.

    Args:
        b: The button instance passed by ``on_click``; not used.
    """
    with output:
        query = text.value
        nb_tweets = nb.value
        lang = dropdown.value

        clear_output()

        # A blank query cannot return anything useful; say so instead of
        # sending it to the API.
        if not query.strip():
            print('Please enter a query first.')
            return

        fig = analyze_sentiments(query, nb_tweets, lang)
        if fig is not None:
            fig.show()

# Wire the callback to the button and render the controls.
button.on_click(on_button_clicked)
display(text, nb, dropdown, button, output)

Text(value='', description='Query:')

BoundedIntText(value=1, description='Nb tweets:', max=2500, min=1)

Dropdown(description='Language:', options=('en',), value='en')

Button(description='Analyze', style=ButtonStyle())

Output()