In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer, PorterStemmer
from nltk.tokenize import word_tokenize,sent_tokenize,PunktSentenceTokenizer
from nltk.book import FreqDist
import string
from wordcloud import WordCloud,STOPWORDS
stemmer=SnowballStemmer("english")
stop_words=set(stopwords.words("english"))
from twython import Twython
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer
%matplotlib notebook

*** Introductory Examples for the NLTK Book ***
Loading text1, ..., text9 and sent1, ..., sent9
Type the name of the text or sentence to view it.
Type: 'texts()' or 'sents()' to list the materials.
text1: Moby Dick by Herman Melville 1851
text2: Sense and Sensibility by Jane Austen 1811
text3: The Book of Genesis
text4: Inaugural Address Corpus
text5: Chat Corpus
text6: Monty Python and the Holy Grail
text7: Wall Street Journal
text8: Personals Corpus
text9: The Man Who Was Thursday by G . K . Chesterton 1908


In [2]:
import os

# Twitter API credentials are read from the environment so that real secrets
# never have to be typed into (and saved with) the notebook. Each value falls
# back to the empty string when its environment variable is not set.
TWITTER_APP_KEY = os.environ.get('TWITTER_APP_KEY', '')
TWITTER_APP_KEY_SECRET = os.environ.get('TWITTER_APP_KEY_SECRET', '')
TWITTER_ACCESS_TOKEN = os.environ.get('TWITTER_ACCESS_TOKEN', '')
TWITTER_ACCESS_TOKEN_SECRET = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')

In [3]:
def draw_word(word):
    """Show ``word`` as a word cloud via ``plt.imshow``.

    Every character in ``string.punctuation`` is deleted from the text before
    the cloud is generated. The cloud uses wordcloud's ``STOPWORDS`` set, a
    black background and a 2400x2000 canvas. Nothing is returned; the caller
    is responsible for axis styling, saving and closing the figure.
    """
    punctuation_free = word.translate(str.maketrans("", "", string.punctuation))
    cloud = WordCloud(
        stopwords=STOPWORDS,
        background_color='black',
        width=2400,
        height=2000,
    )
    # generate() returns the WordCloud instance itself, which imshow can render.
    plt.imshow(cloud.generate(punctuation_free))

In [4]:
def preprocessing_word(word,stem=False):
    """Remove punctuation and English stop words from ``word``.

    Args:
        word: Text to clean.
        stem: When truthy, each surviving token is additionally reduced with
            ``PorterStemmer``.

    Returns:
        The surviving tokens joined by single spaces.
    """
    translater=str.maketrans("","",string.punctuation)
    tokens=word_tokenize(word.translate(translater))
    # stop_words comes from NLTK's English list, which is lower-case, so the
    # membership test is done on the lower-cased token; otherwise capitalised
    # stop words such as "The", "And" or "I" would be kept.
    kept=[tok for tok in tokens if tok.lower() not in stop_words]
    if stem:
        ps=PorterStemmer()
        kept=[ps.stem(tok) for tok in kept]
    return " ".join(kept)

In [11]:
def search_tweet(tweet):
    """Collect recent English tweets for a hashtag.

    Args:
        tweet: Hashtag text without the leading ``#`` (converted with ``str``).

    Returns:
        pandas.DataFrame with a single ``"Tweets"`` column holding the
        ``text`` of every status received, in the order received, indexed
        0..n-1. The frame is empty (but still has the column) when the search
        returns nothing.
    """
    from urllib.parse import parse_qs  # local import: only needed for paging

    twitter = Twython(app_key=TWITTER_APP_KEY,
                app_secret=TWITTER_APP_KEY_SECRET,
                oauth_token=TWITTER_ACCESS_TOKEN,
                oauth_token_secret=TWITTER_ACCESS_TOKEN_SECRET)
    query='#'+str(tweet)
    search = twitter.search(q=query, lang="en",result_type='recent',count=100)

    # Copy so that extending the result list never mutates the API response.
    tweets = list(search['statuses'])

    # One initial request plus at most this many follow-up pages.
    max_queries=99
    query_count=0
    while 'next_results' in search['search_metadata'] and query_count < max_queries:
        # next_results is a query string ("?max_id=...&q=..."); parse it rather
        # than relying on max_id being the first parameter.
        params=parse_qs(search['search_metadata']['next_results'].lstrip('?'))
        if 'max_id' not in params:
            break
        next_max_id=int(params['max_id'][0])
        # Keep lang="en" on every page so later pages match the first one.
        search=twitter.search(q=query,lang="en",count=100,result_type='recent',max_id=next_max_id)
        tweets.extend(search['statuses'])
        query_count+=1

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending row by row is quadratic.
    texts=[tweet_['text'] for tweet_ in tweets]
    return pd.DataFrame({"Tweets":pd.Series(texts,dtype=object)})
        

In [6]:
def make_tweet_string(tweet_df):
    """Flatten the ``"Tweets"`` column into one cleaned, space-separated string.

    For every tweet, @mentions, URLs (``scheme://...``) and every character
    that is not an ASCII letter, digit, space or tab are replaced by a space,
    and runs of whitespace are collapsed. The cleaned tweets are then joined
    with a single space so that the last word of one tweet does not fuse with
    the first word of the next.

    Args:
        tweet_df: DataFrame with a ``"Tweets"`` column of strings.

    Returns:
        The combined text; ``""`` when the frame has no rows or every tweet
        cleans down to nothing.
    """
    if len(tweet_df)==0:
        return ""
    # Raw string: same pattern as before, without invalid-escape warnings.
    noise=re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
    cleaned=(' '.join(noise.sub(" ",text).split()) for text in tweet_df["Tweets"])
    # Skip tweets that became empty so no double spaces are produced.
    return ' '.join(piece for piece in cleaned if piece)

In [7]:
def tweet_words_sorted(tweet_df):
    """Return the distinct words found in ``tweet_df``, most frequent first.

    The tweets are merged with ``make_tweet_string``, cleaned with
    ``preprocessing_word`` (no stemming), tokenised with nltk and counted with
    ``FreqDist``. Only the words are returned, not their counts.
    """
    cleaned_text=preprocessing_word(make_tweet_string(tweet_df))
    tokens=nltk.Text(nltk.word_tokenize(cleaned_text))
    counts=FreqDist(tokens)
    # Stable sort: words with equal counts keep the order FreqDist yields them.
    ranked=sorted(counts.items(),key=lambda pair: pair[1],reverse=True)
    return [word for word,_ in ranked]


In [8]:
def create_list_sentiment(tweet_words_sorted_list):
    """Split words into positive, negative and neutral lists.

    Each word is scored on its own with ``SentimentIntensityAnalyzer``. A word
    whose ``'pos'`` score equals 1 is positive; otherwise a word whose
    ``'neg'`` score equals 1 is negative; everything else is neutral. Input
    order is preserved inside each list.

    Returns:
        Tuple ``(positive, negative, neutral)`` of lists of words.
    """
    analyzer=SentimentIntensityAnalyzer()
    positive,negative,neutral=[],[],[]
    for token in tweet_words_sorted_list:
        scores=analyzer.polarity_scores(token)
        if scores['pos']==1:
            bucket=positive
        elif scores['neg']==1:
            bucket=negative
        else:
            bucket=neutral
        bucket.append(token)
    return positive,negative,neutral


In [9]:
def _save_word_cloud(words,path):
    """Draw ``words`` as a word cloud and save it to ``path``.

    Returns True when an image was written, False when ``words`` is empty and
    the image was skipped.
    """
    if not words:
        # WordCloud raises ValueError when it has no words to plot, so an
        # empty list is skipped instead of aborting the whole run.
        print("No words to plot, skipping "+path)
        return False
    draw_word(" ".join(words))
    plt.axis("off")
    plt.savefig(path)
    plt.close()
    return True


def tweet_sentiment(tweet):
    """Fetch tweets for a hashtag and save word-cloud images for them.

    Up to three files are written to the current directory:
    ``<tweet>_tweets.jpg`` (all words), ``<tweet>_positive_tweets.jpg`` and
    ``<tweet>_negative_tweets.jpg``. A file is skipped when its word list is
    empty.

    Args:
        tweet: Hashtag text without the leading ``#``; also used as the
            file-name prefix.
    """
    tweet_df=search_tweet(tweet)
    # Tokenise and count once; the same list feeds both the overall cloud and
    # the sentiment split.
    words=tweet_words_sorted(tweet_df)
    _save_word_cloud(words,tweet+"_tweets.jpg")

    Pos,Neg,_=create_list_sentiment(words)
    _save_word_cloud(Pos,tweet+"_positive_tweets.jpg")
    _save_word_cloud(Neg,tweet+"_negative_tweets.jpg")

