In [1]:
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import twitter_samples, stopwords
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from nltk import FreqDist, classify, NaiveBayesClassifier
import re, string, random
from textblob import TextBlob
import tweepy
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from wordcloud import WordCloud, STOPWORDS
import PySimpleGUI as sg

plt.style.use('ggplot')
pd.set_option('max_columns', None)
pd.set_option("max_rows", None)
pd.set_option("max_colwidth", 250)
positive = 0
negative = 0
neutral = 0
polarity = 0
tweet_list = []
clean_tweet_list = []
neutral_list = []
negative_list = []
positive_list = []

# remove all the useless item
def remove_noise(tweet_tokens, stop_words = ()):
    cleaned_tokens = []

    for token, tag in pos_tag(tweet_tokens):
        #remove tag, hashtags, hyperlink
        cleanText(token)

        if tag.startswith("NN"):
            pos = 'n' #NOUN
        elif tag.startswith('VB'):
            pos = 'v' #VERB
        else:
            pos = 'a' #ADJ
            
        # this function is to converting a word to its canonical form such as running, ran --> run
        lemmatizer = WordNetLemmatizer()
        token = lemmatizer.lemmatize(token, pos)

        # remove stop word
        if len(token) > 0 and token not in string.punctuation and token.lower() not in stop_words:
            cleaned_tokens.append(token.lower())
    return cleaned_tokens


def cleanText(text):
    text = re.sub(r'@[A-Za-z0-9_-]+', '', text) # remove tag
    text = re.sub(r'#[A-Za-z0-9_-]+', '', text) #remove hashtag
    text = re.sub(r'https?:\/\/\S+', '', text) #remove hyperlink
    text = re.sub(r'[\n]+', '', text) #remove nextline
    text = re.sub(r'RT : ', '', text) #remove RT
    return text

#takes a list of tweets as an argument to provide a list of words in all of the tweet tokens joined.
def get_all_words(cleaned_tokens_list):
    for tokens in cleaned_tokens_list:
        for token in tokens:
            yield token #yield is use in a generator, func are almost same with return

            
# preparation for passing data list into model
#convert into dictionary(word as key, true as value)
def get_tweets_for_model(cleaned_tokens_list):
    for tweet_tokens in cleaned_tokens_list:
        yield dict([token, True] for token in tweet_tokens)
        
        
def preRUN():
    positive_tweets = twitter_samples.strings('positive_tweets.json')
    negative_tweets = twitter_samples.strings('negative_tweets.json')
    # text = twitter_samples.strings('tweets.20150430-223406.json')
    # tweet_tokens = twitter_samples.tokenized('positive_tweets.json')[0]

    stop_words = stopwords.words('english')

    positive_tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
    negative_tweet_tokens = twitter_samples.tokenized('negative_tweets.json')

    positive_cleaned_tokens_list = []
    negative_cleaned_tokens_list = []

    for tokens in positive_tweet_tokens:
        positive_cleaned_tokens_list.append(remove_noise(tokens, stop_words))

    for tokens in negative_tweet_tokens:
        negative_cleaned_tokens_list.append(remove_noise(tokens, stop_words))

    all_pos_words = get_all_words(positive_cleaned_tokens_list)

    freq_dist_pos = FreqDist(all_pos_words)
    # print(freq_dist_pos.most_common(10))

    positive_tokens_for_model = get_tweets_for_model(positive_cleaned_tokens_list)
    negative_tokens_for_model = get_tweets_for_model(negative_cleaned_tokens_list)

    positive_dataset = [(tweet_dict, "Positive")
                         for tweet_dict in positive_tokens_for_model]

    negative_dataset = [(tweet_dict, "Negative")
                         for tweet_dict in negative_tokens_for_model]

    dataset = positive_dataset + negative_dataset

    random.shuffle(dataset)

    train_data = dataset[:7000]
    test_data = dataset[7000:]
    global classifier
    classifier = NaiveBayesClassifier.train(train_data)
    global A
    A = classify.accuracy(classifier, test_data)
    
def NBSentimentAnalysis(custom_tweet):
    custom_tokens = remove_noise(word_tokenize(custom_tweet))
    answer = classifier.classify(dict([token, True] for token in custom_tokens))
    return answer
    
def searchInTwitter():
    # get the API key
    log = pd.read_csv('TwitterAPI.csv')

    # Authentication
    consumerKey = log['key'][0]
    consumerSecret = log['secret'][0]
    accessToken = log['token'][0]
    accessTokenSecret = log['tokenSecret'][0]

    # Create the authentication object
    auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
    # Set the access token and token secret
    auth.set_access_token(accessToken, accessTokenSecret)
    #create api object
    api = tweepy.API(auth, wait_on_rate_limit = True)

    
    #---------------------------------------------------------------------------UI PART
    # Define the window's contents
    layout = [  [sg.Text("Please enter keyword or hashtag to search: ")],
              [sg.Input()],
              [sg.Text("Please enter how many tweets to analyze: ")],
              [sg.Input()],
              [sg.Button('OK')] ]

    # Create the window
    window = sg.Window('Sentiment Analysis System', layout)
    # Display and interact with the Window
    event, values = window.read()
    # Finish up by removing from the screen
    window.close()
    #---------------------------------------------------------------------------UI PART
    
    # extract what item what quantity to crawl
    global keyword
    keyword = values[0]
    noOfTweet = int(values[1])
    posts = tweepy.Cursor(api.search, q=keyword, lang="en").items(noOfTweet)

    print("These is the post:")
    i = 1
    for post in posts:
        tweet_list.append(post.text)
        print("  "+str(i)+")"+post.text)
        print("\n\n");
        i += 1

def getSubjectivity(text):
    #the range of [0,1], 1 means a public opinion and not a factual information
    return TextBlob(text).sentiment.subjectivity 

def getPolarity(text):
    #the range of [-1,1], -1 means negatif , 1 means positif
    return TextBlob(text).sentiment.polarity 

def getAnalysis(score):
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    elif score > 0:
        return 'Positive'
    
def uiNB():
    window2 = sg.Window("Percentage of TB_Analysis", [  [sg.Text("Positive percentage : "+ str(posp))],
          [sg.Text("Neutral percentage : "+ str(neup))],
          [sg.Text("Negative percentage : "+ str(negp))],
          [sg.Button('Show BarChart')],
          [sg.Button('Show PieChart')] ])
    while True:
        e,v = window2.read()
        if e == "Show BarChart":
            getBarChartTB()
        elif e == "Show PieChart":
            getPieChart()
        elif e == sg.WIN_CLOSED:
            break

def uiTB():
    window3 = sg.Window("Percentage of NB_Analysis", [  [sg.Text("Positive percentage : "+ str(posp))],
          [sg.Text("Neutral percentage : "+ str(neup))],
          [sg.Text("Negative percentage : "+ str(negp))],
          [sg.Button('Show BarChart')],
          [sg.Button('Show PieChart')] ])
    while True:
        e,v = window3.read()
        if e == "Show BarChart":
            getBarChartNB()
        elif e == "Show PieChart":
            getPieChart()
        elif e == sg.WIN_CLOSED:
            break
    
def TextBlobSentimentAnalysis():
    global df
    df = pd.DataFrame([tweet for tweet in tweet_list], columns=['Tweet'])
    df['Tweet'] = df['Tweet'].apply(cleanText)
    df['Subjectivity'] = df['Tweet'].apply(getSubjectivity)
    df['Polarity'] = df['Tweet'].apply(getPolarity)
    df['TB_Analysis'] = df['Polarity'].apply(getAnalysis)
    df['NB_Analysis'] = df['Tweet'].apply(NBSentimentAnalysis)
    print('-----------------------------------------------------')
    df.head()
    global ddf
    ddf = pd.DataFrame([tweet for tweet in tweet_list], columns=['Tweet'])
    ddf['Tweet'] = ddf['Tweet'].apply(cleanText)
  
    
    #---------------------------------------------------------------------------UI PART
    sg.set_options(auto_size_buttons=True)
    data = []
    layout = [
        [sg.Text("List of tweet")],
        [sg.Table(values=ddf.values.tolist(),
                  headings=list(ddf.columns.values),
                  display_row_numbers=True, col_widths=500,
                  num_rows=max(10,len(data))
                 )],
        [sg.Button("Show system accuracy"),sg.Button("Show the sentiment analysis"), sg.Button("Cancel")]
    ]
    window = sg.Window('Tweet Table', layout, grab_anywhere=False)
    global posp
    global neup
    global negp

    while True:
        event, values = window.read()
        if event == "Show system accuracy":
            print("The accuracy is :",str(A))
            classifier.show_most_informative_features(20)
        elif event == "Show the sentiment analysis":
            
            window4 = sg.Window('Result of Analysis',
                                [ [sg.Text("List of tweet with analysis")],
                                [sg.Table(values=df.values.tolist(),
                                          headings=list(df.columns.values),
                                          display_row_numbers=True,
                                          auto_size_columns=True,
                                          num_rows=max(10,len(data))
                                         )],
                                 [sg.Button("Show percentage of TB_Analysis"),
                                  sg.Button("Show percentage of NB_Analysis")]], grab_anywhere=False)
            
            while True:
                event4, value4 = window4.read()
                if event4 == "Show percentage of TB_Analysis":
                    postweets = df[df.TB_Analysis == 'Positive']
                    postweets = postweets['Tweet']
                    posp = round(postweets.shape[0] / df.shape[0] * 100, 1)

                    neutweets = df[df.TB_Analysis == 'Neutral']
                    neutweets = neutweets['Tweet']
                    neup = round(neutweets.shape[0] / df.shape[0] * 100, 1)

                    negtweets = df[df.TB_Analysis == 'Negative']
                    negtweets = negtweets['Tweet']
                    negp = round(negtweets.shape[0] / df.shape[0] * 100, 1)

                    uiTB()

                elif event4 == "Show percentage of NB_Analysis":
                    postweets = df[df.NB_Analysis == 'Positive']
                    postweets = postweets['Tweet']
                    posp = round(postweets.shape[0] / df.shape[0] * 100, 1)

                    neutweets = df[df.NB_Analysis == 'Neutral']
                    neutweets = neutweets['Tweet']
                    neup = round(neutweets.shape[0] / df.shape[0] * 100, 1)

                    negtweets = df[df.NB_Analysis == 'Negative']
                    negtweets = negtweets['Tweet']
                    negp = round(negtweets.shape[0] / df.shape[0] * 100, 1)

                    uiNB()
                elif event4 == sg.WIN_CLOSED:
                    break
        elif event == "Cancel":
            break
        elif event == sg.WIN_CLOSED:
            break
    window.close()
    #---------------------------------------------------------------------------UI PART
    


def getBarChartTB():
    # show the value counts in bar chart
    plt.title("TB_Analysis Value Counts")
    plt.xlabel("Sentiment")
    plt.ylabel("Counts")
    df['TB_Analysis'].value_counts().plot(kind='bar')
    plt.show()
def getBarChartNB():
    # show the value counts in bar chart
    plt.title("NB_Analysis Value Counts")
    plt.xlabel("Sentiment")
    plt.ylabel("Counts")
    df['NB_Analysis'].value_counts().plot(kind='bar')
    plt.show()

def getPieChart():
    #Creating PieCart
    labels = ['Positive ['+str(posp)+'%]' , 'Neutral ['+str(neup)+'%]','Negative ['+str(negp)+'%]']
    sizes = [posp, neup, negp]
    colors = ["yellowgreen", "blue","red"]
    patches, texts = plt.pie(sizes,colors=colors, startangle=90)
    plt.legend(labels)
    plt.title("Sentiment Analysis Result for keyword = "+keyword+"" )
    plt.axis("equal")
    plt.show()
    
    
    
if __name__ == "__main__":
    #---------------------------------------------------------------------------UI PART
    # Define the window's contents
    layout = [  [sg.Text("Thanks for using this Sentiment Analysis System.")],
              [sg.Text("Please wait about 30 seconds for the machine to prepare")],
              [sg.Button('OK')] ]

    # Create the window
    window = sg.Window('Sentiment Analysis System', layout)
    # Display and interact with the Window
    event, values = window.read()
    # Finish up by removing from the screen
    window.close()
    #---------------------------------------------------------------------------UI PART
    preRUN()
    searchInTwitter()
    TextBlobSentimentAnalysis()

These is the post:
  1)We're on again for #buildinpublic #2
 of the @supabase_io python library on 30th May 9pm-10pm GMT +8 /6am PST.

Thi… https://t.co/qtNGqRc2kr



  2)RT @cepribwriters: Get quality grades for your online classes and delivery on time
#maths 
#Paperpay
#python
#chemistry 
#WomenWhoCode 
#As…



-----------------------------------------------------
*** tk version 8.6.9 detected.... patching ttk treeview code ***


In [10]:
import matplotlib.pyplot as pPlot
from wordcloud import WordCloud, STOPWORDS
import numpy as npy
from PIL import Image
dataset = ['Hello','world','NIHAO','wo','zai','na','ya','ta']
def create_word_cloud(string):
    maskArray = npy.array(Image.open("cloud.png"))
    cloud = WordCloud(background_color = "white", max_words = 200, mask = maskArray, stopwords = set(STOPWORDS))
    cloud.generate(string)
    cloud.to_file("wordCloud.png")
dataset = str(dataset)
create_word_cloud(dataset)

In [18]:
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import twitter_samples, stopwords
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from nltk import FreqDist, classify, NaiveBayesClassifier
import re, string, random
from textblob import TextBlob
import tweepy
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from wordcloud import WordCloud, STOPWORDS
import PySimpleGUI as sg
from PIL import Image
def transform_format(val):
    wine_mask = np.array(Image.open("cloud.png"))
    if val == 0:
        return 255
    else:
        return val
dataset = ['Hello','world','NIHAO','wo','zai','na','ya','ta']  
#def createWordCloud(dataset):
# Transform your mask into a new one that will work with the function:
transformed_wine_mask = np.ndarray((wine_mask.shape[0],wine_mask.shape[1]), np.int32)

for i in range(len(wine_mask)):
    transformed_wine_mask[i] = list(map(transform_format, wine_mask[i]))

stopwords = set(STOPWORDS)

# Create a word cloud image
wc = WordCloud(background_color="white", max_words=1000, mask=transformed_wine_mask,
               stopwords=stopwords, contour_width=3, contour_color='blue')

wc.generate(str(dataset))

# store to file
wc.to_file("output.png")
# show
plt.figure(figsize=[20,10])
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()

KeyboardInterrupt: 

KeyboardInterrupt: 

In [24]:

dataset = ['Hello','world','NIHAO','wo','zai','na','ya','ta']  
wine_mask = np.array(Image.open("cloud.png"))

def transform_format(val):
    if val == 0:
        return 255
    else:
        return val

transformed_wine_mask = np.ndarray((wine_mask.shape[0],wine_mask.shape[1]), np.int32)

for i in range(len(wine_mask)):
    transformed_wine_mask[i] = list(map(transform_format, wine_mask[i]))

# Create a word cloud image
wc = WordCloud(background_color="white", max_words=1000, mask=transformed_wine_mask,
               stopwords=stopwords, contour_width=3, contour_color='firebrick')

# Generate a wordcloud
wc.generate(str(dataset))

# store to file
wc.to_file("wordCloud.png")

# show
plt.figure(figsize=[20,10])
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()

TypeError: 'LazyCorpusLoader' object is not iterable

In [23]:
wine_mask
transformed_wine_mask

array([[255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       ...,
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255]])