Hactoberfest2020/twitterSentimentAnalysis.py

# -*- coding: utf-8 -*-
"""twitterSentimentAnalysis.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/13BS-Cn_LA3ICQCE6u0zvV3QZB6XJ9D54
"""

# Description: Sentiment analysis of tweets using python

# Import the libraries
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tweepy
from textblob import TextBlob
from wordcloud import WordCloud
plt.style.use('fivethirtyeight')

# Twitter API credentials.
# Read from environment variables so that secrets do not have to be written
# into (and committed with) this file. Each value falls back to an empty
# string when the variable is not set.
Consumer_Key = os.environ.get("TWITTER_CONSUMER_KEY", "")
Consumer_Secret_Key = os.environ.get("TWITTER_CONSUMER_SECRET_KEY", "")
Access_Token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
Access_Token_Secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")

# Create the authentication object
authenticate = tweepy.OAuthHandler(Consumer_Key, Consumer_Secret_Key)

# Set access token and access token secret
authenticate.set_access_token(Access_Token, Access_Token_Secret)

# Create API and pass in auth information
api = tweepy.API(authenticate, wait_on_rate_limit=True)

# Request 100 tweets from one user's timeline, with the full tweet text
posts = api.user_timeline(
    screen_name="BillGates",
    count=100,
    lang="en",
    tweet_mode="extended",
)

# Print the first 5 tweets returned, numbered from 1
print("The last 5 tweets from this account are : \n")
for i, tweet in enumerate(posts[0:5], start=1):
  print(f"{i}) {tweet.full_text}\n")

# Store the text of every tweet in a single-column dataframe
tweet_texts = [status.full_text for status in posts]
df = pd.DataFrame(tweet_texts, columns=['tweets'])

# Preview the first rows (a bare expression: only rendered in a
# notebook/interactive session)
df.head()

#Clean the text

#Create a function to clean the tweets
def cleanTxt(text):
  """Strip Twitter-specific noise from a tweet and return the cleaned text.

  The following are removed, in this order:
    1. @mentions (letters, digits and underscores after the '@')
    2. '#' characters (the hashtag word itself is kept)
    3. the retweet marker 'RT' followed by whitespace
    4. http/https links

  Surrounding whitespace is left untouched, so removed tokens may leave
  extra spaces behind.
  """
  # Handles may contain underscores; without '_' in the class, '@Bill_Gates'
  # would leave '_Gates' behind.
  text = re.sub(r'@[A-Za-z0-9_]+', '', text)
  text = re.sub(r'#', '', text)
  # \b keeps words that merely end in 'RT' (e.g. 'ART show') intact.
  text = re.sub(r'\bRT\s+', '', text)
  # \S+ (non-whitespace) consumes the rest of the URL; the lowercase \s+
  # only matched whitespace, so links were never actually removed.
  text = re.sub(r'https?://\S+', '', text)

  return text

# Run every tweet through cleanTxt and store the result back in the column
cleaned_tweets = df['tweets'].apply(cleanTxt)
df['tweets'] = cleaned_tweets

# Show the cleaned text (bare expression: only rendered in a
# notebook/interactive session)
df

#Function to find out the subjectivity of the text
def getSubjectivity(text):
  """Return the subjectivity value TextBlob reports for ``text``."""
  sentiment = TextBlob(text).sentiment
  return sentiment.subjectivity

#Function to find out the polarity of the text
def getPolarity(text):
  """Return the polarity value TextBlob reports for ``text``."""
  sentiment = TextBlob(text).sentiment
  return sentiment.polarity

# Score every tweet and store the results in two new columns
df['subjectivity'] = df['tweets'].apply(getSubjectivity)
df['polarity'] = df['tweets'].apply(getPolarity)

# Show the polarity and subjectivity (bare expression: only rendered in a
# notebook/interactive session)
df

# Build a word cloud from all tweets joined into one space-separated string
allWords = ' '.join(df['tweets'])
cloud_options = dict(width=500, height=300, random_state=21, max_font_size=119)
wordCloud = WordCloud(**cloud_options).generate(allWords)

# Render the cloud as an image without axes
plt.imshow(wordCloud, interpolation='bilinear')
plt.axis('off')
plt.show()

# Create a function to classify the text as positive, negative or neutral
def getAnalysis(score):
  """Map a polarity score to a sentiment label.

  Returns 'Negative' for scores below zero, 'Neutral' for exactly zero and
  'Positive' for everything else.
  """
  if score < 0:
    return 'Negative'
  if score == 0:
    return 'Neutral'
  return 'Positive'

# Label every tweet from its polarity score
sentiment_labels = df['polarity'].apply(getAnalysis)
df['analysis'] = sentiment_labels

# Show the dataframe (bare expression: only rendered in a
# notebook/interactive session)
df

# Print all the positive tweets, ordered by ascending polarity
sortedDF = df.sort_values(by=['polarity'])
# Select the rows with a boolean mask and iterate the values in their sorted
# order. Looking rows up as sortedDF[col][i] is a lookup by index label, which
# walks the tweets in their original order and silently ignores the sort.
positive_tweets = sortedDF.loc[sortedDF['analysis'] == 'Positive', 'tweets']
for j, tweet_text in enumerate(positive_tweets, start=1):
  print(str(j) + ') ' + tweet_text)
  print()

# Print the negative tweets, ordered by descending polarity
# `ascending` must be the boolean False: the string 'False' is a non-empty
# (truthy) value, so it does not request a descending sort.
sortedDF = df.sort_values(by=['polarity'], ascending=False)
# Select the rows with a boolean mask and iterate the values in their sorted
# order. Looking rows up as sortedDF[col][i] is a lookup by index label, which
# walks the tweets in their original order and silently ignores the sort.
negative_tweets = sortedDF.loc[sortedDF['analysis'] == 'Negative', 'tweets']
for j, tweet_text in enumerate(negative_tweets, start=1):
  print(str(j) + ') ' + tweet_text)
  print()

# Plot polarity (x) against subjectivity (y), one point per tweet
plt.figure(figsize=(8, 6))
# Pass the whole columns in a single call instead of calling scatter once per
# row: this avoids creating one artist per tweet and does not depend on the
# dataframe having integer index labels 0..n-1.
plt.scatter(df['polarity'], df['subjectivity'], color='Blue')

plt.title('Sentiment Analysis')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.show()

# Percentage of positive tweets, rounded to one decimal place.
# NOTE: the percentage is a bare expression, so its value is only displayed
# in a notebook/interactive session.
is_positive = df.analysis == 'Positive'
ptweets = df.loc[is_positive, 'tweets']

round(len(ptweets) / len(df) * 100, 1)

# Percentage of negative tweets, rounded to one decimal place
is_negative = df.analysis == 'Negative'
ntweets = df.loc[is_negative, 'tweets']

round(len(ntweets) / len(df) * 100, 1)

# Number of tweets per sentiment label
df['analysis'].value_counts()

# Plot the number of tweets per sentiment label as a bar chart.
# Draw the bars first and label the returned axes afterwards, so the title
# and axis labels are applied last and cannot be replaced by labels pandas
# sets while plotting (e.g. an x-label taken from the index name; this
# depends on the pandas version).
ax = df['analysis'].value_counts().plot(kind='bar')
ax.set_title('Sentiment Analysis')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Counts')
plt.show()