Programa para el análisis de sentimiento de tweets.

David Martínez Méndez.

---



In [1]:
#Library import
import tweepy
from textblob import TextBlob
from wordcloud import WordCloud
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
# Select matplotlib's "fivethirtyeight" style sheet for the figures drawn in this notebook
plt.style.use("fivethirtyeight")

In [None]:
# Data
# Upload files into the Colab runtime through google.colab's `files.upload()`.
# NOTE(review): presumably this is where 'Login.csv' (read in the next cell) is
# provided - confirm. The import only resolves inside Google Colab.
from google.colab import files
uploaded = files.upload()

In [None]:
# Load the Twitter API credentials file.
# The credentials cell below reads rows 0-3 of its 'key' column.
# (fix: pandas has no `read_cs`; the reader function is `read_csv`)
log = pd.read_csv('Login.csv')

In [None]:
# API credentials
# Rows 0..3 of the 'key' column are read, in this order, as: consumer key,
# consumer secret, access token, access token secret.
consumerKey, consumerSecret, accesToken, accesTokenSecret = (
  log['key'][row] for row in range(4)
)

In [None]:
# Authentication object, built from the consumer key / consumer secret pair
authenticate = tweepy.OAuthHandler(consumerKey, consumerSecret)

# Attach the access token and access token secret
# (fix: the tweepy method is `set_access_token`, not `set_acces_token`)
authenticate.set_access_token(accesToken, accesTokenSecret)

# API object used by the cells below to query Twitter
# (fix: the tweepy class is `API`, not `APIU`)
api = tweepy.API(authenticate, wait_on_rate_limit = True)

In [None]:
# Tweet extraction: request up to 100 tweets from the user's timeline.
# The `full_text` attribute read below relies on tweet_mode = "extended".
# NOTE(review): `lang` does not appear among tweepy's documented user_timeline
# parameters - confirm it has any effect.
posts = api.user_timeline(screen_name = "BillGates", count = 100, lang = "en", tweet_mode = "extended")

# Print the first 5 returned tweets, numbered from 1
# (fix: the loop iterated over the undefined name `post` instead of `posts`)
print("Show the 5 recent tweets: \n")
for i, tweet in enumerate(posts[0:5], start = 1):
  print(str(i) + ' ) ' + tweet.full_text + '\n')

In [None]:
# Collect the full text of every fetched tweet into a single-column dataframe
df = pd.DataFrame(
  data = [status.full_text for status in posts],
  columns = ['Tweets'],
)

# Preview the first 5 rows
df.head(n = 5)

In [None]:
#Clean the data

# Function to clean tweets
def cleanTxt(text):
  """Strip Twitter-specific markup from a tweet.

  Each of the following is replaced by a single space, in this order:
  @mentions, the '#' symbol (the hashtag word itself is kept), the retweet
  marker 'RT' followed by whitespace, and http/https hyperlinks.

  Parameters:
    text (str): raw tweet text.

  Returns:
    str: the cleaned text. Whitespace is not collapsed, so runs of spaces
    may remain where markup was removed.
  """
  # '_' is included so a handle such as @Bill_Gates is removed whole instead
  # of leaving '_Gates' behind.
  text = re.sub(r'@[A-Za-z0-9_]+', ' ', text)
  text = re.sub(r'#', ' ', text)
  # \b keeps the pattern from firing inside ordinary words ending in 'RT'
  # (previously "START now" became "STA now").
  text = re.sub(r'\bRT\s+', ' ', text)
  text = re.sub(r'https?:\/\/\S+', ' ', text)

  return text

# Run the cleaner over every tweet, overwriting the raw text in the 'Tweets' column
df['Tweets'] = df['Tweets'].map(cleanTxt)

# Display the dataframe with the cleaned text
df

In [None]:
# Function to get subjectivity
def getSubjectivity(text):
  """Return the subjectivity score that TextBlob's sentiment analysis assigns to `text`."""
  blob = TextBlob(text)
  return blob.sentiment.subjectivity

# Function to get polarity
def getPolarity(text):
  """Return the polarity score that TextBlob's sentiment analysis assigns to `text`."""
  blob = TextBlob(text)
  return blob.sentiment.polarity

# Add one column per sentiment score, computed tweet by tweet
df['Subjectivity'] = df['Tweets'].map(getSubjectivity)
df['Polarity'] = df['Tweets'].map(getPolarity)

# Display the dataframe including the two new columns
df

In [None]:
# Word cloud: join all tweets into one space-separated string and render it
allWords = ' '.join(df['Tweets'])
wordCloud = WordCloud(
  width = 500,
  height = 300,
  random_state = 21,
  max_font_size = 110,
).generate(allWords)

# Show the cloud as an image, without axes
plt.imshow(wordCloud, interpolation = "bilinear")
plt.axis('off')
plt.show()

In [None]:
# Function to compute the negative, neutral and positive analysis
def getAnalysis(score):
  """Map a numeric polarity score to a sentiment label.

  Returns 'Negative' when score < 0, 'Neutral' when score == 0 and
  'Positive' in every other case.
  """
  if score < 0:
    return 'Negative'
  return 'Neutral' if score == 0 else 'Positive'

# Label every tweet from its polarity score
df['Analysis'] = df['Polarity'].map(getAnalysis)

# Display the labelled dataframe
df

In [None]:
# Print positive tweets, numbered from 1, in order of ascending polarity.
# Fixes:
#  - the sort key must be the string 'Polarity' (the bare name raised NameError)
#  - the text column is 'Tweets' (there is no 'Tweet' column)
#  - rows are taken from the sorted frame itself; the previous label lookup
#    `sortedDF[...][i]` walked the original row order and ignored the sort
sortedDF = df.sort_values(by = ['Polarity'])
positiveTweets = sortedDF.loc[sortedDF['Analysis'] == 'Positive', 'Tweets']
for j, tweetText in enumerate(positiveTweets, start = 1):
  print(str(j) + ') ' + tweetText)
  print()

In [None]:
# Print negative tweets, numbered from 1, in order of descending polarity.
# Fixes:
#  - the sort key must be the string 'Polarity' (the bare name raised NameError)
#  - the keyword is `ascending` and takes the boolean False, not the string 'False'
#  - the text column is 'Tweets' (there is no 'Tweet' column)
#  - rows are taken from the sorted frame itself; the previous label lookup
#    `sortedDF[...][i]` walked the original row order and ignored the sort
sortedDF = df.sort_values(by = ['Polarity'], ascending = False)
negativeTweets = sortedDF.loc[sortedDF['Analysis'] == 'Negative', 'Tweets']
for j, tweetText in enumerate(negativeTweets, start = 1):
  print(str(j) + ') ' + tweetText)
  print()

In [None]:
# Plot polarity (x axis) against subjectivity (y axis), one point per tweet.
# Both columns are passed to a single scatter call instead of one call per row;
# this also avoids the label-based `[i]` lookups, which only work while the
# dataframe keeps its default 0..n-1 index.
plt.figure(figsize = (8, 6))
plt.scatter(df['Polarity'], df['Subjectivity'], color = 'Blue')

plt.title('Sentiment Analysis')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.show()

In [None]:
# Percentage of tweets labelled 'Positive', rounded to one decimal place
ptweets = df.loc[df['Analysis'] == 'Positive', 'Tweets']

round(len(ptweets) / len(df) * 100, 1)

In [None]:
# Percentage of tweets labelled 'Negative', rounded to one decimal place
ntweets = df.loc[df['Analysis'] == 'Negative', 'Tweets']

round(len(ntweets) / len(df) * 100, 1)

In [None]:
# Count the tweets in each sentiment class once and reuse the result below
analysisCounts = df['Analysis'].value_counts()

# Show the value counts. A bare expression that is not the last line of a cell
# is not displayed, so the counts are printed explicitly.
print(analysisCounts)

# Plot and visualize the counts as a bar chart.
# Title and axis labels are set after the pandas plot call so that any labels
# pandas derives from the data do not replace them.
analysisCounts.plot(kind='bar')
plt.title('Sentiment Analysis')
plt.xlabel('Sentiment')
plt.ylabel('Counts')
plt.show()