**Sentiment Analysis of Elon Musk Tweets**

In [12]:
import pandas as pd
import numpy as np

# for text processing
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize

# for sentiment analysis using textblob
from textblob import TextBlob

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## **Sentiment Analysis using positive and negative words files**

**Positive-words text file processing and storing all positive words in a dataframe**

In [13]:
# Read every line of the positive opinion lexicon. The context manager closes
# the file handle as soon as the lines are in memory.
with open('positive-words.txt', 'r') as f:
  list_pwords = f.readlines()
list_pwords[0:10]

[';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n',
 '; \n',
 '; Opinion Lexicon: Positive\n',
 ';\n',
 '; This file contains a list of POSITIVE opinion words (or sentiment words).\n',
 ';\n',
 '; This file and the papers can all be downloaded from \n',
 ';    http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html\n',
 ';\n',
 '; If you use this list, please cite one of the following two papers:\n']

In [14]:
# Confirm that readlines() produced a plain Python list.
type(list_pwords)

list

In [15]:
# Inspect the first 35 lines to see the ';'-prefixed header that precedes the word list.
list_pwords[0:35]

[';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n',
 '; \n',
 '; Opinion Lexicon: Positive\n',
 ';\n',
 '; This file contains a list of POSITIVE opinion words (or sentiment words).\n',
 ';\n',
 '; This file and the papers can all be downloaded from \n',
 ';    http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html\n',
 ';\n',
 '; If you use this list, please cite one of the following two papers:\n',
 ';\n',
 ';   Minqing Hu and Bing Liu. "Mining and Summarizing Customer Reviews." \n',
 ';       Proceedings of the ACM SIGKDD International Conference on Knowledge \n',
 ';       Discovery and Data Mining (KDD-2004), Aug 22-25, 2004, Seattle, \n',
 ';       Washington, USA, \n',
 ';   Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing \n',
 ';       and Comparing Opinions on the Web." Proceedings of the 14th \n',
 ';       International World Wide Web conference (WWW-2005), May 10-14, \n',
 ';       2005, Chiba, Japan.\n',
 ';\n',
 '; Note

In [16]:
# Remove the header from the list of lines: drop every line that starts with
# ';' as well as blank lines. Filtering, rather than slicing off a fixed
# number of lines, keeps this cell safe to re-run (a second run is a no-op
# instead of discarding real words).
list_pwords = [line for line in list_pwords if line.strip() and not line.startswith(';')]
list_pwords[0:10]

['a+\n',
 'abound\n',
 'abounds\n',
 'abundance\n',
 'abundant\n',
 'accessable\n',
 'accessible\n',
 'acclaim\n',
 'acclaimed\n',
 'acclamation\n']

In [17]:
# Remove the newline from each entry and lower-case it.
# str.lower() returns a new string, so its result has to be kept; lower-casing
# makes the lexicon comparable with the lower-cased tweet tokens used for scoring.
plist1 = [x.replace('\n', '').lower() for x in list_pwords]

print(plist1[0:5])

['a+', 'abound', 'abounds', 'abundance', 'abundant']


In [18]:
# Wrap the cleaned positive words in a single-column frame named 'Word'
df_pwords = pd.DataFrame({'Word': plist1})
df_pwords.head()

Unnamed: 0,Word
0,a+
1,abound
2,abounds
3,abundance
4,abundant


In [19]:
# Every positive-lexicon word contributes +1 to a sentiment score
df_pwords['Value'] = 1

In [20]:
# Sanity check: the column should hold the single value 1, once per positive word.
df_pwords['Value'].value_counts()

1    2006
Name: Value, dtype: int64

**Negative-words text file processing and storing all negative words in a dataframe.**

In [21]:
# Read every line of the negative opinion lexicon, decoded as latin-1. The
# context manager closes the file handle as soon as the lines are in memory.
with open('negative-words.txt', 'r', encoding='latin-1') as f:
  list_nwords = f.readlines()
list_nwords[0:10]

[';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n',
 '; \n',
 '; Opinion Lexicon: Negative\n',
 ';\n',
 '; This file contains a list of NEGATIVE opinion words (or sentiment words).\n',
 ';\n',
 '; This file and the papers can all be downloaded from \n',
 ';    http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html\n',
 ';\n',
 '; If you use this list, please cite one of the following two papers:\n']

In [22]:
# Confirm that readlines() produced a plain Python list.
type(list_nwords)

list

In [23]:
# Inspect the first 35 lines to see the ';'-prefixed header that precedes the word list.
list_nwords[0:35]

[';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n',
 '; \n',
 '; Opinion Lexicon: Negative\n',
 ';\n',
 '; This file contains a list of NEGATIVE opinion words (or sentiment words).\n',
 ';\n',
 '; This file and the papers can all be downloaded from \n',
 ';    http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html\n',
 ';\n',
 '; If you use this list, please cite one of the following two papers:\n',
 ';\n',
 ';   Minqing Hu and Bing Liu. "Mining and Summarizing Customer Reviews." \n',
 ';       Proceedings of the ACM SIGKDD International Conference on Knowledge \n',
 ';       Discovery and Data Mining (KDD-2004), Aug 22-25, 2004, Seattle, \n',
 ';       Washington, USA, \n',
 ';   Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing \n',
 ';       and Comparing Opinions on the Web." Proceedings of the 14th \n',
 ';       International World Wide Web conference (WWW-2005), May 10-14, \n',
 ';       2005, Chiba, Japan.\n',
 ';\n',
 '; No

In [24]:
# Remove the header from the list of lines: drop every line that starts with
# ';' as well as blank lines. Filtering, rather than slicing off a fixed
# number of lines, keeps this cell safe to re-run (a second run is a no-op
# instead of discarding real words).
list_nwords = [line for line in list_nwords if line.strip() and not line.startswith(';')]
list_nwords[0:10]

['2-faced\n',
 '2-faces\n',
 'abnormal\n',
 'abolish\n',
 'abominable\n',
 'abominably\n',
 'abominate\n',
 'abomination\n',
 'abort\n',
 'aborted\n']

In [25]:
# Remove the newline from each entry and lower-case it.
# str.lower() returns a new string, so its result has to be kept; lower-casing
# makes the lexicon comparable with the lower-cased tweet tokens used for scoring.
nlist1 = [x.replace('\n', '').lower() for x in list_nwords]

print(nlist1[0:5])

['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable']


In [26]:
# Wrap the cleaned negative words in a single-column frame named 'Word'
df_nwords = pd.DataFrame({'Word': nlist1})
df_nwords.head()

Unnamed: 0,Word
0,2-faced
1,2-faces
2,abnormal
3,abolish
4,abominable


In [27]:
# (rows, columns) of the negative-word frame; rows = number of negative words.
df_nwords.shape

(4783, 1)

In [28]:
# Every negative-lexicon word contributes -1 to a sentiment score.
df_nwords['Value'] = -1

In [29]:
# Sanity check: the column should hold the single value -1, once per negative word.
df_nwords['Value'].value_counts()

-1    4783
Name: Value, dtype: int64

**Concatenate both word lists**

In [30]:
# Stack the positive words on top of the negative words.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and, like append, keeps each frame's original row labels.
df_words = pd.concat([df_pwords, df_nwords])

In [31]:
# The row count should equal the positive plus the negative word counts.
df_words.shape

(6789, 2)

In [32]:
# Preview the lexicon indexed by word. The result is only displayed, not
# assigned, so df_words itself keeps 'Word' as a regular column.
df_words.set_index('Word')

Unnamed: 0_level_0,Value
Word,Unnamed: 1_level_1
a+,1
abound,1
abounds,1
abundance,1
abundant,1
...,...
zaps,-1
zealot,-1
zealous,-1
zealously,-1


In [33]:
# Check the column types of the combined lexicon.
df_words.dtypes

Word     object
Value     int64
dtype: object

**Processing Elon Musk Tweets**

In [34]:
# Load the tweets CSV, decoding it as latin-1, and preview the first rows.
df_elon = pd.read_csv('Elon_musk.csv', encoding='latin-1')
df_elon.head()

Unnamed: 0.1,Unnamed: 0,Text
0,1,@kunalb11 Im an alien
1,2,@ID_AA_Carmack Ray tracing on Cyberpunk with H...
2,3,@joerogan @Spotify Great interview!
3,4,@gtera27 Doge is underestimated
4,5,@teslacn Congratulations Tesla China for amazi...


In [35]:
# (rows, columns) of the tweets frame as loaded.
df_elon.shape

(1999, 2)

In [36]:
# Drop the 'Unnamed: 0' counter column so only the tweet text remains.
# errors='ignore' keeps the cell re-runnable: without it, a second execution
# raises KeyError because the column has already been removed.
df_elon = df_elon.drop(columns='Unnamed: 0', errors='ignore')
df_elon.head()

Unnamed: 0,Text
0,@kunalb11 Im an alien
1,@ID_AA_Carmack Ray tracing on Cyberpunk with H...
2,@joerogan @Spotify Great interview!
3,@gtera27 Doge is underestimated
4,@teslacn Congratulations Tesla China for amazi...


In [37]:
def sent_score_calc(text):
  """Score a text against the positive/negative word lexicon in df_words.

  The text is lower-cased and tokenized with word_tokenize; each token found
  in df_words['Word'] adds that word's 'Value' to the score.

  Parameters:
    text: the text to score. Non-string input (for example a missing value
      read from the CSV) is scored as 0 instead of raising.

  Returns:
    The sum of the lexicon values of all matching tokens (0 if none match).
  """
  if not isinstance(text, str):
    return 0

  # Build the word -> value lookup once per call instead of scanning the whole
  # frame for every token. keep='first' preserves the original behaviour for
  # words listed more than once: the first matching row wins.
  lexicon = df_words.drop_duplicates(subset='Word', keep='first').set_index('Word')['Value']

  sent_score = 0
  for token in word_tokenize(text.lower()):
    sent_score += lexicon.get(token, 0)
  return sent_score

In [38]:
# Score every tweet with the lexicon-based scorer and store the result
df_elon['Sentiment_Score'] = df_elon['Text'].map(sent_score_calc)
df_elon.head()

Unnamed: 0,Text,Sentiment_Score
0,@kunalb11 Im an alien,0
1,@ID_AA_Carmack Ray tracing on Cyberpunk with H...,0
2,@joerogan @Spotify Great interview!,1
3,@gtera27 Doge is underestimated,0
4,@teslacn Congratulations Tesla China for amazi...,2


In [39]:
def sentiment_type(score):
  """Translate a numeric sentiment score into a label.

  Returns 'Positive' when the score is above zero, 'Negative' when it is
  below zero, and 'Neutral' in every other case.
  """
  if score > 0:
    return 'Positive'
  if score < 0:
    return 'Negative'
  return 'Neutral'

In [40]:
# Label each tweet from its lexicon score
df_elon['Sentiment_Type'] = df_elon['Sentiment_Score'].map(sentiment_type)

In [41]:
# Preview the tweets together with their lexicon score and label.
df_elon.head()

Unnamed: 0,Text,Sentiment_Score,Sentiment_Type
0,@kunalb11 Im an alien,0,Neutral
1,@ID_AA_Carmack Ray tracing on Cyberpunk with H...,0,Neutral
2,@joerogan @Spotify Great interview!,1,Positive
3,@gtera27 Doge is underestimated,0,Neutral
4,@teslacn Congratulations Tesla China for amazi...,2,Positive


## **Sentiment analysis using textblob**

In [42]:
# Reload the tweets CSV (decoded as latin-1). This rebinds df_elon, so the
# TextBlob analysis starts from the raw file rather than the frame scored above.
df_elon = pd.read_csv('Elon_musk.csv', encoding='latin-1')
df_elon.head()

Unnamed: 0.1,Unnamed: 0,Text
0,1,@kunalb11 Im an alien
1,2,@ID_AA_Carmack Ray tracing on Cyberpunk with H...
2,3,@joerogan @Spotify Great interview!
3,4,@gtera27 Doge is underestimated
4,5,@teslacn Congratulations Tesla China for amazi...


In [43]:
# (rows, columns) of the freshly reloaded tweets frame.
df_elon.shape

(1999, 2)

In [44]:
# Drop the 'Unnamed: 0' counter column so only the tweet text remains.
# errors='ignore' keeps the cell re-runnable: without it, a second execution
# raises KeyError because the column has already been removed.
df_elon = df_elon.drop(columns='Unnamed: 0', errors='ignore')
df_elon.head()

Unnamed: 0,Text
0,@kunalb11 Im an alien
1,@ID_AA_Carmack Ray tracing on Cyberpunk with H...
2,@joerogan @Spotify Great interview!
3,@gtera27 Doge is underestimated
4,@teslacn Congratulations Tesla China for amazi...


**Define required functions for sentiment analysis**

In [45]:
def getSubjectivity(text):
  """Return the TextBlob subjectivity of the lower-cased text."""
  blob = TextBlob(text.lower())
  return blob.sentiment.subjectivity

def getPolarity(text):
  """Return the TextBlob polarity of the lower-cased text."""
  blob = TextBlob(text.lower())
  return blob.sentiment.polarity

def getSentAnalysis(num):
  """Turn a polarity value into a sentiment label.

  Returns 'Positive' when num is above zero, 'Neutral' when it equals zero,
  and 'Negative' in every other case.
  """
  if num > 0:
    return 'Positive'
  return 'Neutral' if num == 0 else 'Negative'



In [46]:
# Score every tweet with the TextBlob helpers, then label it from its polarity
tweet_text = df_elon['Text']
df_elon['Subjectivity'] = tweet_text.map(getSubjectivity)
df_elon['Polarity'] = tweet_text.map(getPolarity)
df_elon['Sentiment_Type'] = df_elon['Polarity'].map(getSentAnalysis)

In [47]:
# Preview the tweets together with their TextBlob subjectivity, polarity and label.
df_elon.head()

Unnamed: 0,Text,Subjectivity,Polarity,Sentiment_Type
0,@kunalb11 Im an alien,0.75,-0.25,Negative
1,@ID_AA_Carmack Ray tracing on Cyberpunk with H...,0.0,0.0,Neutral
2,@joerogan @Spotify Great interview!,0.75,1.0,Positive
3,@gtera27 Doge is underestimated,0.0,0.0,Neutral
4,@teslacn Congratulations Tesla China for amazi...,0.366667,0.345313,Positive
