# Install Dependencies

In [1]:
!pip install emoji
!pip install nltk
!pip install pandas
!pip install -U spacy
!python -m spacy download en_core_web_sm

# Run

In [2]:
import json
import re
import spacy
import nltk
import en_core_web_sm
import pandas as pd
from emoji import get_emoji_regexp

nltk.download('wordnet')
nltk.download('vader_lexicon')

from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk import FreqDist
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

In [3]:
text = "TSLA is going to the moon. I think TSLA is the greatest company ever and GM and other car manufacturers don't stand a chance when competing with TSLA"

In [4]:
 # Remove emojis if exists
text = get_emoji_regexp().sub(u'', text)

# Break apart every word in the string into an individual word
tokenizer = RegexpTokenizer('\w+|\$[\d\.]+|http\S+')
tokenized_str = tokenizer.tokenize(text)

# Convert tokens into lowercase
# lower_str_tokenized = [word.lower() for word in tokenized_str]

# Remove stop words
nlp = en_core_web_sm.load()
all_stopwords = nlp.Defaults.stop_words
tokens_without_sw = [word for word in tokenized_str if word not in all_stopwords]

lemmatizer = WordNetLemmatizer()
lemmatized_tokens = ([lemmatizer.lemmatize(word) for word in tokens_without_sw])
stemmer = PorterStemmer()
stem_tokens = ([stemmer.stem(word) for word in tokens_without_sw])
cleaned_output = lemmatized_tokens

# Apply a sentiment analyzer
sia = SIA()
results = []

for sentences in cleaned_output:
    pol_score = sia.polarity_scores(sentences)
    pol_score['words'] = sentences
    results.append(pol_score)

pd.set_option('display.max_columns', None, 'max_colwidth', None)
df = pd.DataFrame.from_records(results)
print(df)

    neg  neu  pos  compound         words
0   0.0  1.0  0.0    0.0000          TSLA
1   0.0  1.0  0.0    0.0000         going
2   0.0  1.0  0.0    0.0000          moon
3   0.0  0.0  0.0    0.0000             I
4   0.0  1.0  0.0    0.0000         think
5   0.0  1.0  0.0    0.0000          TSLA
6   0.0  0.0  1.0    0.6369      greatest
7   0.0  1.0  0.0    0.0000       company
8   0.0  1.0  0.0    0.0000            GM
9   0.0  1.0  0.0    0.0000           car
10  0.0  1.0  0.0    0.0000  manufacturer
11  0.0  1.0  0.0    0.0000           don
12  0.0  0.0  0.0    0.0000             t
13  0.0  1.0  0.0    0.0000         stand
14  0.0  0.0  1.0    0.2500        chance
15  0.0  1.0  0.0    0.0000     competing
16  0.0  1.0  0.0    0.0000          TSLA
