# Install Dependencies

In [46]:
# !pip install emoji
# !pip install nltk
# !pip install pandas
# !pip install -U spacy
# !python -m spacy download en_core_web_sm

# Run the Sentiment Analysis Pipeline

In [47]:
import json
import re
import spacy
import nltk
import en_core_web_sm
import pandas as pd
from emoji import get_emoji_regexp

# nltk.download('wordnet')
# nltk.download('vader_lexicon')

from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk import FreqDist
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

In [48]:
# Sample input for the pipeline: one short passage that mentions a ticker several times.
text = (
    "TSLA is going to the moon. "
    "I think TSLA is the greatest company ever "
    "and GM and other car manufacturers don't stand a chance "
    "when competing with TSLA"
)

In [49]:
# Remove emojis, if any are present
# NOTE(review): `get_emoji_regexp` was removed in emoji 2.0, so this cell needs
# emoji<2.0; on newer releases `emoji.replace_emoji(text, replace='')` is the
# replacement -- confirm which version the environment pins.
text = get_emoji_regexp().sub('', text)

# Break the string into individual tokens: URLs, dollar amounts, or plain words.
# The pattern is a raw string so the backslashes reach the regex engine
# untouched (a plain string triggers invalid-escape warnings). The specific
# alternatives are listed first because regex alternation is ordered: with the
# word alternative in front, "http" was always consumed as a word and the URL
# alternative could never match.
tokenizer = RegexpTokenizer(r'http\S+|\$[\d\.]+|\w+')
tokenized_str = tokenizer.tokenize(text)
print(tokenized_str)

# Convert tokens into lowercase before comparing them against the stop-word set
lower_str_tokenized = [word.lower() for word in tokenized_str]

# Remove stop words, using the stop-word set exposed by the loaded spaCy model
nlp = en_core_web_sm.load()
all_stopwords = nlp.Defaults.stop_words
print(all_stopwords)
tokens_without_sw = [word for word in lower_str_tokenized if word not in all_stopwords]

# Normalise the remaining tokens two ways. Only the lemmatized tokens feed the
# sentiment step below; the stemmed tokens are computed but not used further
# in this cell.
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens_without_sw]
stemmer = PorterStemmer()
stem_tokens = [stemmer.stem(word) for word in tokens_without_sw]
cleaned_output = lemmatized_tokens

# Apply the sentiment analyzer to each cleaned token on its own and store the
# token alongside its polarity scores under the 'words' key.
sia = SIA()
results = []

for token in cleaned_output:
    pol_score = sia.polarity_scores(token)
    pol_score['words'] = token
    results.append(pol_score)

# Show every column and the full cell contents when the frame is printed
pd.set_option('display.max_columns', None, 'display.max_colwidth', None)
df = pd.DataFrame.from_records(results)
print(df)

['TSLA', 'is', 'going', 'to', 'the', 'moon', 'I', 'think', 'TSLA', 'is', 'the', 'greatest', 'company', 'ever', 'and', 'GM', 'and', 'other', 'car', 'manufacturers', 'don', 't', 'stand', 'a', 'chance', 'when', 'competing', 'with', 'TSLA']
{'each', 'as', 'last', 'who', '‘s', 'against', 'n’t', 'is', 'put', 'can', 'up', 'been', 'everyone', 'the', 'do', 'really', 'three', 'thru', 'twelve', 'are', 'anyone', '’ll', 'whole', 'yourselves', 'me', 'seeming', 'about', "n't", 'nevertheless', 'four', 'and', '‘ll', 'during', 'or', 'ours', 'several', 'than', 'on', 'themselves', 'yet', 'enough', 'while', 'does', 'few', "'ve", 'take', 'first', 'else', 'to', 'something', 'your', 'same', 'many', 'became', 'least', 'go', 'since', 'quite', 'seem', 'others', 'whose', 'once', 'within', 'along', 'thus', 'be', 'it', '‘m', 'amongst', 'somehow', '‘d', 'two', 'myself', 'unless', 'even', 'beyond', 'doing', 'upon', 'for', 'hence', 'twenty', 'hereby', 'former', 'nothing', 'still', 'please', 'so', 'itself', 'what', 'mo