# Install Dependencies

In [49]:
# !pip install emoji
# !pip install nltk
# !pip install pandas
# !pip install -U spacy
# !python -m spacy download en_core_web_sm

# Run

In [50]:
import json
import re
import spacy
import nltk
import en_core_web_sm
import pandas as pd
from emoji import get_emoji_regexp

# nltk.download('wordnet')
# nltk.download('vader_lexicon')
# nltk.download('punkt')

from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer, sent_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk import FreqDist
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

In [51]:
text = "TSLA is going to the moon. I think TSLA is the greatest company ever and GM and other car manufacturers don't stand a chance when competing with TSLA"

In [52]:
# Load tickers
df = pd.read_csv('./cleaned_tickers.csv')
tickers = df['ticker'].tolist()
# Remove emojis if exists
text = get_emoji_regexp().sub(u'', text)
text = re.sub(r"and|or", ".", text)

# Break apart every word in the string into an individual word
tokenizer = RegexpTokenizer('[^\.]+')
tokenized_str = tokenizer.tokenize(text)
tokenized_str = sent_tokenize(text)

print(tokenized_str)

# Remove stop words
nlp = en_core_web_sm.load()
all_stopwords = nlp.Defaults.stop_words
tokens_without_sw = [word for word in tokenized_str if word not in all_stopwords]

lemmatizer = WordNetLemmatizer()
lemmatized_tokens = ([lemmatizer.lemmatize(word) for word in tokens_without_sw])
stemmer = PorterStemmer()
stem_tokens = ([stemmer.stem(word) for word in tokens_without_sw])
cleaned_output = lemmatized_tokens

# Apply a sentiment analyzer
sia = SIA()
result = []

for sentences in cleaned_output:
    data = dict()
    pol_score = sia.polarity_scores(sentences)
    data['ticker'] = None

    for ticker in tickers:
        if ticker in sentences:
            data['ticker'] = ticker

    if not data['ticker']:
        continue
    
    data['sentiment_score'] = pol_score['compound']
    result.append(data)
    
print(result)

['TSLA is going to the moon.', 'I think TSLA is the greatest company ever .', 'GM .', "other car manufacturers don't st. a chance when competing with TSLA"]
[{'ticker': 'TSLA', 'sentiment_score': 0.0}, {'ticker': 'TSLA', 'sentiment_score': 0.6369}, {'ticker': 'M', 'sentiment_score': 0.0}, {'ticker': 'TSLA', 'sentiment_score': -0.1877}]
