In [20]:
import spacy
import pandas as pd
from scraper import YoutubeScraper
import plotly.express as px
import plotly.graph_objects as go

In [21]:
# Coin tickers handed to the scraper (presumably used as YouTube search queries — confirm in scraper.py).
coins = ['XRP', 'BTC', 'ETH', 'DOGE']
yts = YoutubeScraper()
# RUN comes from the local `scraper` module (not shown here); the frame displayed
# in a later cell carries Query, Titles and Clean_Titles columns.
df = yts.RUN(coins)

In [22]:
# Pretrained sentiment models used to score the titles: NLTK VADER, TextBlob and Flair.
import nltk
# The VADER analyzer needs its lexicon available locally; download() fetches it if missing.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Shared analyzer instance; later cells call sid.polarity_scores(text).
sid = SentimentIntensityAnalyzer()

from textblob import TextBlob

import flair
# Load Flair's 'en-sentiment' text classifier once so estimate_flair can reuse it.
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')
def estimate_flair(sentence):
    """Classify ``sentence`` with the loaded Flair sentiment model and return its labels."""
    flair_sentence = flair.data.Sentence(sentence)
    # predict() annotates the Sentence object itself; the labels are read off it afterwards.
    flair_sentiment.predict(flair_sentence)
    return flair_sentence.labels



2021-07-16 08:52:50,439 loading file C:\Users\jebli\.flair\models\sentiment-en-mix-distillbert_4.pt


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\jebli\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [23]:
# Inspect the scraped frame before any sentiment columns are added.
df

Unnamed: 0,Query,Titles,Clean_Titles
0,XRP,Shocking XRP Revelation (Bitcoin's Make Or Bre...,shocking xrp revelation bitcoins make or break...
1,XRP,only 0.0024% of people IN THE WORLD OWN 5000 X...,only of people in the world own xrp stop buy...
2,XRP,"Ripple/XRP-BoA & Crypto?,Ripple 12 Countries,F...",ripplexrpboa cryptoripple countriesflarenetw...
3,XRP,"Ripple Wins Deposition , SEC Refuses Judge Ord...",ripple wins deposition sec refuses judge orde...
4,XRP,Attorney Hogan Talks RIPPLE VICTORY at Hearing...,attorney hogan talks ripple victory at hearing...
...,...,...,...
94,DOGE,Baby doge coin Good news | wink holders be rea...,baby doge coin good news wink holders be read...
95,DOGE,Dogecoin & Bitcoin At A Tipping Point!! Key Le...,dogecoin bitcoin at a tipping point key level...
96,DOGE,DOGECOIN CREATOR SAYS ALL CRYPTO IS BAD! DOGEC...,dogecoin creator says all crypto is bad dogeco...
97,DOGE,DOGECOIN BREAKING NEWS!!! INFLATION! BIG BOUNC...,dogecoin breaking news inflation big bounce d...


In [24]:
# Score every cleaned title with each pretrained model; one new column per model.
sentiment_scorers = {
    'NLTK_sentiment': sid.polarity_scores,
    'TextBlob_sentiment': lambda text: TextBlob(text).sentiment,
    'Flair_sentiment': estimate_flair,
}
for sentiment_column, scorer in sentiment_scorers.items():
    df[sentiment_column] = df['Clean_Titles'].apply(scorer)

In [25]:
# Inspect the frame again, now with the three raw sentiment columns.
df

Unnamed: 0,Query,Titles,Clean_Titles,NLTK_sentiment,TextBlob_sentiment,Flair_sentiment
0,XRP,Shocking XRP Revelation (Bitcoin's Make Or Bre...,shocking xrp revelation bitcoins make or break...,"{'neg': 0.278, 'neu': 0.722, 'pos': 0.0, 'comp...","(-1.0, 1.0)",[NEGATIVE (0.8195)]
1,XRP,only 0.0024% of people IN THE WORLD OWN 5000 X...,only of people in the world own xrp stop buy...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...","(0.3, 1.0)",[NEGATIVE (0.7266)]
2,XRP,"Ripple/XRP-BoA & Crypto?,Ripple 12 Countries,F...",ripplexrpboa cryptoripple countriesflarenetw...,"{'neg': 0.0, 'neu': 0.787, 'pos': 0.213, 'comp...","(0.0, 0.0)",[POSITIVE (0.983)]
3,XRP,"Ripple Wins Deposition , SEC Refuses Judge Ord...",ripple wins deposition sec refuses judge orde...,"{'neg': 0.0, 'neu': 0.654, 'pos': 0.346, 'comp...","(0.09999999999999999, 0.4)",[POSITIVE (0.6699)]
4,XRP,Attorney Hogan Talks RIPPLE VICTORY at Hearing...,attorney hogan talks ripple victory at hearing...,"{'neg': 0.243, 'neu': 0.757, 'pos': 0.0, 'comp...","(-0.35, 0.8)",[POSITIVE (0.9499)]
...,...,...,...,...,...,...
94,DOGE,Baby doge coin Good news | wink holders be rea...,baby doge coin good news wink holders be read...,"{'neg': 0.0, 'neu': 0.759, 'pos': 0.241, 'comp...","(0.44999999999999996, 0.55)",[POSITIVE (0.9921)]
95,DOGE,Dogecoin & Bitcoin At A Tipping Point!! Key Le...,dogecoin bitcoin at a tipping point key level...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","(0.0, 1.0)",[POSITIVE (0.9838)]
96,DOGE,DOGECOIN CREATOR SAYS ALL CRYPTO IS BAD! DOGEC...,dogecoin creator says all crypto is bad dogeco...,"{'neg': 0.212, 'neu': 0.788, 'pos': 0.0, 'comp...","(-0.6999999999999998, 0.6666666666666666)",[NEGATIVE (0.9999)]
97,DOGE,DOGECOIN BREAKING NEWS!!! INFLATION! BIG BOUNC...,dogecoin breaking news inflation big bounce d...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","(0.0, 0.1)",[POSITIVE (0.5597)]


In [26]:
# Line colour used for each coin in the 3D sentiment plot.
color_dict = {'XRP':'blue', 'BTC':'red', 'ETH':'green', 'DOGE':'black'}

def nltk_line_plot(df):
    """Plot each title's NLTK scores as a 3D line from the origin.

    Every row of ``df`` becomes one line segment from (0, 0, 0) to the row's
    (neg, neu, pos) scores, coloured by the coin in its ``Query`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Query`` column whose values are keys of ``color_dict``
        and an ``NLTK_sentiment`` column of dicts with 'neg', 'neu' and 'pos' keys.

    Raises
    ------
    KeyError
        If a row's ``Query`` value has no entry in ``color_dict``.
    """
    fig = go.Figure()
    coins_in_legend = set()
    for coin, scores in zip(df['Query'], df['NLTK_sentiment']):
        fig.add_trace(go.Scatter3d(
            x=[0, scores['neg']],
            y=[0, scores['neu']],
            z=[0, scores['pos']],
            mode='lines',
            # In 'lines' mode the colour has to be set on `line`; a `marker`
            # colour is ignored because no markers are drawn.
            line=dict(color=color_dict[coin]),
            name=coin,
            # One legend entry per coin instead of one per title; clicking it
            # toggles every line belonging to that coin.
            legendgroup=coin,
            showlegend=coin not in coins_in_legend,
        ))
        coins_in_legend.add(coin)
    fig.update_layout(scene=dict(xaxis_title='neg', yaxis_title='neu', zaxis_title='pos'))
    fig.show()

In [27]:
# Render the 3D plot of per-title NLTK (neg, neu, pos) scores.
nltk_line_plot(df)

In [29]:
#flatten dict cols
def flatten_dict(df, column, optional_prefix=''):
    """Expand a column of dicts into one new column per dictionary key.

    The key set is taken from the first row only; every other row is expected
    to hold a dict with at least those keys.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place and also returned.
    column : str
        Name of the column whose values are dicts.
    optional_prefix : str, optional
        Text prepended to each key to form the new column name.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the added columns. An empty frame is
        returned unchanged because there is no first row to read keys from.

    Raises
    ------
    KeyError
        If a later row's dict lacks one of the first row's keys.
    """
    if df.empty:
        return df
    # Snapshot the keys so the loop does not depend on a live dict view.
    keys = list(df[column].iloc[0].keys())
    for key in keys:
        # Bind `key` as a default argument so the lambda never relies on the loop variable.
        df[optional_prefix + key] = df[column].apply(lambda record, key=key: record[key])
    return df

# Expand the NLTK score dicts into one column per key, each prefixed with 'NLTK_'.
df = flatten_dict(df, 'NLTK_sentiment', optional_prefix='NLTK_')


def flatten_list(df, column, option_prefix):
    """Placeholder that is not implemented yet: it does nothing and returns None.

    TODO: implement. Presumably intended as the list counterpart of
    ``flatten_dict`` — confirm the intended behavior before relying on it.
    NOTE(review): the prefix parameter is spelled ``option_prefix`` here but
    ``optional_prefix`` in ``flatten_dict``.
    """
    pass

# Preview the first rows, including the flattened NLTK score columns.
df.head()



Unnamed: 0,Query,Titles,Clean_Titles,NLTK_sentiment,TextBlob_sentiment,Flair_sentiment,NLTKneg,NLTKneu,NLTKpos,NLTKcompound
0,XRP,Shocking XRP Revelation (Bitcoin's Make Or Bre...,shocking xrp revelation bitcoins make or break...,"{'neg': 0.278, 'neu': 0.722, 'pos': 0.0, 'comp...","(-1.0, 1.0)",[NEGATIVE (0.8195)],0.278,0.722,0.0,-0.4019
1,XRP,only 0.0024% of people IN THE WORLD OWN 5000 X...,only of people in the world own xrp stop buy...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...","(0.3, 1.0)",[NEGATIVE (0.7266)],0.18,0.82,0.0,-0.296
2,XRP,"Ripple/XRP-BoA & Crypto?,Ripple 12 Countries,F...",ripplexrpboa cryptoripple countriesflarenetw...,"{'neg': 0.0, 'neu': 0.787, 'pos': 0.213, 'comp...","(0.0, 0.0)",[POSITIVE (0.983)],0.0,0.787,0.213,0.2263
3,XRP,"Ripple Wins Deposition , SEC Refuses Judge Ord...",ripple wins deposition sec refuses judge orde...,"{'neg': 0.0, 'neu': 0.654, 'pos': 0.346, 'comp...","(0.09999999999999999, 0.4)",[POSITIVE (0.6699)],0.0,0.654,0.346,0.5719
4,XRP,Attorney Hogan Talks RIPPLE VICTORY at Hearing...,attorney hogan talks ripple victory at hearing...,"{'neg': 0.243, 'neu': 0.757, 'pos': 0.0, 'comp...","(-0.35, 0.8)",[POSITIVE (0.9499)],0.243,0.757,0.0,-0.5423


In [None]:
# Average the sentiment vectors for each coin.
average_df = pd.DataFrame()
coins = list(df['Query'].unique())
# Accumulators for the per-coin NLTK scores. `nltk_neus` was previously a bare
# name with no assignment, which raised NameError when the cell ran.
nltk_negs = []
nltk_neus = []
for coin in coins:
    temp = df[df['Query']==coin]
