In [1]:
import translators as ts
from langdetect import detect

In [2]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
import pandas as pd
import numpy as np
import csv
from tqdm import tqdm

In [3]:
df = pd.read_csv("../datasets/covid_philippines/covid_philippines_comments.csv").drop("Unnamed: 0", axis=1)
df.head()

Unnamed: 0,video_id,comment
0,aLZ85hb4wjE,Loved the calmness of Manilla.
1,aLZ85hb4wjE,After this pandemic I think every country shou...
2,aLZ85hb4wjE,manila looks beautifull with less people
3,aLZ85hb4wjE,The lockdown makes the city look like a place ...
4,aLZ85hb4wjE,India also same


## TRANSLATE

In [4]:
translated_comments = {}
translated_comments["video_id"] = {}
translated_comments["comment"] = {}
video_id_list = df["video_id"].to_list()
comments_list = df["comment"].to_list()

In [5]:
pbar = tqdm(total=len(video_id_list))
pbar.set_description("Translating...")

for i in range(df.shape[0]):
    if comments_list[i] != None:
        new_comment = comments_list[i]
        try:
            lang = detect(comments_list[i]) #added langdetect since it errors if there are many entries to translate, so now it will ontly tranlate if comment not english
            if lang != 'en':
                new_comment = ts.translate_text(comments_list[i], 'google', to_language = 'en')
                
        except:
            # No change; get same comment from list
            pass
            
        finally:
            translated_comments["video_id"][i] = video_id_list[i]
            translated_comments["comment"][i] = new_comment
            pbar.update(1)

translated_df = pd.DataFrame.from_dict(translated_comments)
pbar.close()

Translating...:   0%|          | 0/1216 [00:00<?, ?it/s]Translating...: 100%|██████████| 1216/1216 [04:57<00:00,  4.09it/s]


In [6]:
translated_df

Unnamed: 0,video_id,comment
0,aLZ85hb4wjE,Loved the calmness of Manilla.
1,aLZ85hb4wjE,After this pandemic I think every country shou...
2,aLZ85hb4wjE,manila looks beautifull with less people
3,aLZ85hb4wjE,The lockdown makes the city look like a place ...
4,aLZ85hb4wjE,India also same
...,...,...
1211,5DvMPgoKZmM,This covid will be a never-ending fuckery as l...
1212,5DvMPgoKZmM,A new variant is inevitable.
1213,5DvMPgoKZmM,The man that should resign from his office is ...
1214,5DvMPgoKZmM,"Is that the people who got been vaccinated.,"


---

In [7]:
def translate(video_id_list, comments_list):
    translated_comments = {}
    translated_comments["video_id"] = {}
    translated_comments["comment"] = {}
    
    pbar = tqdm(total=len(video_id_list))
    pbar.set_description("Translating...")

    for i in range(df.shape[0]):
        if comments_list[i] != None:
            new_comment = comments_list[i]
            try:
                lang = detect(comments_list[i]) #added langdetect since it errors if there are many entries to translate, so now it will ontly tranlate if comment not english
                if lang != 'en':
                    new_comment = ts.translate_text(comments_list[i], 'google', to_language = 'en')

            except:
                # No change; get same comment from list
                pass

            finally:
                translated_comments["video_id"][i] = video_id_list[i]
                translated_comments["comment"][i] = new_comment
                pbar.update(1)

    translated_df = pd.DataFrame.from_dict(translated_comments)
    pbar.close()

## DETECT SPAM

### MultinomialNB

- 0:Not Spam
- 1:Spam

In [8]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
#from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [9]:
# dataset source: https://archive.ics.uci.edu/dataset/380/youtube+spam+collection

psy_model_df = pd.read_csv("../datasets/model_train/Youtube01-Psy.csv")
lmfao_model_df = pd.read_csv("../datasets/model_train/Youtube03-LMFAO.csv")
kp_model_df = pd.read_csv("../datasets/model_train/Youtube02-KatyPerry.csv")
shakira_df = pd.read_csv("../datasets/model_train/Youtube05-Shakira.csv")
model_df = pd.concat([psy_model_df,lmfao_model_df,kp_model_df,shakira_df])
model_df.reset_index(inplace=True, drop=True) 
model_df.count()

COMMENT_ID    1508
AUTHOR        1508
DATE          1508
CONTENT       1508
CLASS         1508
dtype: int64

In [10]:
model_df.drop_duplicates(subset="CONTENT")

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,CLASS
0,LZQPQhLyRh80UYxNuaDWhIGQYNQ96IuCg-AYWqNPjpU,Julius NM,2013-11-07T06:20:48,"Huh, anyway check out this you[tube] channel: ...",1
1,LZQPQhLyRh_C2cTtd9MvFRJedxydaVW-2sNg5Diuo4A,adam riyati,2013-11-07T12:37:15,Hey guys check out my new channel and our firs...,1
2,LZQPQhLyRh9MSZYnf8djyk0gEF9BHDPYrrK-qCczIY8,Evgeny Murashkin,2013-11-08T17:34:21,just for test I have to say murdev.com,1
3,z13jhp0bxqncu512g22wvzkasxmvvzjaz04,ElNino Melendez,2013-11-09T08:28:43,me shaking my sexy ass on my channel enjoy ^_^ ﻿,1
4,z13fwbwp1oujthgqj04chlngpvzmtt3r3dw,GsMega,2013-11-10T16:05:38,watch?v=vtaRGgvGtWQ Check this out .﻿,1
...,...,...,...,...,...
1502,_2viQ_Qnc6_1Hq9MGlefkBIszt9rYD3S_CozADvMhQ4,Dinova Sharon,2013-07-13T14:44:00.700000,well done shakira,0
1503,_2viQ_Qnc6-bMSjqyL1NKj57ROicCSJV5SwTrw-RFFA,Katie Mettam,2013-07-13T13:27:39.441000,I love this song because we sing it at Camp al...,0
1504,_2viQ_Qnc6-pY-1yR6K2FhmC5i48-WuNx5CumlHLDAI,Sabina Pearson-Smith,2013-07-13T13:14:30.021000,I love this song for two reasons: 1.it is abou...,0
1506,_2viQ_Qnc6_yBt8UGMWyg3vh0PulTqcqyQtdE7d4Fl0,Aishlin Maciel,2013-07-13T11:17:52.308000,Shakira u are so wiredo,0


In [11]:
model_df.count()

COMMENT_ID    1508
AUTHOR        1508
DATE          1508
CONTENT       1508
CLASS         1508
dtype: int64

In [12]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords 
from nltk.stem import PorterStemmer 

stop_words = stopwords.words('english')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Melanie\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


preprocessing

In [13]:
import string

In [15]:
model_df['CONTENT'] = model_df['CONTENT'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))


ps = PorterStemmer() 
for w in model_df["CONTENT"]: 
    #convert to lowercase
    model_df['CONTENT'] = model_df["CONTENT"].str.lower()
    #remove punctuation
    model_df['CONTENT'] = model_df['CONTENT'].str.replace('[^\w\s]','')

    #Stem
    model_df['CONTENT'] = model_df["CONTENT"].apply(ps.stem) 
    


In [16]:
x = model_df["CONTENT"]
y  =model_df["CLASS"]

vectorizer = CountVectorizer()
x = vectorizer.fit_transform(x)

x_train, x_test, y_train, y_test  = train_test_split(x,y,random_state=42)



In [17]:
spam_nb = MultinomialNB()
spam_nb.fit(x_train,y_train)

predictions=spam_nb.predict(x_train)
accuracy = accuracy_score(y_train, predictions)
print(f"Train Accuracy: {accuracy}")

class_report = classification_report(y_train, predictions)
print("Classification Report:")
print(class_report)

Train Accuracy: 0.9708222811671088
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       544
           1       0.97      0.97      0.97       587

    accuracy                           0.97      1131
   macro avg       0.97      0.97      0.97      1131
weighted avg       0.97      0.97      0.97      1131



In [18]:
predictions=spam_nb.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Test Accuracy: {accuracy}")

class_report = classification_report(y_test, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.9230769230769231
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.94      0.93       204
           1       0.93      0.90      0.91       173

    accuracy                           0.92       377
   macro avg       0.92      0.92      0.92       377
weighted avg       0.92      0.92      0.92       377



test with another dataset again

In [19]:
# dataset source: https://archive.ics.uci.edu/dataset/380/youtube+spam+collection

em_model_df = pd.read_csv("../datasets/model_train/Youtube04-Eminem.csv")
em_model_df.head()

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,CLASS
0,z12rwfnyyrbsefonb232i5ehdxzkjzjs2,Lisa Wellas,,+447935454150 lovely girl talk to me xxx﻿,1
1,z130wpnwwnyuetxcn23xf5k5ynmkdpjrj04,jason graham,2015-05-29T02:26:10.652000,I always end up coming back to this song<br />﻿,0
2,z13vsfqirtavjvu0t22ezrgzyorwxhpf3,Ajkal Khan,,"my sister just received over 6,500 new <a rel=...",1
3,z12wjzc4eprnvja4304cgbbizuved35wxcs,Dakota Taylor,2015-05-29T02:13:07.810000,Cool﻿,0
4,z13xjfr42z3uxdz2223gx5rrzs3dt5hna,Jihad Naser,,Hello I&#39;am from Palastine﻿,1


In [20]:
em_model_df['CONTENT'] = em_model_df['CONTENT'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))


ps = PorterStemmer() 
for w in em_model_df["CONTENT"]: 
    #convert to lowercase
    em_model_df['CONTENT'] = em_model_df["CONTENT"].str.lower()
    #remove punctuation
    em_model_df['CONTENT'] = em_model_df['CONTENT'].str.replace('[^\w\s]','')

    #Stem
    em_model_df['CONTENT'] = em_model_df["CONTENT"].apply(ps.stem) 

In [21]:
em_x=em_model_df["CONTENT"]
em_y=em_model_df["CLASS"]

em_x=vectorizer.transform(em_x)

In [22]:
predictions=spam_nb.predict(em_x)
accuracy = accuracy_score(em_y, predictions)
print(f"Test Accuracy: {accuracy}")

class_report = classification_report(em_y, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.828125
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.69      0.78       203
           1       0.79      0.94      0.86       245

    accuracy                           0.83       448
   macro avg       0.85      0.82      0.82       448
weighted avg       0.84      0.83      0.82       448



In [None]:
#Hypertuning MultinomialNB

In [23]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV


In [25]:
hyperparameters={
    'alpha': [0.01, 0.1, 0.5, 1.0, 10.0,2,0.2,0.02 ],
    'fit_prior': [True, False],
    'class_prior': [None, [.1,.9],[.2, .8],[.3,.7],[.4,.6],[.9,.1],[.8,.2]]
         
}

In [26]:
#https://coderzcolumn.com/tutorials/machine-learning/scikit-learn-sklearn-naive-bayes#6
multinomial_nb_random = RandomizedSearchCV(MultinomialNB(),param_distributions=hyperparameters,n_iter=500,cv=10,random_state=42).fit(x_train,y_train)




In [27]:
multinomial_nb_random.best_params_

{'fit_prior': True, 'class_prior': [0.8, 0.2], 'alpha': 10.0}

In [28]:
predictions = multinomial_nb_random.best_estimator_.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Test Accuracy: {accuracy}")

class_report = classification_report(y_test, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.9283819628647215
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.95      0.93       204
           1       0.93      0.91      0.92       173

    accuracy                           0.93       377
   macro avg       0.93      0.93      0.93       377
weighted avg       0.93      0.93      0.93       377



In [29]:
predictions = multinomial_nb_random.best_estimator_.predict(em_x)
accuracy = accuracy_score(em_y, predictions)
print(f"Eminem Test Accuracy: {accuracy}")

class_report = classification_report(em_y, predictions)
print("Classification Report:")
print(class_report)

Eminem Test Accuracy: 0.9397321428571429
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.94      0.93       203
           1       0.95      0.94      0.94       245

    accuracy                           0.94       448
   macro avg       0.94      0.94      0.94       448
weighted avg       0.94      0.94      0.94       448



### SVM 

In [30]:
x=model_df["CONTENT"]
y=model_df["CLASS"]

vectorizer = CountVectorizer()
x=vectorizer.fit_transform(x)

x_train, x_test, y_train, y_test  = train_test_split(x,y,test_size=0.30,random_state=42)


In [31]:
#from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

svm_model = SVC(kernel = 'sigmoid', gamma = 1.0)
svm_model.fit(x_train, y_train)

predictions = svm_model.predict(x_train)
accuracy = accuracy_score(y_train, predictions)
print(f"Train Accuracy: {accuracy}")

class_report = classification_report(y_train, predictions)
print("Classification Report:")
print(class_report)

Train Accuracy: 0.8511848341232228
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.85       504
           1       0.87      0.84      0.86       551

    accuracy                           0.85      1055
   macro avg       0.85      0.85      0.85      1055
weighted avg       0.85      0.85      0.85      1055



In [32]:
predictions = svm_model.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Test Accuracy: {accuracy}")

class_report = classification_report(y_test, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.8785871964679912
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.88      0.89       244
           1       0.86      0.88      0.87       209

    accuracy                           0.88       453
   macro avg       0.88      0.88      0.88       453
weighted avg       0.88      0.88      0.88       453



In [33]:
em_x=em_model_df["CONTENT"]
em_y=em_model_df["CLASS"]

em_x=vectorizer.transform(em_x)

predictions = svm_model.predict(em_x)
accuracy = accuracy_score(em_y, predictions)
print(f"Test Accuracy: {accuracy}")

class_report = classification_report(em_y, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.8325892857142857
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.82      0.82       203
           1       0.85      0.84      0.85       245

    accuracy                           0.83       448
   macro avg       0.83      0.83      0.83       448
weighted avg       0.83      0.83      0.83       448



In [34]:
svc = SVC(max_iter=1000)

In [35]:
hyperparameters= [
    {
        "C":[0.0001, 0.001, 0.01 , 0.1, 1.0, 5, 30, 50],
        "kernel": ["linear","poly","rbf","sigmoid"],
        "degree" :[1, 3, 5, 10, 25, 50,100],
        "gamma" :["scale", "auto",1000, 10, 5, 2.5, 1.5, 1.0]
    }
]

In [81]:
rssvc = RandomizedSearchCV(estimator = svc, param_distributions = hyperparameters, n_iter =300, cv=10, random_state=42).fit(x_train,y_train)


178 fits failed out of a total of 3000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
178 fits failed with the following error:
Traceback (most recent call last):
  File "d:\Program Files\miniconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\Program Files\miniconda3\lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\Program Files\miniconda3\lib\site-packages\sklearn\svm\_base.py", line 268, in fit
    raise ValueError(
ValueError: The dual coefficients or intercepts are not finite. The input data may contain large values and need to be preprocessed.

 0.5592363  0.9

In [82]:
rssvc.best_estimator_

In [83]:
predictions=rssvc.best_estimator_.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Test Accuracy: {accuracy}")

class_report = classification_report(y_test, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.9514348785871964
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.97      0.96       244
           1       0.96      0.93      0.95       209

    accuracy                           0.95       453
   macro avg       0.95      0.95      0.95       453
weighted avg       0.95      0.95      0.95       453



In [84]:
predictions=rssvc.best_estimator_.predict(em_x)
accuracy = accuracy_score(em_y, predictions)
print(f"Test Accuracy (EMINEM DF): {accuracy}")

class_report = classification_report(em_y, predictions)
print("Classification Report:")
print(class_report)

Test Accuracy (EMINEM DF): 0.9732142857142857
Classification Report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       203
           1       1.00      0.95      0.97       245

    accuracy                           0.97       448
   macro avg       0.97      0.98      0.97       448
weighted avg       0.97      0.97      0.97       448



better performance with eminem dataset using SVM

In [40]:
spam_svc=rssvc.best_estimator_
spam_nb=multinomial_nb_random.best_estimator_

### Check Models Wih Translated_df

try models on translated_df

#### MultinomialNB

In [41]:
nb_spam_filtered = translated_df.copy()
#svm_spam_filtered = translated_df.copy()

In [42]:
nb_spam_filtered["comment_cleaned"] = nb_spam_filtered["comment"].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))


ps = PorterStemmer() 
for w in nb_spam_filtered["comment"]: 
    #remove punctuation
    nb_spam_filtered["comment_cleaned"] = nb_spam_filtered["comment"].str.replace('[^\w\s]','')
    nb_spam_filtered["comment_cleaned"] = nb_spam_filtered["comment"].str.lower()
    nb_spam_filtered["comment_cleaned"] = nb_spam_filtered["comment"].apply(ps.stem) 


In [43]:
svm_spam_filtered = nb_spam_filtered.copy() #so no need to go through same cleaning/preprocessing

In [85]:
transformed = vectorizer.transform(nb_spam_filtered["comment_cleaned"])

nb_spam_filtered["spam"]=spam_nb.predict(transformed)
nb_spam_filtered.head()

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,aLZ85hb4wjE,Loved the calmness of Manilla.,loved the calmness of manilla.,0
1,aLZ85hb4wjE,After this pandemic I think every country shou...,after this pandemic i think every country shou...,0
2,aLZ85hb4wjE,manila looks beautifull with less people,manila looks beautifull with less peopl,0
3,aLZ85hb4wjE,The lockdown makes the city look like a place ...,the lockdown makes the city look like a place ...,1
4,aLZ85hb4wjE,India also same,india also sam,0


In [86]:
nb_spam_filtered[nb_spam_filtered["spam"]==1].count()

video_id           383
comment            383
comment_cleaned    383
spam               383
dtype: int64

In [87]:
nb_spam_filtered[nb_spam_filtered["spam"]==0].count()

video_id           833
comment            833
comment_cleaned    833
spam               833
dtype: int64

#### SVM CHECK

In [47]:
transformed = vectorizer.transform(svm_spam_filtered["comment_cleaned"])

svm_spam_filtered["spam"]=spam_svc.predict(transformed)
svm_spam_filtered.head()

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,aLZ85hb4wjE,Loved the calmness of Manilla.,loved the calmness of manilla.,0
1,aLZ85hb4wjE,After this pandemic I think every country shou...,after this pandemic i think every country shou...,0
2,aLZ85hb4wjE,manila looks beautifull with less people,manila looks beautifull with less peopl,0
3,aLZ85hb4wjE,The lockdown makes the city look like a place ...,the lockdown makes the city look like a place ...,0
4,aLZ85hb4wjE,India also same,india also sam,0


In [88]:
svm_spam_filtered[svm_spam_filtered["spam"]==1].count()

video_id           94
comment            94
comment_cleaned    94
spam               94
dtype: int64

In [89]:
svm_spam_filtered[svm_spam_filtered["spam"]==0].count()

video_id           1122
comment            1122
comment_cleaned    1122
spam               1122
dtype: int64

In [50]:
svm_spam_filtered[svm_spam_filtered["spam"]==1]

Unnamed: 0,video_id,comment,comment_cleaned,spam
25,dIsaz_XlmTw,Is the covid BS 1 variant still coming,is the covid bs 1 variant still com,1
47,nTUWK8vufOk,I am so glad that they also tackled the mental...,i am so glad that they also tackled the mental...,1
58,cPVE7QGS7As,Deaths have been low compared to other countri...,deaths have been low compared to other countri...,1
65,lw16DeB6zns,"Tthis was really confusing, plss fix ur news w...","tthis was really confusing, plss fix ur news w...",1
69,lw16DeB6zns,its kinda cute to read angry Pinoy comments,its kinda cute to read angry pinoy com,1
...,...,...,...,...
1163,lw0rcwYyiwE,VARIANT XZY IS NEXT PLEASE LOCK UP THE POLITIC...,variant xzy is next please lock up the politician,1
1164,lw0rcwYyiwE,Next variant would be Alpha/Omega \r\nNext is ...,next variant would be alpha/omega \r\nnext is ...,1
1187,Y-xQtgvNuvA,"In the Philippines, mandatory faceshields and ...","in the philippines, mandatory faceshields and ...",1
1190,W7n2FoRinVk,I went to the vaccine center today for booster...,i went to the vaccine center today for booster...,1


In [51]:
svm_spam_filtered[svm_spam_filtered["spam"]==0]

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,aLZ85hb4wjE,Loved the calmness of Manilla.,loved the calmness of manilla.,0
1,aLZ85hb4wjE,After this pandemic I think every country shou...,after this pandemic i think every country shou...,0
2,aLZ85hb4wjE,manila looks beautifull with less people,manila looks beautifull with less peopl,0
3,aLZ85hb4wjE,The lockdown makes the city look like a place ...,the lockdown makes the city look like a place ...,0
4,aLZ85hb4wjE,India also same,india also sam,0
...,...,...,...,...
1211,5DvMPgoKZmM,This covid will be a never-ending fuckery as l...,this covid will be a never-ending fuckery as l...,0
1212,5DvMPgoKZmM,A new variant is inevitable.,a new variant is inevitable.,0
1213,5DvMPgoKZmM,The man that should resign from his office is ...,the man that should resign from his office is ...,0
1214,5DvMPgoKZmM,"Is that the people who got been vaccinated.,","is that the people who got been vaccinated.,",0


### Israel-Palestine comments check

In [52]:
is_pal_df = pd.read_csv("../datasets/israeli-palestine_conflict_history/comments.csv").drop("Unnamed: 0", axis=1)
is_pal_df.head()

Unnamed: 0,video_id,comment
0,R0ftmf_Uv9A,No matter how many times these information get...
1,R0ftmf_Uv9A,"*To learn who RULES over you, simply find out ..."
2,R0ftmf_Uv9A,"Say that part again: Jewish , Christian’s and ..."
3,R0ftmf_Uv9A,Let peace prevail.
4,R0ftmf_Uv9A,Why start at 1946?


In [96]:
is_pal_nb= is_pal_df.copy()

In [97]:
is_pal_nb["comment_cleaned"] = is_pal_nb["comment"].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))


ps = PorterStemmer() 
for w in is_pal_df["comment"]:     
    is_pal_nb["comment_cleaned"] = is_pal_nb["comment"].str.lower()
    is_pal_nb["comment_cleaned"] = is_pal_nb["comment"].str.replace('[^\w\s]','')
    is_pal_nb["comment_cleaned"] = is_pal_nb["comment"].apply(ps.stem) 
    

In [98]:
is_pal_svc = is_pal_nb.copy()

#### MultinomialNB

In [99]:
transformed = vectorizer.transform(is_pal_nb["comment_cleaned"])

is_pal_nb["spam"]=spam_nb.predict(transformed)
is_pal_nb.head()

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,R0ftmf_Uv9A,No matter how many times these information get...,no matter how many times these information get...,0
1,R0ftmf_Uv9A,"*To learn who RULES over you, simply find out ...","*to learn who rules over you, simply find out ...",1
2,R0ftmf_Uv9A,"Say that part again: Jewish , Christian’s and ...","say that part again: jewish , christian’s and ...",1
3,R0ftmf_Uv9A,Let peace prevail.,let peace prevail.,0
4,R0ftmf_Uv9A,Why start at 1946?,why start at 1946?,0


In [100]:
is_pal_nb[is_pal_nb["spam"]==1].count()

video_id           694
comment            694
comment_cleaned    694
spam               694
dtype: int64

In [101]:
is_pal_nb[is_pal_nb["spam"]==0].count()

video_id           1180
comment            1180
comment_cleaned    1180
spam               1180
dtype: int64

In [102]:
is_pal_nb[is_pal_nb["spam"]==1]

Unnamed: 0,video_id,comment,comment_cleaned,spam
1,R0ftmf_Uv9A,"*To learn who RULES over you, simply find out ...","*to learn who rules over you, simply find out ...",1
2,R0ftmf_Uv9A,"Say that part again: Jewish , Christian’s and ...","say that part again: jewish , christian’s and ...",1
5,R0ftmf_Uv9A,"Thank's Ireland, for being consistent n vocal ...","thank's ireland, for being consistent n vocal ...",1
11,Bno1m1zhIWs,Insane how the HISTORY channel of all places a...,insane how the history channel of all places a...,1
12,Bno1m1zhIWs,Finally. An objective concise summary with no ...,finally. an objective concise summary with no ...,1
...,...,...,...,...
1866,nUfWTHbCS78,Dear brother \r\nMy name is Alia Mohammed Jalu...,dear brother \r\nmy name is alia mohammed jalu...,1
1868,nUfWTHbCS78,The truth is out there.... Why do people not ...,the truth is out there.... why do people not ...,1
1869,nUfWTHbCS78,Norman is a true gem of a human being \nI pray...,norman is a true gem of a human being \ni pray...,1
1870,nUfWTHbCS78,An asset for this issue -explains the issue wi...,an asset for this issue -explains the issue wi...,1


#### SVM

In [103]:
transformed = vectorizer.transform(is_pal_svc["comment_cleaned"])

is_pal_svc["spam"]=spam_svc.predict(transformed)
is_pal_svc.head()

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,R0ftmf_Uv9A,No matter how many times these information get...,no matter how many times these information get...,0
1,R0ftmf_Uv9A,"*To learn who RULES over you, simply find out ...","*to learn who rules over you, simply find out ...",0
2,R0ftmf_Uv9A,"Say that part again: Jewish , Christian’s and ...","say that part again: jewish , christian’s and ...",1
3,R0ftmf_Uv9A,Let peace prevail.,let peace prevail.,0
4,R0ftmf_Uv9A,Why start at 1946?,why start at 1946?,0


In [104]:
is_pal_df[is_pal_df["spam"]==1].count()

video_id           231
comment            231
comment_cleaned    231
spam               231
dtype: int64

In [105]:
is_pal_df[is_pal_df["spam"]==0].count()

video_id           1643
comment            1643
comment_cleaned    1643
spam               1643
dtype: int64

Seems like SVM works better

In [106]:
covid_filtered_out_df = svm_spam_filtered[svm_spam_filtered["spam"]==0]
covid_filtered_out_df.head()

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,aLZ85hb4wjE,Loved the calmness of Manilla.,loved the calmness of manilla.,0
1,aLZ85hb4wjE,After this pandemic I think every country shou...,after this pandemic i think every country shou...,0
2,aLZ85hb4wjE,manila looks beautifull with less people,manila looks beautifull with less peopl,0
3,aLZ85hb4wjE,The lockdown makes the city look like a place ...,the lockdown makes the city look like a place ...,0
4,aLZ85hb4wjE,India also same,india also sam,0


In [107]:
is_pal_filtered_df = is_pal_df[is_pal_df["spam"]==0]
is_pal_filtered_df.head()

Unnamed: 0,video_id,comment,comment_cleaned,spam
0,R0ftmf_Uv9A,No matter how many times these information get...,no matter how many times these information get...,0
1,R0ftmf_Uv9A,"*To learn who RULES over you, simply find out ...","*to learn who rules over you, simply find out ...",0
3,R0ftmf_Uv9A,Let peace prevail.,let peace prevail.,0
4,R0ftmf_Uv9A,Why start at 1946?,why start at 1946?,0
5,R0ftmf_Uv9A,"Thank's Ireland, for being consistent n vocal ...","thank's ireland, for being consistent n vocal ...",0


## SENTIMENT ANALYSIS

TEST RoBERTa

In [64]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re
from scipy.special import softmax

def roberta_sentiment_scores(list_comments):
    roberta = "cardiffnlp/twitter-roberta-base-sentiment"
    labels = ['Negative', 'Neutral', 'Positive']

    model = AutoModelForSequenceClassification.from_pretrained(roberta)
    tokenizer = AutoTokenizer.from_pretrained(roberta)

    for comments in list_comments:
        comment_words = []
        comments = comments.replace("\n", " ")
        comments = comments.replace("\xa0", " ")
        comments = comments.replace("?", " ")
        comments = comments.replace(":", " ")
        comments = comments.replace(";", " ")
        comments = comments.replace(";", " ")
        comments = re.sub(r"\s+", ' ', comments) 
        print(comments)
        for word in comments.split(' '):
            if word.startswith('@') and len(word) > 1:
               word = '@user'
        
            elif word.startswith('http'):
                word = "http"
            comment_words.append(word)

        comment_procs = " ".join(comment_words)

        encoded = tokenizer(comment_procs, return_tensors='pt', max_length=512, truncation=True, padding=True)
        print(encoded)
        output = model(**encoded)

        scores = output[0][0].detach().numpy()

        scores = softmax(scores)

        for i in range(len(scores)):

            l = labels[i]
            s = scores[i]
            print(l, s)

In [65]:
roberta_sentiment_scores(covid_filtered_out_df["comment"][:50])

  return self.fget.__get__(instance, owner)()


Loved the calmness of Manilla.
{'input_ids': tensor([[    0,   574, 12677,     5,  6327,  1825,     9,  1554,  4699,     4,
             2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
Negative 0.0026419058
Neutral 0.031785045
Positive 0.9655731
After this pandemic I think every country should have a lockdown every month to give mother nature a time to heal, with all the pollution that the earth is experiencing.
{'input_ids': tensor([[    0,  4993,    42, 23387, 14414,    38,   206,   358,   247,   197,
            33,    10, 23076,   358,   353,     7,   492,   985,  2574,    10,
            86,     7, 14384,     6,    19,    70,     5,  6631,    14,     5,
          6872,    16,  7242,     4,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
Negative 0.72357625
Neutral 0.25514364
Positive 0.021280115
manila looks beautifull with less people
{'input_ids': tensor

TEST TEXTBLOB

In [66]:
from textblob import TextBlob

def textblob_sentiment_scores(list_comments):
    list_sentiment={}
    for comment in list_comments:
        testimonial = TextBlob(comment)
        if (testimonial.sentiment.polarity > 0):
            print("positive", testimonial.sentiment.polarity)
        elif (testimonial.sentiment.polarity < 0):
            print("negative", testimonial.sentiment.polarity)
        else:
            print("neutral", testimonial.sentiment.polarity)

In [67]:
textblob_sentiment_scores(covid_filtered_out_df["comment"][:50])

positive 0.7
neutral 0.0
negative -0.16666666666666666
positive 0.11250000000000002
neutral 0.0
positive 0.44999999999999996
positive 0.7
negative -0.55
positive 0.475
neutral 0.0
negative -0.6
neutral 0.0
negative -0.5
negative -0.19999999999999998
negative -0.45285714285714285
negative -0.5
neutral 0.0
negative -0.3729166666666667
neutral 0.0
positive 0.2
positive 0.48828125
neutral 0.0
positive 0.425
positive 0.2
positive 0.14375
neutral 0.0
neutral 0.0
negative -0.125
positive 0.0020833333333333346
neutral 0.0
neutral 0.0
neutral 0.0
neutral 0.0
negative -0.16666666666666666
neutral 0.0
negative -0.4
negative -0.14666666666666664
positive 0.18611111111111112
neutral 0.0
neutral 0.0
negative -0.10166666666666668
negative -0.35
positive 0.5
positive 0.09999999999999998
positive 0.30833333333333335
positive 0.5
positive 0.18194444444444446
positive 0.3
neutral 0.0
neutral 0.0


TEST STANZA

In [68]:
import stanza
def stanza_sentiment_scores(list_comments):
    nlp = stanza.Pipeline('en', processors='tokenize,sentiment', tokenize_no_ssplit=True)

    for comment in list_comments:
        doc = nlp(comment.replace("\n", " "))
        print(comment)
    #doc.sentences[0].print_dependencies()
        for i, sentence in enumerate(doc.sentences):
            print("%d -> %d" % (i, sentence.sentiment))

In [69]:
stanza_sentiment_scores(covid_filtered_out_df["comment"][:50])

2024-03-08 15:23:56 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.7.0.json:   0%|   …

2024-03-08 15:23:58 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |
| mwt       | combined |
| sentiment | sstplus  |

2024-03-08 15:23:58 INFO: Using device: cpu
2024-03-08 15:23:58 INFO: Loading: tokenize
2024-03-08 15:23:59 INFO: Loading: mwt
2024-03-08 15:23:59 INFO: Loading: sentiment
2024-03-08 15:24:00 INFO: Done loading processors!


Loved the calmness of Manilla.
0 -> 2
After this pandemic I think every country should have a lockdown every month to give mother nature a time to heal, with all the pollution that the earth is experiencing.
0 -> 0
manila looks beautifull with less people
0 -> 2
The lockdown makes the city look like a place I want to explore.

Before the lockdown, it looked like a crowded mess filled with traffic and pollution.
0 -> 0
India also same
0 -> 1
The president promised that he'll do his best to ease the traffic in the metro like 5mins of travel along EDSA. Fortunately it happened but unfortunately it wasn't in the ideal way. Nature's sense of humor tho.
0 -> 0
I went up the viewing spot at antipolo, its a very beautiful, almost smog free view of the metro skyline
0 -> 2
The sad part is people around the world did not intend to heal earth but afraid of death.
0 -> 1
i hope it stays like this forever its beautiful with no messy people
0 -> 2
2020 is the year when nature fight back and reduce h

TEST VADER

In [70]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def vader_sentiment_scores (list_comments):
    for sentence in list_comments:
        sid_obj = SentimentIntensityAnalyzer()
        sentiment_dict = sid_obj.polarity_scores(sentence)
        print(sentence)
        print("Overall sentiment dictionary is : ", sentiment_dict)
        print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
        print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
        print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")
 
        print("Sentence Overall Rated As", end = " ")
 
        # decide sentiment as positive, negative and neutral
        if sentiment_dict['compound'] >= 0.05 :
            print("Positive")
 
        elif sentiment_dict['compound'] <= - 0.05 :
            print("Negative")
 
        else :
            print("Neutral")

    

In [71]:
vader_sentiment_scores (covid_filtered_out_df["comment"][:50])

Loved the calmness of Manilla.
Overall sentiment dictionary is :  {'neg': 0.0, 'neu': 0.312, 'pos': 0.688, 'compound': 0.765}
sentence was rated as  0.0 % Negative
sentence was rated as  31.2 % Neutral
sentence was rated as  68.8 % Positive
Sentence Overall Rated As Positive
After this pandemic I think every country should have a lockdown every month to give mother nature a time to heal, with all the pollution that the earth is experiencing.
Overall sentiment dictionary is :  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
sentence was rated as  0.0 % Negative
sentence was rated as  100.0 % Neutral
sentence was rated as  0.0 % Positive
Sentence Overall Rated As Neutral
manila looks beautifull with less people
Overall sentiment dictionary is :  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
sentence was rated as  0.0 % Negative
sentence was rated as  100.0 % Neutral
sentence was rated as  0.0 % Positive
Sentence Overall Rated As Neutral
The lockdown makes the city look like

### Get Sentiment sentiment for each video

#### TEXTBLOB APPROACH

In [72]:
def textblob_sentiment_scores(list_comments):
    list_sentiment={}
    for comment in list_comments:
        testimonial = TextBlob(comment)
        if (testimonial.sentiment.polarity > 0):
            print("positive", testimonial.sentiment.polarity)
        elif (testimonial.sentiment.polarity < 0):
            print("negative", testimonial.sentiment.polarity)
        else:
            print("neutral", testimonial.sentiment.polarity)

In [73]:
covid_filtered_out_df["video_id"].unique()

array(['aLZ85hb4wjE', 'sYI97jv-pZg', '3YFpjgIQqEo', 'dIsaz_XlmTw',
       'DWxIvQlpJK8', 'pMUumjHY3tw', 'nTUWK8vufOk', 'cPVE7QGS7As',
       'CEIrzjA8euQ', 'lw16DeB6zns', '8dTelszbObM', 'Mxf8uGFcqSE',
       'MQ5aYS4YFlQ', 'iOE6rAY8l-k', '_a-rQYfsCck', 'jEINZXA_ujo',
       '587N9bJ5J5k', 'E5F3xA_zkFc', '05JLyd58R-w', '2fRQ8OsqOLs',
       'E56W-5xVOss', 'c1oU8U05puY', 'SEHcakm-fAc', '1psSvU1km0I',
       'PP3Yu-ro1tA', 'NQeI1CRCqeo', 'RhDHGgo4yZg', 'sdsz-t540WI',
       '7SKGXkZKjV8', 'oy0wHScCPds', 'o1kPskxFkQ8', 'aoNQUUQno00',
       'liEUC1_l8e8', '-b0EuuMvvy8', 'MmyIvf7bEGc', 'Fd3HcncV6Zk',
       '-n9Ks3VTub4', 'LzymJ2xZhho', '-qFcO_onBdA', 'fp9uRsmTWqg',
       '3ZXR2eARmuQ', '57wz3HuIVLA', 'lbK7UjoLr8o', 'xV1oR-RbOGU',
       'kn6DKdInYXk', '2T3yZ6lNRDg', 'HLYSlKY6Ww4', 'sH_YoV-NA6s',
       'ZLT6L3PHz78', 'lczwrm68u6I', 'zhBdbLj5y6A', 'ibWFsmcnefk',
       'e0fN7HxkIUc', 'Ji47WRv2tQE', 'dj5ov38ihZI', 'j3E3NF9nk44',
       'BomdsEJjb0E', 'T7c6GvrF82k', '6DBFwIlT4fg', '2FgFNBIJT

In [74]:
def per_vid_textblob_polarity (df):
    video_polarity={}
    for video in df["video_id"].unique():
        sum_polarity=0
        for comment in df.loc[df["video_id"]==video]["comment"]:
            sum_polarity+=(TextBlob(comment)).sentiment.polarity
        video_polarity[video] = sum_polarity/df.loc[df["video_id"]==video]["comment"].count()
    ''' if (testimonial.sentiment.polarity > 0):
            print("positive", testimonial.sentiment.subjectivity)
        elif (testimonial.sentiment.polarity < 0):
            print("negative", testimonial.sentiment.subjectivity)
        else:
            print("neutral", testimonial.sentiment.subjectivity)'''
    return video_polarity


In [75]:
per_vid_textblob_polarity(covid_filtered_out_df)

{'aLZ85hb4wjE': 0.1720833333333333,
 'sYI97jv-pZg': -0.3,
 '3YFpjgIQqEo': -0.13374925595238094,
 'dIsaz_XlmTw': 0.07175925925925926,
 'DWxIvQlpJK8': -0.06590277777777777,
 'pMUumjHY3tw': -0.05083333333333334,
 'nTUWK8vufOk': 0.17114197530864197,
 'cPVE7QGS7As': 0.38312003968253966,
 'CEIrzjA8euQ': -0.25,
 'lw16DeB6zns': 0.13262471655328797,
 '8dTelszbObM': 0.20164552024308124,
 'Mxf8uGFcqSE': 0.16666666666666666,
 'MQ5aYS4YFlQ': 0.23005483405483407,
 'iOE6rAY8l-k': 0.03368055555555556,
 '_a-rQYfsCck': 0.05265151515151515,
 'jEINZXA_ujo': 0.37429292929292923,
 '587N9bJ5J5k': 0.10679012345679012,
 'E5F3xA_zkFc': 0.0866078884078884,
 '05JLyd58R-w': 0.0,
 '2fRQ8OsqOLs': 0.0919047619047619,
 'E56W-5xVOss': 0.10351587301587302,
 'c1oU8U05puY': 0.13055555555555556,
 'SEHcakm-fAc': 0.06823292448292448,
 '1psSvU1km0I': 0.16116931216931216,
 'PP3Yu-ro1tA': 0.12972727272727275,
 'NQeI1CRCqeo': 0.020672348484848484,
 'RhDHGgo4yZg': -0.04010416666666668,
 'sdsz-t540WI': 0.08361678004535146,
 '7SKGX

#### VADER APPROACH

In [76]:
def per_vid_vader_polarity (df):
    polarity_scores={}
    sid_obj = SentimentIntensityAnalyzer()

    for video in df["video_id"].unique():
        sum_neg =0
        sum_pos = 0
        sum_neu =0
        sum_compound= 0
        scores={}
        for sentence in df.loc[df["video_id"]==video]["comment"]:

            sentiment_dict = sid_obj.polarity_scores(sentence)
            sum_neg += sentiment_dict['neg']
            sum_pos += sentiment_dict['pos']
            sum_neu += sentiment_dict['neu']
            sum_compound += sentiment_dict['compound']
            
        scores["Neg"] = sum_neg/df.loc[df["video_id"]==video]["comment"].count()
        scores["Pos"] = sum_pos/df.loc[df["video_id"]==video]["comment"].count()
        scores["Neu"] = sum_neu/df.loc[df["video_id"]==video]["comment"].count()
        scores["Overall"] = sum_compound/df.loc[df["video_id"]==video]["comment"].count()
        
        polarity_scores[video]=scores
        '''# decide sentiment as positive, negative and neutral
        if sentiment_dict['compound'] >= 0.05 :
            print("Positive")
 
        elif sentiment_dict['compound'] <= - 0.05 :
            print("Negative")
 
        else :
            print("Neutral")'''
    return polarity_scores
        

In [77]:
per_vid_vader_polarity(covid_filtered_out_df)

{'aLZ85hb4wjE': {'Neg': 0.07980000000000001,
  'Pos': 0.194,
  'Neu': 0.7261,
  'Overall': 0.17872000000000002},
 'sYI97jv-pZg': {'Neg': 0.0, 'Pos': 0.056, 'Neu': 0.944, 'Overall': 0.11315},
 '3YFpjgIQqEo': {'Neg': 0.0635,
  'Pos': 0.11320000000000001,
  'Neu': 0.8231999999999999,
  'Overall': 0.029220000000000003},
 'dIsaz_XlmTw': {'Neg': 0.07455555555555556,
  'Pos': 0.16166666666666668,
  'Neu': 0.7638888888888888,
  'Overall': 0.18183333333333332},
 'DWxIvQlpJK8': {'Neg': 0.139625,
  'Pos': 0.09925,
  'Neu': 0.761125,
  'Overall': -0.00888750000000002},
 'pMUumjHY3tw': {'Neg': 0.0475,
  'Pos': 0.034,
  'Neu': 0.9185,
  'Overall': -0.15775},
 'nTUWK8vufOk': {'Neg': 0.08111111111111112,
  'Pos': 0.18999999999999997,
  'Neu': 0.729,
  'Overall': 0.4215777777777778},
 'cPVE7QGS7As': {'Neg': 0.0758888888888889,
  'Pos': 0.262,
  'Neu': 0.6621111111111111,
  'Overall': 0.3731222222222222},
 'CEIrzjA8euQ': {'Neg': 0.196, 'Pos': 0.0, 'Neu': 0.804, 'Overall': -0.296},
 'lw16DeB6zns': {'Neg'

### RoBERTa APPROACH

In [78]:
#from transformers import AutoTokenizer, AutoModelForSequenceClassification
#import re
#from scipy.special import softmax

def roberta_per_vid_scores(df):
    polarity_scores={}

    roberta = "cardiffnlp/twitter-roberta-base-sentiment"
    labels = ['Negative', 'Neutral', 'Positive']

    model = AutoModelForSequenceClassification.from_pretrained(roberta)
    tokenizer = AutoTokenizer.from_pretrained(roberta)
    for video in df["video_id"].unique():
        sum_neg =0
        sum_pos = 0
        sum_neu =0 
        scores={}
        for comments in df.loc[df["video_id"]==video]["comment"]:
            comment_words = []
            comments = comments.replace("\n", " ")
            comments = comments.replace("\xa0", " ")
            comments = comments.replace("?", " ")
            comments = comments.replace(":", " ")
            comments = comments.replace(";", " ")
            comments = comments.replace(";", " ")
            comments = re.sub(r"\s+", ' ', comments) 
     #   print(comments)
            for word in comments.split(' '):
                if word.startswith('@') and len(word) > 1:
                    word = '@user'
        
                elif word.startswith('http'):
                    word = "http"
                comment_words.append(word)

            comment_procs = " ".join(comment_words)

            encoded = tokenizer(comment_procs, return_tensors='pt', max_length=512, truncation=True, padding=True)
           # print(encoded)
            output = model(**encoded)

            scores = output[0][0].detach().numpy()

            scores = softmax(scores)

            for i in range(len(scores)):

                l = labels[i]
                s = scores[i]
                #    labels = ['Negative', 'Neutral', 'Positive']

                if l=="Negative":
                    sum_neg+= s
                elif l == "Neutral":
                    sum_neu+=s
                else : 
                    sum_pos+=s
       # scores["Neg"] = sum_neg/df.loc[df["video_id"]==video]["comment"].count()
        scores =dict([("Neg", sum_neg/df.loc[df["video_id"]==video]["comment"].count()),("Pos",sum_pos/df.loc[df["video_id"]==video]["comment"].count()),("Neu",sum_neu/df.loc[df["video_id"]==video]["comment"].count()
)])
        #scores["Pos"] = sum_pos/df.loc[df["video_id"]==video]["comment"].count()
        #scores["Neu"] = sum_neu/df.loc[df["video_id"]==video]["comment"].count()

        polarity_scores[video] = scores
    return polarity_scores

In [79]:
roberta_per_vid_scores(covid_filtered_out_df)

{'aLZ85hb4wjE': {'Neg': 0.24424575177254154,
  'Pos': 0.4741820393828675,
  'Neu': 0.28157221488654616},
 'sYI97jv-pZg': {'Neg': 0.3894941806793213,
  'Pos': 0.0859330091625452,
  'Neu': 0.5245728343725204},
 '3YFpjgIQqEo': {'Neg': 0.4642800234258175,
  'Pos': 0.23067305744625627,
  'Neu': 0.30504688881337644},
 'dIsaz_XlmTw': {'Neg': 0.36858308074685436,
  'Pos': 0.27306528648154604,
  'Neu': 0.3583516404032707},
 'DWxIvQlpJK8': {'Neg': 0.289873747547972,
  'Pos': 0.29180154629284516,
  'Neu': 0.41832468984648585},
 'pMUumjHY3tw': {'Neg': 0.28391369991004467,
  'Pos': 0.05910225957632065,
  'Neu': 0.6569840013980865},
 'nTUWK8vufOk': {'Neg': 0.446053135364006,
  'Pos': 0.32104381538617116,
  'Neu': 0.2329030301835802},
 'cPVE7QGS7As': {'Neg': 0.16431427941036722,
  'Pos': 0.5078818136826158,
  'Neu': 0.32780391226212185},
 'CEIrzjA8euQ': {'Neg': 0.15780450403690338,
  'Pos': 0.1664552390575409,
  'Neu': 0.6757402420043945},
 'lw16DeB6zns': {'Neg': 0.20506471338947968,
  'Pos': 0.27181