In [29]:
import nltk
#nltk.download()
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk import sent_tokenize , word_tokenize , pos_tag
import pandas as pd
import os
import operator
# Shared WordNet lemmatizer instance; sentiment_analyzer_scores() uses it to
# reduce each token to its lemma before the WordNet synset lookup.
lemmatizer = WordNetLemmatizer ()

In [2]:
def penn_to_wn(tag):
    """
    Map a Penn Treebank POS tag onto the matching WordNet POS constant.

    The tag is matched on its leading letter: 'J' -> wn.ADJ, 'N' -> wn.NOUN,
    'R' -> wn.ADV, 'V' -> wn.VERB. Any other tag yields None.
    """
    prefix_to_pos = (
        ('J', wn.ADJ),
        ('N', wn.NOUN),
        ('R', wn.ADV),
        ('V', wn.VERB),
    )
    for prefix, wordnet_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return wordnet_pos
    return None


def clean_text(text):
    """
    Return `text` with every HTML line break ("<br />") swapped for one space.
    """
    # Splitting on the tag and re-joining with a space substitutes each occurrence.
    pieces = text.split("<br />")
    return " ".join(pieces)


def sentiment_analyzer_scores(text, no_score_label=0):
    """
    Classify the overall sentiment of a text using SentiWordNet.

    Each noun, adjective and adverb is lemmatized, mapped to its first
    WordNet synset and scored as (positive score - negative score). The sign
    of the summed scores decides the label.

    :param text: text to classify. None, NaN and blank strings are treated as
        containing nothing to score; other non-string values are converted
        with str().
    :param no_score_label: value returned when no token could be scored.
        Defaults to 0, the value this function has always returned in that
        case; pass e.g. "Neutral" to get a uniformly string-typed result.
    :return: "Positive", "Negative" or "Neutral", or `no_score_label` when
        nothing in the text could be scored.
    """
    # Missing values (None, or float NaN such as pandas produces for empty
    # cells) have no text to analyse; NaN is the only float unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return no_score_label

    text = str(text)
    if not text.strip():
        return no_score_label

    sentiment = 0.0
    tokens_count = 0
    # Only these parts of speech contribute to the score; verbs are skipped.
    scored_pos = (wn.NOUN, wn.ADJ, wn.ADV)

    text = clean_text(text)

    for raw_sentence in sent_tokenize(text):
        tagged_sentence = pos_tag(word_tokenize(raw_sentence))

        for word, tag in tagged_sentence:
            wn_tag = penn_to_wn(tag)
            if wn_tag not in scored_pos:
                continue

            lemma = lemmatizer.lemmatize(word, pos=wn_tag)
            if not lemma:
                continue

            synsets = wn.synsets(lemma, pos=wn_tag)
            if not synsets:
                continue

            # Take the first sense, the most common
            swn_synset = swn.senti_synset(synsets[0].name())

            sentiment += swn_synset.pos_score() - swn_synset.neg_score()
            tokens_count += 1

    # Nothing scorable was found: hand the decision back to the caller.
    if not tokens_count:
        return no_score_label

    if sentiment > 0:
        return "Positive"
    if sentiment < 0:
        return "Negative"
    return "Neutral"

In [36]:
# Walk two directory levels up from the current working directory; the
# dataset/ folder is resolved relative to that location.
d = os.path.dirname(os.getcwd())
dd = os.path.dirname(d)
print(dd)

# Workbook to analyse. To process another dataset, swap the relative path for
# one of the alternatives listed below.
input_path = os.path.join(dd, "dataset/Ukraine/ABC News - Russian troops close in on Ukrainian capital of Kyiv_final.xlsx")
df_input = pd.read_excel(input_path)
#dataset/Ukraine/ABC News - Russian troops close in on Ukrainian capital of Kyiv_final.xlsx
#dataset/Khaled/DJ Khaled - EVERY CHANCE I GET (Official Music Video)_final.xlsx
#dataset/Minecraft Speedrunner/Minecraft_Speedrunner_Top20k.xlsx
#dataset/MrBeast/mrbeast_Top20k.xlsx

# Empty frame with the declared result columns; filled in a later cell.
df_output = pd.DataFrame(columns=["ID", "Comments", "Score"])

print(df_input.head())

e:\Programming\Final Year Project\Quality-of-sentiment-analysis-tools
   ID                                           Comments  \
0   0  Tell Biden if he fixes the inflation, The gas ...   
1   1  Once again Fake News showing old fake news cra...   
2   2  The crooks in DC want to rape the world 🌎.  Ca...   
3   3  All these people claiming that they want to fi...   
4   4  Kick ass Vladimir. Greatest statesman of the c...   

                   Comment ID  Reply count  Like count          Published at  
0  UgyRvi3oz-A1yD19nnx4AaABAg            0           0  2022-03-19T21:45:53Z  
1  UgySDaQnI_CkUW8OEIV4AaABAg            0           1  2022-03-19T13:00:55Z  
2  UgwbOB-sGjObW7qY-ft4AaABAg            0           0  2022-03-18T02:25:50Z  
3  UgwjGg-RCT3JTeU5eoh4AaABAg            0           0  2022-03-18T02:21:13Z  
4  UgyDCaa7AhdCn34sqsN4AaABAg            0           0  2022-03-18T02:18:52Z  


In [37]:
# Keep only the two leading columns of the loaded sheet.
leading_columns = df_input.columns[:2]
df_input_comments = df_input.loc[:, leading_columns]
print(df_input_comments)

# Collects one sentiment result per comment, in row order.
sentiwordnetData_sentence = list()

          ID                                           Comments
0          0  Tell Biden if he fixes the inflation, The gas ...
1          1  Once again Fake News showing old fake news cra...
2          2  The crooks in DC want to rape the world 🌎.  Ca...
3          3  All these people claiming that they want to fi...
4          4  Kick ass Vladimir. Greatest statesman of the c...
...      ...                                                ...
14346  14346                            Wishing for the best ❤️
14347  14347                                         DARKNESS🤘☻
14348  14348  I believe this could have been prevented. In s...
14349  14349  *The whole truth about the coronavirus. Watch ...
14350  14350  The European Union, the United States , the Un...

[14351 rows x 2 columns]


In [38]:
#nltk.download("sentiwordnet")

# Convert the comment column (second column) to strings once, up front.
# The original re-ran astype(str) on the whole frame for every row, and
# indexed the row with [1], which relies on the deprecated positional fallback
# of Series.__getitem__. Non-string cells would otherwise break the string
# handling in the analyzer:
#https://stackoverflow.com/questions/34724246/attributeerror-float-object-has-no-attribute-lower
comment_texts = df_input_comments.iloc[:, 1].astype(str)

# Start from an empty list so re-running this cell does not append a second
# copy of the results.
sentiwordnetData_sentence = []
for i, comment_text in enumerate(comment_texts):
    sentiwordnetData_sentence.append(sentiment_analyzer_scores(comment_text))

    # Progress marker, printed once every 100 rows.
    if i % 100 == 72:
        print(i)

print(len(sentiwordnetData_sentence))

72
172
272
372
472
572
672
772
872
972
1072
1172
1272
1372
1472
1572
1672
1772
1872
1972
2072
2172
2272
2372
2472
2572
2672
2772
2872
2972
3072
3172
3272
3372
3472
3572
3672
3772
3872
3972
4072
4172
4272
4372
4472
4572
4672
4772
4872
4972
5072
5172
5272
5372
5472
5572
5672
5772
5872
5972
6072
6172
6272
6372
6472
6572
6672
6772
6872
6972
7072
7172
7272
7372
7472
7572
7672
7772
7872
7972
8072
8172
8272
8372
8472
8572
8672
8772
8872
8972
9072
9172
9272
9372
9472
9572
9672
9772
9872
9972
10072
10172
10272
10372
10472
10572
10672
10772
10872
10972
11072
11172
11272
11372
11472
11572
11672
11772
11872
11972
12072
12172
12272
12372
12472
12572
12672
12772
12872
12972
13072
13172
13272
13372
13472
13572
13672
13772
13872
13972
14072
14172
14272
14351


In [39]:
# Every comment must have exactly one result; fail early with a clear message
# rather than inside the DataFrame constructor.
assert len(sentiwordnetData_sentence) == len(df_input_comments), (
    f"{len(sentiwordnetData_sentence)} results for {len(df_input_comments)} comments"
)

# Build the result frame in one step from the data it holds. The labels go in
# "Polarity"; the previously declared "Score" column was never filled and only
# produced an all-NaN column in the exported workbook, so it is not created.
df_output = pd.DataFrame({
    "ID": df_input_comments["ID"],
    "Comments": df_input_comments["Comments"],
    "Polarity": sentiwordnetData_sentence,
})

df_output.to_excel(os.path.join(dd, "dataset/Ukraine/Sentiwordnet_ABC News - Russian troops close in on Ukrainian capital of Kyiv_final.xlsx"))
#dataset/Ukraine/Sentiwordnet_ABC News - Russian troops close in on Ukrainian capital of Kyiv_final.xlsx
#dataset/Khaled/Sentiwordnet_DJ Khaled - EVERY CHANCE I GET (Official Music Video)_final.xlsx
#dataset/Minecraft Speedrunner/Sentiwordnet_Minecraft_Speedrunner_final.xlsx
#dataset/MrBeast/Sentiwordnet_mrbeast_Top20k.xlsx