In [1]:
import pandas as pd
import numpy as np
import requests
import os
import tensorflow as tf

In [2]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

In [3]:
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [4]:
ID = pd.read_csv("IDs.csv")["video_id"][0:10]
ID

0    --0bCF-iK2E
1    --14w5SOEUs
2    --40TEbZ9Is
3    --4tfbSyYDE
4    --DKkzWVh-E
5    --FmExEAsM8
6    --tbUe0JRc8
7    -024Swollbc
8    -0PZSxZuAXQ
9    -0QSEZIqVWc
Name: video_id, dtype: object

In [5]:
IDS_df = pd.DataFrame(ID)
IDS_df

Unnamed: 0,video_id
0,--0bCF-iK2E
1,--14w5SOEUs
2,--40TEbZ9Is
3,--4tfbSyYDE
4,--DKkzWVh-E
5,--FmExEAsM8
6,--tbUe0JRc8
7,-024Swollbc
8,-0PZSxZuAXQ
9,-0QSEZIqVWc


In [6]:
API_KEY = os.environ.get('API_KEY')

In [7]:
for i in range(10):
    file_name = f"data{i}.csv"  

    df = pd.read_csv(file_name)
    

    
    
    


In [8]:
df

Unnamed: 0.1,Unnamed: 0,comment,author,likecount,date,replies,sort
0,0,Are you guys enjoying the new AUT release?\nSa...,VarietyJay,259,2021-08-31T04:46:49Z,146,recent
1,1,How do i get a stand,Im_pablo,0,2022-06-25T13:43:50Z,0,recent
2,2,how do i get my dtwhv back?,Basilvn,0,2022-05-29T14:05:00Z,1,recent
3,3,Sarry,Hmm,0,2022-05-25T10:36:31Z,0,recent
4,4,What I didn’t know jay played aut,Pixelated_ Dan,0,2022-05-25T07:55:32Z,1,recent
...,...,...,...,...,...,...,...
195,95,This helps when your not stuck on the data loa...,dudej,3,2021-08-31T10:25:35Z,3,relevance
196,96,Actually you don’t talk to DIO to get the worl...,flinxzy,0,2021-08-31T16:38:58Z,1,relevance
197,97,Why can't I summon my spova? Do I have to get ...,icey,0,2021-09-01T00:45:02Z,0,relevance
198,98,Stwr might be bugged I used the cursed orb wit...,Yami Gloxx,0,2021-08-31T16:54:47Z,0,relevance


In [9]:
likes = df[df['likecount'] > 0]['likecount'].sort_values(ascending=True)

In [10]:
# likes = sorted(list(df[df['likecount'] > 0]['likecount']))

In [11]:
def making_weights(num):
    '''This function makes weights for each comment based on its like count (num)'''
    if num == 0:
        return 1
    elif num > 0 and num <= np.median(likes[:len(likes)//2]):
        return 2
    elif num > np.median(likes[:len(likes)//2]) and num <= np.median(likes):
        return 3
    elif num > np.median(likes) and num < np.median(likes[len(likes)//2:]):
        return 4
    else:
        return 5

In [12]:
df['weight'] = df['likecount'].apply(making_weights)

In [13]:
for text in df['comment']:
        # Tokenization
        tokens = tokenizer.encode_plus(text, add_special_tokens=True, padding='longest', truncation=True, max_length=512, return_tensors='tf')

        # Sentiment Prediction
        outputs = model(tokens.input_ids)
        logits = outputs.logits
        prediction = np.array(tf.nn.softmax(logits)[0])

        # Interpretation
        predicted_class = tf.argmax(logits, axis=1).numpy()[0]
        sentiment_labels = ["Negative", "Neutral", "Positive"]
        predicted_sentiment = sentiment_labels[predicted_class]

        # Print the sentiment prediction
        print(f"Text: {text}")
        print(f"Predicted Negative: {round(prediction[0]*100,2)}%")
        print(f"Predicted Neutral: {round(prediction[1]*100,2)}%")
        print(f"Predicted Positive: {round(prediction[2]*100,2)}%\n")



Text: Are you guys enjoying the new AUT release?
Sakuya is from a portals that randomly spawn and then you do a quest to get a watch which you use modeless.
Gojo is on the mountain near the arena, you need lots of cash for him.
Reaper needs two death notes that spawn randomly.
Sol is from killing Dio with Star Platinum and doing the quest of the NPC that spawns (it's broken right now)
STWR is a % chance to get, it is NOT GUARANTEED, use the orb at your discretion.
Predicted Negative: 27.69%
Predicted Neutral: 62.06%
Predicted Positive: 10.25%

Text: How do i get a stand
Predicted Negative: 16.71%
Predicted Neutral: 75.82%
Predicted Positive: 7.47%

Text: how do i get my dtwhv back?
Predicted Negative: 19.84%
Predicted Neutral: 76.25%
Predicted Positive: 3.92%

Text: Sarry
Predicted Negative: 37.23%
Predicted Neutral: 48.74%
Predicted Positive: 14.03%

Text: What I didn’t know jay played aut
Predicted Negative: 30.32%
Predicted Neutral: 63.97%
Predicted Positive: 5.71%

Text: First
Pred

Text: Wait, but where Is zenith?
Predicted Negative: 12.3%
Predicted Neutral: 83.33%
Predicted Positive: 4.37%

Text: This kinda looks hard of Just to get a Stand 😅
Predicted Negative: 39.27%
Predicted Neutral: 51.19%
Predicted Positive: 9.54%

Text: Could you tell me what are the stands that got removed because when I joined the new update all my stands were gone;-:
Predicted Negative: 44.39%
Predicted Neutral: 52.98%
Predicted Positive: 2.64%

Text: whats better star platinum or shadow dio
Predicted Negative: 4.88%
Predicted Neutral: 83.93%
Predicted Positive: 11.19%

Text: Incorrect info in a lot of places.
Predicted Negative: 8.97%
Predicted Neutral: 74.89%
Predicted Positive: 16.14%

Text: But what about the black purple portal that I entered, it took me to a big room with glowing walls and an NPC asking me if I wanted to manipulate time at my will?
Predicted Negative: 6.79%
Predicted Neutral: 83.95%
Predicted Positive: 9.26%

Text: I’m so confused I don’t see how gaster is that h

Text: to add on to this video, p//s is obtainable via arrow, although it's probably very rare as it took me a while to get it.

sorry for the misunderstanding. the trello came back for a bit, and as it turns out, you still need to get the effect and then summon crystallized to get it. so i must have gotten it without knowing and gotten crystallized. oops
Predicted Negative: 31.83%
Predicted Neutral: 52.59%
Predicted Positive: 15.57%

Text: If anybody is asking where to get sakuya spec to get sakuya spec find a portal which is where sakuya herself gives u a quest, now its a chance of spawn yes its  18% with 600 sec
Predicted Negative: 4.27%
Predicted Neutral: 78.1%
Predicted Positive: 17.63%

Text: Thanks! I thought you got TW from arrows and I used like 4 arrows until someone told me so I watched your vid and it really helped me out!
Predicted Negative: 0.25%
Predicted Neutral: 5.56%
Predicted Positive: 94.19%

Text: There two stands that where not mentioned, twau (They call it the wor

Text: I got gaster before the big update but they removed the awakening :(
Predicted Negative: 81.65%
Predicted Neutral: 17.02%
Predicted Positive: 1.33%

Text: when i used cursed orb on stw i didnt get stwr it just remained the same
Predicted Negative: 22.66%
Predicted Neutral: 72.09%
Predicted Positive: 5.25%

Text: do arrows and meteors despawn? if they do how long do they stay for before despawning?
Predicted Negative: 5.47%
Predicted Neutral: 91.21%
Predicted Positive: 3.32%

Text: in the devils palm, do you need a specific stand to get items out of the sand search areas? because i’m interacting with it, and i never get anything from it.
Predicted Negative: 57.92%
Predicted Neutral: 39.16%
Predicted Positive: 2.92%

Text: there is also the world high voltage that can be obtainable in funny valentine's dimension and using an eye of the saint
Predicted Negative: 0.52%
Predicted Neutral: 38.83%
Predicted Positive: 60.65%

Text: But what about the black purple portal that I entered, i

Text: The way he says sans is both funny and cursed
Predicted Negative: 64.6%
Predicted Neutral: 30.47%
Predicted Positive: 4.93%

Text: I used a cursed orb on shadow dio and didn't do anything and I lost my cursed orb.
Predicted Negative: 83.95%
Predicted Neutral: 15.33%
Predicted Positive: 0.72%

Text: This helps when your not stuck on the data loading screen and when you get in it just kicks you out and your up all night trying to play a 3D lego game...
Predicted Negative: 22.81%
Predicted Neutral: 47.33%
Predicted Positive: 29.86%

Text: Actually you don’t talk to DIO to get the world over heaven (that was nocturnus quest) to get the world over heaven you need to get the world then use a DIO diary
Predicted Negative: 15.11%
Predicted Neutral: 76.06%
Predicted Positive: 8.83%

Text: Why can't I summon my spova? Do I have to get the updated 1 or the odl 1 still works?
Predicted Negative: 58.7%
Predicted Neutral: 39.22%
Predicted Positive: 2.09%

Text: Stwr might be bugged I used the 

In [14]:
prediction[0]

0.07105174

In [15]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification


In [16]:
def sentiment_score_comment(df):

    '''This function predicts the sentiment score of each youtube video!'''
    
    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

    # Lists to store the sentiment analysis results
    sentiment_list = []
    negative_list = []
    neutral_list = []
    positive_list = []
    scalar_value_list = []
    weighted_SV = []
    weight = list(df['likecount'].apply(making_weights))

    # Iterate over the comments in the DataFrame
    for i, text in enumerate(df['comment']):
        
        # Tokenization, Sentiment Prediction, and Interpretation
        tokens = tokenizer.encode_plus(text, add_special_tokens=True, padding='longest', truncation=True, max_length=512, return_tensors='tf')
        outputs = model(tokens.input_ids)
        logits = outputs.logits
        prediction = np.array(tf.nn.softmax(logits)[0])
        predicted_class = tf.argmax(logits, axis=1).numpy()[0]
        sentiment_labels = ["Negative", "Neutral", "Positive"]
        predicted_sentiment = sentiment_labels[predicted_class]

        # Append the sentiment analysis results to the respective lists
        sentiment_list.append(predicted_sentiment)
        negative_list.append(round(prediction[0]*100, 2))
        neutral_list.append(round(prediction[1]*100, 2))
        positive_list.append(round(prediction[2]*100, 2))
        scalar_value_val = round((prediction[0])*-1+(prediction[2]*1),2)
        scalar_value_list.append(scalar_value_val)
        weighted_SV.append(df['weight'].iloc[i] * scalar_value_val)


    # Create a new DataFrame with the sentiment analysis results
    results_df = pd.DataFrame({
        'Comment': df['comment'],
        'Sentiment': sentiment_list,
        'Negative (%)': negative_list,
        'Neutral (%)': neutral_list,
        'Positive (%)': positive_list,
        'Scaler_value': scalar_value_list,
        'weighted_SV': weighted_SV,
        'weight': weight
    })

    # Return the new DataFrame
    return results_df


In [19]:
results_df = sentiment_score_comment(df)

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [33]:
results_df

Unnamed: 0,Comment,Sentiment,Negative (%),Neutral (%),Positive (%),Scaler_value,weighted_SV,weight
0,Are you guys enjoying the new AUT release?\nSa...,Neutral,27.69,62.06,10.25,-0.17,-0.85,5
1,How do i get a stand,Neutral,16.71,75.82,7.47,-0.09,-0.09,1
2,how do i get my dtwhv back?,Neutral,19.84,76.25,3.92,-0.16,-0.16,1
3,Sarry,Neutral,37.23,48.74,14.03,-0.23,-0.23,1
4,What I didn’t know jay played aut,Neutral,30.32,63.97,5.71,-0.25,-0.25,1
...,...,...,...,...,...,...,...,...
195,This helps when your not stuck on the data loa...,Neutral,22.81,47.33,29.86,0.07,0.21,3
196,Actually you don’t talk to DIO to get the worl...,Neutral,15.11,76.06,8.83,-0.06,-0.06,1
197,Why can't I summon my spova? Do I have to get ...,Negative,58.70,39.22,2.09,-0.57,-0.57,1
198,Stwr might be bugged I used the cursed orb wit...,Negative,71.60,27.22,1.18,-0.70,-0.70,1


In [21]:
final_df = pd.DataFrame(df)

In [22]:
final_df

Unnamed: 0.1,Unnamed: 0,comment,author,likecount,date,replies,sort,weight
0,0,Are you guys enjoying the new AUT release?\nSa...,VarietyJay,259,2021-08-31T04:46:49Z,146,recent,5
1,1,How do i get a stand,Im_pablo,0,2022-06-25T13:43:50Z,0,recent,1
2,2,how do i get my dtwhv back?,Basilvn,0,2022-05-29T14:05:00Z,1,recent,1
3,3,Sarry,Hmm,0,2022-05-25T10:36:31Z,0,recent,1
4,4,What I didn’t know jay played aut,Pixelated_ Dan,0,2022-05-25T07:55:32Z,1,recent,1
...,...,...,...,...,...,...,...,...
195,95,This helps when your not stuck on the data loa...,dudej,3,2021-08-31T10:25:35Z,3,relevance,3
196,96,Actually you don’t talk to DIO to get the worl...,flinxzy,0,2021-08-31T16:38:58Z,1,relevance,1
197,97,Why can't I summon my spova? Do I have to get ...,icey,0,2021-09-01T00:45:02Z,0,relevance,1
198,98,Stwr might be bugged I used the cursed orb wit...,Yami Gloxx,0,2021-08-31T16:54:47Z,0,relevance,1


In [23]:
IDS_df['positivity_score'] = np.nan

In [45]:
for i in range(10):
    file_name = f"data{i}.csv"  

    df = pd.read_csv(file_name)
    
    df['weight'] = df['likecount'].apply(making_weights)
    
    scores_df = sentiment_score_comment(df)
    
    positivity_score = scores_df['weighted_SV'].mean()
    
    IDS_df.iloc[i, 1] = positivity_score
    
    
    


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the mode

In [46]:
IDS_df

Unnamed: 0,video_id,positivity_score
0,--0bCF-iK2E,0.61375
1,--14w5SOEUs,1.51775
2,--40TEbZ9Is,1.7243
3,--4tfbSyYDE,-0.75745
4,--DKkzWVh-E,0.4701
5,--FmExEAsM8,2.289296
6,--tbUe0JRc8,0.4205
7,-024Swollbc,0.8767
8,-0PZSxZuAXQ,1.2013
9,-0QSEZIqVWc,-0.0199


In [47]:
IDS_df

Unnamed: 0,video_id,positivity_score
0,--0bCF-iK2E,0.61375
1,--14w5SOEUs,1.51775
2,--40TEbZ9Is,1.7243
3,--4tfbSyYDE,-0.75745
4,--DKkzWVh-E,0.4701
5,--FmExEAsM8,2.289296
6,--tbUe0JRc8,0.4205
7,-024Swollbc,0.8767
8,-0PZSxZuAXQ,1.2013
9,-0QSEZIqVWc,-0.0199


In [30]:
list(df['likecount'].apply(making_weights))

[5,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 5,
 5,
 3,
 1,
 3,
 2,
 4,
 1,
 1,
 1,
 1,
 4,
 5,
 1,
 1,
 1,
 1,
 1,
 4,
 1,
 1,
 1,
 1,
 2,
 3,
 1,
 1,
 1,
 1,
 1,
 5,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 5,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 2,
 5,
 1,
 1,
 1,
 1,
 1,
 4,
 2,
 1,
 1,
 1,
 1,
 4,
 1,
 1,
 1,
 1,
 1,
 3,
 2,
 1,
 1,
 1,
 1,
 4,
 1,
 1,
 1,
 1,
 1,
 5,
 1,
 1,
 2,
 1,
 1,
 5,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1]

In [31]:
# results_df['weight'] = df['weight']

In [36]:
results_df['ID'] = ID

In [37]:
results_df

Unnamed: 0,Comment,Sentiment,Negative (%),Neutral (%),Positive (%),Scaler_value,weighted_SV,weight,ID
0,Are you guys enjoying the new AUT release?\nSa...,Neutral,27.69,62.06,10.25,-0.17,-0.85,5,--0bCF-iK2E
1,How do i get a stand,Neutral,16.71,75.82,7.47,-0.09,-0.09,1,--14w5SOEUs
2,how do i get my dtwhv back?,Neutral,19.84,76.25,3.92,-0.16,-0.16,1,--40TEbZ9Is
3,Sarry,Neutral,37.23,48.74,14.03,-0.23,-0.23,1,--4tfbSyYDE
4,What I didn’t know jay played aut,Neutral,30.32,63.97,5.71,-0.25,-0.25,1,--DKkzWVh-E
...,...,...,...,...,...,...,...,...,...
195,This helps when your not stuck on the data loa...,Neutral,22.81,47.33,29.86,0.07,0.21,3,
196,Actually you don’t talk to DIO to get the worl...,Neutral,15.11,76.06,8.83,-0.06,-0.06,1,
197,Why can't I summon my spova? Do I have to get ...,Negative,58.70,39.22,2.09,-0.57,-0.57,1,
198,Stwr might be bugged I used the cursed orb wit...,Negative,71.60,27.22,1.18,-0.70,-0.70,1,


In [None]:
results_df.tail()

In [None]:
print(f'scalar_value: {round((prediction[0])*-1+(prediction[2]*1),2)}')

In [None]:
results_df['weighted_SV'] = results_df['Scaler_value'] * results_df['weight']

In [None]:
results_df['weighted_SV'].mean()

In [None]:
results_df

In [None]:
results = sentiment_score_comment(df)

In [None]:
results

In [None]:
# import pandas as pd
# import numpy as np

# # Lists to store the sentiment analysis results
# sentiment_list = []
# negative_list = []
# neutral_list = []
# positive_list = []

# # Iterate over the comments in the DataFrame
# for text in df['comment']:
#     # Tokenization, Sentiment Prediction, and Interpretation
# #     tokens = tokenizer.encode_plus(text, add_special_tokens=True, padding='longest', truncation=True, max_length=512, return_tensors='tf')
# #     outputs = model(tokens.input_ids)
# #     logits = outputs.logits
# #     prediction = np.array(tf.nn.softmax(logits)[0])
# #     predicted_class = tf.argmax(logits, axis=1).numpy()[0]
# #     sentiment_labels = ["Negative", "Neutral", "Positive"]
# #     predicted_sentiment = sentiment_labels[predicted_class]

# #     # Append the sentiment analysis results to the respective lists
# #     sentiment_list.append(predicted_sentiment)
# #     negative_list.append(round(prediction[0]*100, 2))
# #     neutral_list.append(round(prediction[1]*100, 2))
# #     positive_list.append(round(prediction[2]*100, 2))

# # Create a new DataFrame with the sentiment analysis results
# # results_df = pd.DataFrame({
# #     'Comment': df['comment'],
# #     'Sentiment': sentiment_list,
# #     'Negative (%)': negative_list,
# #     'Neutral (%)': neutral_list,
# #     'Positive (%)': positive_list
# # })

# # Print the new DataFrame
# print(results_df)


In [None]:
results_df