In [1]:
# !pip install google-api-python-client

In [2]:
# !pip install transformers

In [1]:
import os
import googleapiclient.discovery

In [2]:

def get_youtube_comments(video_url, api_key):
    # Extract video ID from the URL
    video_id = video_url.split("v=")[1]

    # Create a YouTube API client
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    # Get video details
    video_response = youtube.videos().list(
        part="snippet",
        id=video_id
    ).execute()

    video_title = video_response["items"][0]["snippet"]["title"]

    # Get comments
    unique_commenters = set()  # Set to store unique commenter IDs
    comments = []
    nextPageToken = None

    while True:
        comment_response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=100,  # Adjust as needed
            pageToken=nextPageToken
        ).execute()

        for item in comment_response["items"]:
            comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            commenter_id = item["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"]
            comments.append(comment)
            unique_commenters.add(commenter_id)  # Add to set of unique commenters
            
        nextPageToken = comment_response.get("nextPageToken")

        if not nextPageToken:
            break

    return video_title, comments, len(unique_commenters)

In [3]:
# # Gives Comments of only Top Level
# import googleapiclient.discovery
# import googleapiclient.errors
# import re

# def get_youtube_comments(video_url, api_key):
#     # Improved extraction of video ID using regex to handle complex URLs
#     match = re.search(r"v=([a-zA-Z0-9_-]+)", video_url)
#     if not match:
#         raise ValueError("Could not extract video ID from URL")
#     video_id = match.group(1)

#     # Create a YouTube API client
#     youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    
    
   

#     try:
#         # Get video details
#         video_response = youtube.videos().list(
#             part="snippet",
#             id=video_id
#         ).execute()

#         if not video_response["items"]:
#             return "Video title not found", [], 0  # No items found, possibly private video or incorrect ID

#         video_title = video_response["items"][0]["snippet"]["title"]
        
#         # Initialize variables for comments and unique commenters
#         comments = []
#         unique_commenters = set()
#         nextPageToken = None

#         # Loop to fetch all comments
#         while True:
#             comment_response = youtube.commentThreads().list(
#                 part="snippet",
#                 videoId=video_id,
#                 maxResults=100,  # Adjust as needed
#                 pageToken=nextPageToken
#             ).execute()

#             for item in comment_response["items"]:
#                 comment_text = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
#                 commenter_name = item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"]
#                 commenter_id = item["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"]
#                 comments.append((commenter_name, comment_text))  # Store tuple of name and comment
#                 unique_commenters.add(commenter_id)  # Add to set of unique commenters

#             nextPageToken = comment_response.get("nextPageToken")
#             if not nextPageToken:
#                 break

#     except googleapiclient.errors.HttpError as error:
#         print(f"An HTTP error occurred: {error}")
#         return "Error occurred", [], 0

#     return video_title, comments, len(unique_commenters)

In [5]:
# Give Comments All Levels
def get_youtube_comments(video_url, api_key):
    import googleapiclient.discovery
    import re

    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    video_id = re.search(r"v=([a-zA-Z0-9_-]+)", video_url).group(1) if re.search(r"v=([a-zA-Z0-9_-]+)", video_url) else None

    if not video_id:
        raise ValueError("Could not extract video ID from URL")

    try:
        video_response = youtube.videos().list(part="snippet", id=video_id).execute()
        video_title = video_response["items"][0]["snippet"]["title"] if video_response["items"] else "Video title not found"
        
        comments = []
        unique_commenters = set()
        nextPageToken = None
        while True:
            comment_response = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id,
                maxResults=100,
                pageToken=nextPageToken
            ).execute()

            for item in comment_response["items"]:
                top_comment = item["snippet"]["topLevelComment"]["snippet"]
                comments.append((top_comment["authorDisplayName"], top_comment["textDisplay"]))
                if "authorChannelId" in top_comment:
                    unique_commenters.add(top_comment["authorChannelId"]["value"])

                # Fetch replies if any
                if 'replies' in item:
                    for reply in item["replies"]["comments"]:
                        reply_snippet = reply["snippet"]
                        comments.append((reply_snippet["authorDisplayName"], reply_snippet["textDisplay"]))
                        if "authorChannelId" in reply_snippet:
                            unique_commenters.add(reply_snippet["authorChannelId"]["value"])

            nextPageToken = comment_response.get("nextPageToken")
            if not nextPageToken:
                break

    except googleapiclient.errors.HttpError as error:
        print(f"An HTTP error occurred: {error}")
        return video_title, [], 0

    return video_title, comments, len(unique_commenters)

In [6]:
# https://www.youtube.com/watch?v=bd3ASXhE8Rs&ab_channel=AnimeBallsDeep
# https://www.youtube.com/watch?v=AhJ9-AtFje0

In [7]:
if __name__ == "__main__":
    # Replace 'YOUR_API_KEY' with your actual YouTube API key
    api_key = 'AIzaSyCosmQkZjn5qU-x25gRcmj4WZedXan66Tc'

    # Replace 'YOUR_VIDEO_URL' with the YouTube video URL
    video_url = 'https://www.youtube.com/watch?v=AhJ9-AtFje0'

    video_title, comments, unique_commenters_count = get_youtube_comments(video_url, api_key)
    
    original_comment_count = len(comments)

    print(f"Video Title: {video_title}")
    print(f"Total Comments: {len(comments)}")
    print(f"Total Unique Commenters: {unique_commenters_count}")
    print("\nComments:")
    for i, comment in enumerate(comments, start=1):
        print(f"{i}. {comment}")

Video Title: Spring Lofi 🌸 Lofi Keep You Safe 🌼 Smooth Mind with Spring Lofi Hip Hop ~ beats relax,sleep...
Total Comments: 96
Total Unique Commenters: 69

Comments:
1. ('@kakabilaal4626', 'good you do&#39;t so to')
2. ('@kakabilaal4626', 'good don&#39;t do me no of the to<br>hahahah')
3. ('@kakabilaal4626', 'hi the you ... l don&#39;t so do good lo of go')
4. ('@Monremat1cgaming', 'done')
5. ('@LofiStranger', 'Love this!!! 🎶🎵🎶🎵')
6. ('@CyborgChickMusic', 'Please check out my synth music would love to know what you think!🎵💜')
7. ('@somimusic1', '게임 넘 멋져요. 모바일출시는 계획없으실가요? 언제 출시하시나요 해보고싶네요 ㅎ')
8. ('@gonkmem', '💖💖💖')
9. ('@ZENITH_POINT_', 'love this')
10. ('@rosemberl', 'anyone knows how I can get the wallpaper ???')
11. ('@Cleaningcolorful', 'amazing !!!')
12. ('@FluoraLounge', 'what song is <a href="https://www.youtube.com/watch?v=AhJ9-AtFje0&amp;t=536">8:56</a>?')
13. ('@DeliaRiO', 'esta musica me inspira a trabajar, gracias por compartir, saludos desde México')
14. ('@th10proclubs', '

In [8]:
# Detailed printout of each comment for debugging
for idx, (commenter, comment_text) in enumerate(comments, 1):
    print(f"{idx}. {commenter}: {comment_text[:100]}")  # Print first 100 chars of each comment

1. @kakabilaal4626: good you do&#39;t so to
2. @kakabilaal4626: good don&#39;t do me no of the to<br>hahahah
3. @kakabilaal4626: hi the you ... l don&#39;t so do good lo of go
4. @Monremat1cgaming: done
5. @LofiStranger: Love this!!! 🎶🎵🎶🎵
6. @CyborgChickMusic: Please check out my synth music would love to know what you think!🎵💜
7. @somimusic1: 게임 넘 멋져요. 모바일출시는 계획없으실가요? 언제 출시하시나요 해보고싶네요 ㅎ
8. @gonkmem: 💖💖💖
9. @ZENITH_POINT_: love this
10. @rosemberl: anyone knows how I can get the wallpaper ???
11. @Cleaningcolorful: amazing !!!
12. @FluoraLounge: what song is <a href="https://www.youtube.com/watch?v=AhJ9-AtFje0&amp;t=536">8:56</a>?
13. @DeliaRiO: esta musica me inspira a trabajar, gracias por compartir, saludos desde México
14. @th10proclubs: Que som GoodVibes
<br>Thankfully, I enj
16. @scalinovkpadonou7741: Same
17. @25TO35: This playlists is in my favourites!             ATTENTIONNN          Can anyone help me find the tra
18. @WOWSPACE7: Компания активно использует сетевые медиа-ресу

In [10]:
# print(f"Total Comments: {len(comment_text)}")

In [11]:
# !pip install tensorflow

In [12]:
# !pip show tensorflow transformers

In [9]:
import tensorflow as tf
from tensorflow import keras
from transformers import pipeline, TFAutoModelForSequenceClassification, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")
model = TFAutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [11]:
from transformers import AutoConfig

# Load configuration from a pre-trained model
config = AutoConfig.from_pretrained("SamLowe/roberta-base-go_emotions")

# Print label to ID mapping (if available)
if hasattr(config, 'label2id'):
    print("Label to ID Mapping:")
    print(config.label2id)

# Print ID to label mapping (if available)
if hasattr(config, 'id2label'):
    print("\nID to Label Mapping:")
    print(config.id2label)

Label to ID Mapping:
{'admiration': 0, 'amusement': 1, 'anger': 2, 'annoyance': 3, 'approval': 4, 'caring': 5, 'confusion': 6, 'curiosity': 7, 'desire': 8, 'disappointment': 9, 'disapproval': 10, 'disgust': 11, 'embarrassment': 12, 'excitement': 13, 'fear': 14, 'gratitude': 15, 'grief': 16, 'joy': 17, 'love': 18, 'nervousness': 19, 'neutral': 27, 'optimism': 20, 'pride': 21, 'realization': 22, 'relief': 23, 'remorse': 24, 'sadness': 25, 'surprise': 26}

ID to Label Mapping:
{0: 'admiration', 1: 'amusement', 2: 'anger', 3: 'annoyance', 4: 'approval', 5: 'caring', 6: 'confusion', 7: 'curiosity', 8: 'desire', 9: 'disappointment', 10: 'disapproval', 11: 'disgust', 12: 'embarrassment', 13: 'excitement', 14: 'fear', 15: 'gratitude', 16: 'grief', 17: 'joy', 18: 'love', 19: 'nervousness', 20: 'optimism', 21: 'pride', 22: 'realization', 23: 'relief', 24: 'remorse', 25: 'sadness', 26: 'surprise', 27: 'neutral'}


In [12]:
# Function to convert logits to label based on the highest probability
def get_prediction_label(logits):
    probabilities = tf.nn.softmax(logits, axis=-1)
    max_index = tf.argmax(probabilities, axis=-1).numpy()[0]
    return config.id2label[max_index]  # Fetch label using id2label

In [13]:
import pandas as pd

# List to hold all data
data = []

# Updated process_comment to directly return the label
def process_comment(comment, tokenizer, model):
    inputs = tokenizer(comment, return_tensors="tf", truncation=True, padding=True, max_length=512)
    outputs = model(inputs)
    probabilities = tf.nn.softmax(outputs.logits, axis=-1)
    max_index = tf.argmax(probabilities, axis=-1).numpy()[0]  # Get the index of the highest score
    return config.id2label[max_index]  # Fetch label using id2label

In [14]:
# # Process and print each comment's predicted label
# for i, comment in enumerate(comments, start=1):
#     label = process_comment(comment, tokenizer, model)
#     data.append({'SerialNo': i, 'Label': label, 'Comment': comment})
#     print(f"{i}. {label}: {comment[:400]}...")  # Print only the first 400 chars for readability

In [15]:
# Process and print each comment's predicted label
for i, (commenter_name, comment_text) in enumerate(comments, start=1):
    label = process_comment(comment_text, tokenizer, model)
    data.append({'SerialNo': i, 'Label': label, 'Comment': comment_text, 'Commenter': commenter_name})
    print(f"{i}. {label}: {comment_text[:400]}... (Commented by: {commenter_name})")  # Print only the first 400 chars for readability

1. admiration: good you do&#39;t so to... (Commented by: @kakabilaal4626)
2. amusement: good don&#39;t do me no of the to<br>hahahah... (Commented by: @kakabilaal4626)
3. neutral: hi the you ... l don&#39;t so do good lo of go... (Commented by: @kakabilaal4626)
4. neutral: done... (Commented by: @Monremat1cgaming)
5. love: Love this!!! 🎶🎵🎶🎵... (Commented by: @LofiStranger)
6. love: Please check out my synth music would love to know what you think!🎵💜... (Commented by: @CyborgChickMusic)
7. curiosity: 게임 넘 멋져요. 모바일출시는 계획없으실가요? 언제 출시하시나요 해보고싶네요 ㅎ... (Commented by: @somimusic1)
8. approval: 💖💖💖... (Commented by: @gonkmem)
9. love: love this... (Commented by: @ZENITH_POINT_)
10. curiosity: anyone knows how I can get the wallpaper ???... (Commented by: @rosemberl)
11. surprise: amazing !!!... (Commented by: @Cleaningcolorful)
12. neutral: what song is <a href="https://www.youtube.com/watch?v=AhJ9-AtFje0&amp;t=536">8:56</a>?... (Commented by: @FluoraLounge)
13. neutral: esta musica me inspira

In [16]:
# Create DataFrame from collected data
df_user = pd.DataFrame(data, columns=['SerialNo', 'Label', 'Comment', 'Commenter'])
df =  pd.DataFrame(data, columns=['SerialNo', 'Label', 'Comment'])
df_user_sentiment = pd.DataFrame(data, columns=['Commenter','Label', 'Comment'])

In [17]:
# If you want a DataFrame just with usernames
username_df_user = df_user[['Commenter']]

In [18]:
# print(data)

In [19]:
# Print the DataFrame to verify the contents
print(df_user)
print(df)
print(username_df_user)
print(df_user_sentiment)

    SerialNo       Label                                            Comment  \
0          1  admiration                            good you do&#39;t so to   
1          2   amusement       good don&#39;t do me no of the to<br>hahahah   
2          3     neutral     hi the you ... l don&#39;t so do good lo of go   
3          4     neutral                                               done   
4          5        love                                  Love this!!! 🎶🎵🎶🎵   
..       ...         ...                                                ...   
91        92     neutral                  ​@@user-fn3iv4on3fnot necessarily   
92        93  admiration  Very nice picture and cute little cabin. Keep ...   
93        94   gratitude                          Thank you very much &lt;3   
94        95  admiration  Your videos are really great and they keep get...   
95        96     neutral                                              &lt;3   

               Commenter  
0        @kakabilaal4626

In [20]:
# Count each sentiment
sentiment_counts = df['Label'].value_counts()
print("Sentiment Counts:")
print(sentiment_counts)

Sentiment Counts:
Label
neutral       36
gratitude     15
admiration     9
love           8
curiosity      7
joy            7
caring         5
approval       4
amusement      2
surprise       1
desire         1
annoyance      1
Name: count, dtype: int64


In [21]:
# # Count each sentiment
# sentiment_counts_user = df_user['Label'].value_counts()
# print("Sentiment Counts:")
# print(sentiment_counts_user)

In [22]:
# !pip install prettytable

In [23]:
import pandas as pd
from prettytable import PrettyTable

# Count each sentiment
sentiment_counts = df['Label'].value_counts().reset_index()
sentiment_counts.columns = ['Sentiment', 'Count']  # Renaming columns for better readability

# Using PrettyTable to display the sentiment counts
table = PrettyTable()
table.field_names = ['Sentiment', 'Count']  # Define field names

# Add rows to the table
for _, row in sentiment_counts.iterrows():
    table.add_row(row)

print("Sentiment Counts:")
print(table)

Sentiment Counts:
+------------+-------+
| Sentiment  | Count |
+------------+-------+
|  neutral   |   36  |
| gratitude  |   15  |
| admiration |   9   |
|    love    |   8   |
| curiosity  |   7   |
|    joy     |   7   |
|   caring   |   5   |
|  approval  |   4   |
| amusement  |   2   |
|  surprise  |   1   |
|   desire   |   1   |
| annoyance  |   1   |
+------------+-------+


In [24]:
# import pandas as pd
# from prettytable import PrettyTable

# # Count each sentiment
# sentiment_counts_user = df_user['Label'].value_counts().reset_index()
# sentiment_counts_user.columns = ['Sentiment', 'Count']  # Renaming columns for better readability

# # Using PrettyTable to display the sentiment counts
# table_user = PrettyTable()
# table_user.field_names = ['Sentiment', 'Count']  # Define field names

# # Add rows to the table
# for _, row in sentiment_counts.iterrows():
#     table_user.add_row(row)

# print("Sentiment Counts:")
# print(table_user)

In [25]:
df_sorted = df.sort_values(by='Label')

print(df_sorted)

    SerialNo       Label                                            Comment
0          1  admiration                            good you do&#39;t so to
72        73  admiration                                           Amazing❤
76        77  admiration  These arts on your videos are just wow, so bea...
55        56  admiration                                            Cool!!!
52        53  admiration                                            Sweet 🌸
..       ...         ...                                                ...
64        65     neutral                                 no track list :( ?
65        66     neutral                             @@andrespalumbo2615idk
29        30     neutral                            more jazz than hip hop.
95        96     neutral                                              &lt;3
10        11    surprise                                        amazing !!!

[96 rows x 3 columns]


In [26]:
df_sorted_user = df_user.sort_values(by='Label')

print(df_sorted_user)

    SerialNo       Label                                            Comment  \
0          1  admiration                            good you do&#39;t so to   
72        73  admiration                                           Amazing❤   
76        77  admiration  These arts on your videos are just wow, so bea...   
55        56  admiration                                            Cool!!!   
52        53  admiration                                            Sweet 🌸   
..       ...         ...                                                ...   
64        65     neutral                                 no track list :( ?   
65        66     neutral                             @@andrespalumbo2615idk   
29        30     neutral                            more jazz than hip hop.   
95        96     neutral                                              &lt;3   
10        11    surprise                                        amazing !!!   

             Commenter  
0      @kakabilaal4626  
7

In [27]:
df_sorted_user_sentiment = df_user_sentiment.sort_values(by='Label')
print(df_sorted_user_sentiment)

             Commenter       Label  \
0      @kakabilaal4626  admiration   
72         @IchiSunset  admiration   
76             @nnmzro  admiration   
55         @tomkeane16  admiration   
52       @ZoeLateNight  admiration   
..                 ...         ...   
64  @andrespalumbo2615     neutral   
65        @marouiii277     neutral   
29   @SwamptimeLoco513     neutral   
95    @lofikeepyousafe     neutral   
10   @Cleaningcolorful    surprise   

                                              Comment  
0                             good you do&#39;t so to  
72                                           Amazing❤  
76  These arts on your videos are just wow, so bea...  
55                                            Cool!!!  
52                                            Sweet 🌸  
..                                                ...  
64                                 no track list :( ?  
65                             @@andrespalumbo2615idk  
29                            more jazz

In [28]:
print(f"Video Title: {video_title}")
print(f"Total Comments: {len(comments)}")
print(f"Total Unique Commenters: {unique_commenters_count}")
print(df_sorted_user_sentiment)

Video Title: Spring Lofi 🌸 Lofi Keep You Safe 🌼 Smooth Mind with Spring Lofi Hip Hop ~ beats relax,sleep...
Total Comments: 96
Total Unique Commenters: 69
             Commenter       Label  \
0      @kakabilaal4626  admiration   
72         @IchiSunset  admiration   
76             @nnmzro  admiration   
55         @tomkeane16  admiration   
52       @ZoeLateNight  admiration   
..                 ...         ...   
64  @andrespalumbo2615     neutral   
65        @marouiii277     neutral   
29   @SwamptimeLoco513     neutral   
95    @lofikeepyousafe     neutral   
10   @Cleaningcolorful    surprise   

                                              Comment  
0                             good you do&#39;t so to  
72                                           Amazing❤  
76  These arts on your videos are just wow, so bea...  
55                                            Cool!!!  
52                                            Sweet 🌸  
..                                                ..

In [29]:
# !pip install sentencepiece

In [30]:
from transformers import TFPegasusForConditionalGeneration, AutoTokenizer
import pandas as pd
import re

def preprocess_comments(text):
    # Remove URLs and HTML tags
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'<[^>]+>', '', text)  # Remove HTML tags for cleaner text
    # Replace multiple newlines or break tags with a single space
    text = re.sub(r'[\r\n]+', ' ', text)
    # Remove non-ASCII characters
    text = text.encode('ascii', 'ignore').decode('ascii')
    return text

In [31]:
def chunk_text(text, max_length):
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0

    for word in words:
        if current_length + len(word) + 1 <= max_length:  # +1 for space
            current_chunk.append(word)
            current_length += len(word) + 1
        else:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word) + 1
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks

In [32]:
# Load the Pegasus model and tokenizer using TensorFlow classes
tokenizer = AutoTokenizer.from_pretrained("google/pegasus-xsum")
model = TFPegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

# Assuming 'df' is your DataFrame containing YouTube comments grouped by sentiment
comments_by_sentiment = df.groupby('Label')['Comment'].apply(lambda x: ' '.join(str(comment) for comment in x)).to_dict()

# Dictionary to hold sentiment summaries
summary_by_sentiment = {}

# Then you can process each chunk separately:
for sentiment, comments in comments_by_sentiment.items():
    clean_comments = preprocess_comments(comments)
    chunks = chunk_text(clean_comments, 512)  # Split text into chunks suitable for the model
    summaries = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="tf", truncation=True, max_length=512)
        summary_ids = model.generate(inputs["input_ids"])
        summary_text = tokenizer.decode(summary_ids[0].numpy(), skip_special_tokens=True)
        summaries.append(summary_text)
    summary_by_sentiment[sentiment] = ' '.join(summaries)

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [33]:
# Output the summaries
for sentiment, summary in summary_by_sentiment.items():
    print(f"Summary for {sentiment}: {summary}")

Summary for admiration: All images are copyrighted.
Summary for amusement: BBC Sport takes a look back at some of the most memorable moments from this year's Rugby World Cup.
Summary for annoyance: hifi is not toxic...
Summary for approval: All images are copyrighted.
Summary for caring: All images are copyrighted.
Summary for curiosity: I'm having a bit of a problem with the wallpaper on my computer.
Summary for desire: All images are copyrighted.
Summary for gratitude: This is one of my all-time favourite tracks. In our series of letters from African journalists, filmmaker and columnist Farai Sevenzo looks back at some of his most memorable contributions.
Summary for joy: This is one of the best songs I've ever heard.
Summary for love: All images are copyrighted. Check out our selection of the best relaxing music videos!
Summary for neutral: .
Summary for surprise: All images are copyrighted.


In [34]:
# # Assuming 'df' is your DataFrame containing YouTube comments grouped by sentiment
# comments_by_sentiment_user = df_user.groupby('Label')['Comment'].apply(lambda x: ' '.join(str(comment) for comment in x)).to_dict()

# # Dictionary to hold sentiment summaries
# summary_by_sentiment_user = {}

# # Then you can process each chunk separately:
# for sentiment, comments in comments_by_sentiment_user.items():
#     clean_comments = preprocess_comments(comments)
#     chunks = chunk_text(clean_comments, 512)  # Split text into chunks suitable for the model
#     summaries = []
#     for chunk in chunks:
#         inputs = tokenizer(chunk, return_tensors="tf", truncation=True, max_length=512)
#         summary_ids = model.generate(inputs["input_ids"])
#         summary_text = tokenizer.decode(summary_ids[0].numpy(), skip_special_tokens=True)
#         summaries.append(summary_text)
#     summary_by_sentiment_user[sentiment] = ' '.join(summaries)

In [35]:
# # Output the summaries
# for sentiment, summary in summary_by_sentiment_user.items():
#     print(f"Summary for {sentiment}: {summary}")

In [36]:
#Printing All Important Info For Application
print(f"Video Title: {video_title}")
print(f"Total Comments: {original_comment_count}")
print(f"Total Unique Commenters: {unique_commenters_count}")
print(df_sorted_user_sentiment)
for sentiment, summary in summary_by_sentiment.items():
    print(f"Summary for {sentiment}: {summary}")

Video Title: Spring Lofi 🌸 Lofi Keep You Safe 🌼 Smooth Mind with Spring Lofi Hip Hop ~ beats relax,sleep...
Total Comments: 96
Total Unique Commenters: 69
             Commenter       Label  \
0      @kakabilaal4626  admiration   
72         @IchiSunset  admiration   
76             @nnmzro  admiration   
55         @tomkeane16  admiration   
52       @ZoeLateNight  admiration   
..                 ...         ...   
64  @andrespalumbo2615     neutral   
65        @marouiii277     neutral   
29   @SwamptimeLoco513     neutral   
95    @lofikeepyousafe     neutral   
10   @Cleaningcolorful    surprise   

                                              Comment  
0                             good you do&#39;t so to  
72                                           Amazing❤  
76  These arts on your videos are just wow, so bea...  
55                                            Cool!!!  
52                                            Sweet 🌸  
..                                                ..

In [37]:
# # Display the grouped comments
# for label, comments in comments_by_sentiment.items():
#     print(f"Label: {label}, Comments: {comments}")

In [38]:
# # Saving Models and Tokenizers

# # For the RoBERTa model
# roberta_tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")
# roberta_model = TFAutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")

# # Save the model and tokenizer to a directory
# roberta_tokenizer.save_pretrained('./roberta-go-emotions')
# roberta_model.save_pretrained('./roberta-go-emotions')

# # For the Pegasus model
# pegasus_tokenizer = AutoTokenizer.from_pretrained("google/pegasus-xsum")
# pegasus_model = TFPegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

# # Save the model and tokenizer to a directory
# pegasus_tokenizer.save_pretrained('./pegasus-xsum')
# pegasus_model.save_pretrained('./pegasus-xsum')

# Trouble Shooting

In [39]:
# for sentiment, comments in comments_by_sentiment.items():
#     clean_comments = preprocess_comments(comments)
#     chunks = chunk_text(clean_comments, 512)
#     print(f"Sentiment: {sentiment}, Number of chunks: {len(chunks)}")
#     summaries = []
#     for chunk in chunks:
#         inputs = tokenizer(chunk, return_tensors="tf", truncation=True, max_length=512)
#         summary_ids = model.generate(inputs["input_ids"])
#         summary_text = tokenizer.decode(summary_ids[0].numpy(), skip_special_tokens=True)
#         summaries.append(summary_text)
#     summary_by_sentiment[sentiment] = ' '.join(summaries)

In [44]:
# for label, comments in comments_by_sentiment.items():
#     print(f"Label: {label}, Length of concatenated comments: {len(comments)}")

In [45]:
# print(df.head())  # Look at the first few rows of the DataFrame
# print(df['Comment'].nunique())  # Count unique comments to see if there are duplicates
# print(df.shape[0])  # Total number of comments

In [46]:
# print(df.duplicated(subset=['Comment']).sum())  # Count duplicates based on the 'Comment' column
# df = df.drop_duplicates(subset=['Comment'])  # Remove duplicates if necessary