In [None]:
 # Import necessary libraries
import os
import csv
import json
from googleapiclient.discovery import build

# Set your YouTube API key.
# The key is read from the YOUTUBE_API_KEY environment variable so the secret
# does not have to be typed into (and saved with) the notebook. When the
# variable is unset the value is an empty string.
api_key = os.environ.get('YOUTUBE_API_KEY', '')


In [None]:
# Define a function to check if a comment is spam
def is_spam(comment):
    """Return True when ``comment`` contains any known spam keyword.

    The check is a case-insensitive substring match, so a keyword also
    matches inside a longer word (for example 'send' matches 'sending').

    Args:
        comment: Comment text to inspect. ``None`` or an empty string is
            treated as not spam.

    Returns:
        bool: True if at least one keyword occurs in the comment.
    """
    # Keywords commonly associated with spam. They are all lowercase because
    # the comment is lowercased before matching.
    spam_keywords = ['click', 'free', 'earn money', 'limited time offer', 'guaranteed', 'winner', 'dating', 'send', '100%', 'crypto']

    # Nothing to match against; also avoids calling .lower() on None.
    if not comment:
        return False

    # Lowercase once instead of once per keyword.
    lowered = comment.lower()
    return any(keyword in lowered for keyword in spam_keywords)


In [None]:
def get_like_count(comment_id, youtube=None):
    """Look up the like count of a top-level comment by its thread ID.

    Args:
        comment_id: Value passed as the ``id`` filter of
            ``commentThreads().list``.
        youtube: Optional, already-built YouTube API client. When omitted, a
            client is built from the module-level ``api_key``. Passing one
            lets a caller reuse a single client across many lookups.

    Returns:
        The ``likeCount`` of the first returned thread's top-level comment,
        or ``None`` when no thread is returned or the request fails.
    """
    try:
        if youtube is None:
            youtube = build('youtube', 'v3', developerKey=api_key)
        comment_response = youtube.commentThreads().list(
            part='snippet',
            id=comment_id
        ).execute()

        items = comment_response.get('items')
        if items:
            return items[0]['snippet']['topLevelComment']['snippet']['likeCount']
    except Exception as e:
        # Best-effort lookup: report the failure and fall through to None.
        # API errors embed the request URL, which carries the key as a query
        # parameter, so mask it before printing.
        error_text = str(e)
        if api_key:
            error_text = error_text.replace(api_key, '[REDACTED]')
        print(f"Error fetching like count for Comment ID: {comment_id}, Error: {error_text}")
    return None

In [None]:
def fetch_like_counts(input_csv, output_csv):
    """Copy a comment CSV to a new file, adding each comment's like count.

    Every row of ``input_csv`` is written to ``output_csv`` with an extra
    ``LIKE_COUNT`` column obtained from ``get_like_count``. One status line
    is printed per row.

    Args:
        input_csv: Path of the CSV to read. Its header must contain the
            columns COMMENT_ID, AUTHOR, DATE, CONTENT, SUMMARY and CLASS.
        output_csv: Path of the CSV to create (overwritten if it exists).

    Raises:
        KeyError: If one of the required columns is missing from the input
            file's header.
    """
    passthrough_columns = ['COMMENT_ID', 'AUTHOR', 'DATE', 'CONTENT', 'SUMMARY', 'CLASS']
    fieldnames = ['COMMENT_ID', 'AUTHOR', 'DATE', 'CONTENT', 'SUMMARY', 'LIKE_COUNT', 'CLASS']

    # newline='' on both files lets the csv module handle line endings itself,
    # so line breaks inside quoted fields are read back unchanged.
    with open(input_csv, mode='r', newline='', encoding='utf-8') as input_file, \
            open(output_csv, mode='w', newline='', encoding='utf-8') as output_file:
        reader = csv.DictReader(input_file)
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # Copy the columns that pass through untouched.
            output_row = {column: row[column] for column in passthrough_columns}
            comment_id = output_row['COMMENT_ID']

            # None (lookup failed or nothing found) is written as an empty cell.
            like_count = get_like_count(comment_id)
            output_row['LIKE_COUNT'] = like_count
            writer.writerow(output_row)

            if like_count is not None:
                print(f"Comment ID: {comment_id}, Like Count: {like_count}")
            else:
                print(f"Comment ID: {comment_id}, Like Count: Not Available")

In [None]:
def video_comments(video_id):
    """Download a video's top-level comments and flag likely spam.

    Pages through ``commentThreads().list`` for ``video_id`` and writes every
    top-level comment to ``all_comments.csv``. Comments for which ``is_spam``
    returns True are additionally written to ``spam_comments.csv`` and echoed
    to stdout. Both files are opened relative to the current working
    directory and overwritten on each call.

    Args:
        video_id: ID of the YouTube video whose comment threads are fetched.
    """
    fieldnames = ['COMMENT_ID', 'AUTHOR', 'DATE', 'CONTENT', 'LIKE_COUNT', 'CLASS']

    youtube = build('youtube', 'v3', developerKey=api_key)
    # Ask for the largest page the API accepts (the default is 20) so that
    # long comment sections need fewer requests.
    request_args = {'part': 'snippet', 'videoId': video_id, 'maxResults': 100}
    video_response = youtube.commentThreads().list(**request_args).execute()

    with open('all_comments.csv', mode='w', newline='', encoding='utf-8') as all_comments_file, \
         open('spam_comments.csv', mode='w', newline='', encoding='utf-8') as spam_comments_file:

        all_comments_writer = csv.DictWriter(all_comments_file, fieldnames=fieldnames)
        all_comments_writer.writeheader()

        spam_comments_writer = csv.DictWriter(spam_comments_file, fieldnames=fieldnames)
        spam_comments_writer.writeheader()

        while video_response:
            for item in video_response.get('items', []):
                top_comment = item['snippet']['topLevelComment']
                details = top_comment['snippet']

                comment_id = top_comment['id']
                author = details['authorDisplayName']
                date = details['publishedAt']
                content = details['textDisplay']
                like_count = details['likeCount']
                is_spam_comment = is_spam(content)

                row = {
                    'COMMENT_ID': comment_id,
                    'AUTHOR': author,
                    'DATE': date,
                    'CONTENT': content,
                    'LIKE_COUNT': like_count,
                    'CLASS': '1' if is_spam_comment else '0',
                }
                all_comments_writer.writerow(row)

                if is_spam_comment:
                    # Reuse the same row so CLASS is '1' here instead of an
                    # empty cell.
                    spam_comments_writer.writerow(row)
                    print(f"Spam Comment ID: {comment_id}")
                    print(f"Author: {author}")
                    print(f"Date: {date}")
                    print(f"Content: {content}")
                    print(f"Like Count: {like_count}")

            # The last page carries no nextPageToken.
            next_page_token = video_response.get('nextPageToken')
            if not next_page_token:
                break
            video_response = youtube.commentThreads().list(
                pageToken=next_page_token,
                **request_args
            ).execute()

In [None]:
# Input CSV of comments; fetch_like_counts reads the COMMENT_ID, AUTHOR, DATE,
# CONTENT, SUMMARY and CLASS columns from it
input_csv = 'Youtube05-Combined.csv'

# Output CSV: the same rows plus a LIKE_COUNT column (overwritten on each run)
output_csv = 'comment_like_counts.csv'

# Look up the like count for every comment ID in the input and save the result.
# This issues one API lookup per input row.
fetch_like_counts(input_csv, output_csv)

Comment ID: LZQPQhLyRh80UYxNuaDWhIGQYNQ96IuCg-AYWqNPjpU, Like Count: Not Available
Comment ID: LZQPQhLyRh_C2cTtd9MvFRJedxydaVW-2sNg5Diuo4A, Like Count: Not Available
Comment ID: LZQPQhLyRh9MSZYnf8djyk0gEF9BHDPYrrK-qCczIY8, Like Count: Not Available
Error fetching like count for Comment ID: z13jhp0bxqncu512g22wvzkasxmvvzjaz04, Error: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&id=z13jhp0bxqncu512g22wvzkasxmvvzjaz04&key=AIzaSyAB3U_dJamEjJInolpVQdrp70KgMHzrBKA&alt=json returned "One or more specified comment threads could not be found. Check the value of the request's <code>id</code> parameter to ensure that it is correct.". Details: "[{'message': "One or more specified comment threads could not be found. Check the value of the request's <code>id</code> parameter to ensure that it is correct.", 'domain': 'youtube.commentThread', 'reason': 'commentThreadNotFound', 'location': 'id', 'locationType': 'parameter'}]">
Comment ID: z13jhp0b