In [2]:
import pandas as pd
import json
import requests
from io import StringIO
from csv import reader
import os
import glob


In [16]:
# -*- coding: utf-8 -*-
__author__ = "Chirag Rathod (Srce Cde)"
__license__ = "GPL 3.0"
__email__ = "chiragr83@gmail.com"
__maintainer__ = "Chirag Rathod (Srce Cde)"

import os
from collections import defaultdict
import json
import logging
from helper import openURL, create_df
from config import YOUTUBE_COMMENT_URL, SAVE_PATH

# Attach the default stream handler to the root logger (no-op if the root
# logger already has handlers) so log records are actually printed.
logging.basicConfig()
# Module-level logger used by VideoComment; INFO is enabled so the progress
# messages emitted while fetching and saving are visible.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class VideoComment:
    """Collect the comment threads of one video and hand them over for saving.

    Top-level comments and their replies are accumulated in two
    column-oriented dictionaries (``self.comments`` and ``self.replies``),
    one list per column, and passed to ``create_df`` once fetching is done.
    """

    def __init__(self, maxResults, videoId, key):
        """Store the request parameters and create the output folder.

        Args:
            maxResults: Value sent as the ``maxResults`` query parameter.
            videoId: ID of the video whose comment threads are requested; it
                is also used in the output file names.
            key: API key sent as the ``key`` query parameter.
        """
        self.save_path = f"{SAVE_PATH}/video-comments-csv"
        self.comments = defaultdict(list)
        self.replies = defaultdict(list)
        self.params = {
            "part": "snippet,replies",
            "maxResults": maxResults,
            "videoId": videoId,
            "textFormat": "plainText",
            "key": key,
        }
        os.makedirs(self.save_path, exist_ok=True)

    @staticmethod
    def _log_api_error(response):
        """Log the error carried by a decoded response; return True if there was one."""
        if "error" not in response:
            return False
        error = response["error"]
        # Fall back to the raw error object when it has no "message" field.
        message = error.get("message", error) if isinstance(error, dict) else error
        logger.error(f"{message}")
        logger.error("The request cannot be completed!")
        return True

    def load_comments(self, mat):
        """Append every comment thread of one decoded response page.

        Args:
            mat: Decoded JSON response (a dict). A page without an ``items``
                key, such as an error payload, contributes nothing instead of
                raising ``KeyError``.
        """
        for item in mat.get("items") or []:
            comment = item["snippet"]["topLevelComment"]
            snippet = comment["snippet"]
            self.comments["id"].append(comment["id"])
            self.comments["comment"].append(snippet["textDisplay"])
            self.comments["author"].append(snippet["authorDisplayName"])
            self.comments["likecount"].append(snippet["likeCount"])
            self.comments["publishedAt"].append(snippet["publishedAt"])

            # "replies" is only present on threads that have replies.
            replies = item.get("replies") or {}
            for reply in replies.get("comments") or []:
                reply_snippet = reply["snippet"]
                self.replies["parentId"].append(reply_snippet["parentId"])
                self.replies["authorDisplayName"].append(
                    reply_snippet["authorDisplayName"]
                )
                self.replies["replyComment"].append(reply_snippet["textDisplay"])
                self.replies["publishedAt"].append(reply_snippet["publishedAt"])
                self.replies["likeCount"].append(reply_snippet["likeCount"])

    def get_video_comments(self):
        """Fetch all pages of comment threads, then save what was collected.

        Returns:
            ``False`` when the API reported an error. If the error is on the
            first page nothing is saved; if it is on a later page, paging
            stops and the comments collected so far are still saved.
            ``None`` when every page was fetched and saved.
        """
        logger.info("Fetching data")
        url_response = json.loads(openURL(YOUTUBE_COMMENT_URL, self.params))
        if self._log_api_error(url_response):
            return False
        self.load_comments(url_response)
        nextPageToken = url_response.get("nextPageToken")

        completed = True
        if nextPageToken:
            logger.info("Found paginated response")
            logger.info("Fetching paginated response & parsing")
            while nextPageToken:
                self.params.update({"pageToken": nextPageToken})
                url_response = json.loads(openURL(YOUTUBE_COMMENT_URL, self.params))
                # An error can arrive on any page (not only the first one);
                # stop paging but keep the data gathered up to this point.
                if self._log_api_error(url_response):
                    completed = False
                    break
                nextPageToken = url_response.get("nextPageToken")
                self.load_comments(url_response)
        logger.info(f"Saving data as CSV at {self.save_path}")
        self.save_data()
        logger.info("Saved data successfully")
        if not completed:
            return False

    def save_data(self):
        """Pass the comment and reply tables to ``create_df`` with their target paths."""
        # NOTE(review): the file names contain three spaces between the video
        # ID and the suffix. This looks unintentional, but it is kept as-is
        # because existing output files already use these names - confirm.
        create_df(self.comments, f"{self.save_path}/{self.params['videoId']}   .parent_video_comment.csv")
        create_df(self.replies, f"{self.save_path}/{self.params['videoId']}   .comment_reply.csv")

In [17]:
# Read the video table and keep only the video IDs, most-disliked first.
def vid_list(address):
    """Return the ``id`` column of a video CSV ordered by dislike count.

    Args:
        address: Path or buffer accepted by ``pandas.read_csv``; the data
            must provide the ``id`` and ``statistics.dislikeCount`` columns.

    Returns:
        A one-column DataFrame (``id``) sorted by descending
        ``statistics.dislikeCount``. The original row labels are kept.
    """
    dislike_col = 'statistics.dislikeCount'
    ranked = pd.read_csv(address)[['id', dislike_col]].sort_values(
        dislike_col, ascending=False
    )
    return ranked.drop(columns=dislike_col)

In [18]:
# Load the IDs of the videos whose comments still have to be fetched.
# Iterating a DataFrame yields its column labels rather than its cell values,
# so the file is read without a header row and the IDs are collected from the
# cells themselves. The frame gets its own name so that the vid_list()
# function defined earlier in the notebook is not overwritten.
try:
    pending_videos_df = pd.read_csv('not get comments yet.csv', header=None, dtype=str)
except pd.errors.EmptyDataError:
    # An empty file simply means that no video is pending.
    pending_videos_df = pd.DataFrame()

if not pending_videos_df.empty and pd.isna(pending_videos_df.iat[0, 0]):
    # A blank top-left cell is the signature of DataFrame.to_csv() defaults:
    # the first row is the header and the first column is the index, and
    # neither of them holds video IDs.
    pending_videos_df = pending_videos_df.iloc[1:, 1:]

# De-duplicate while keeping file order (a set would make the order arbitrary).
videos = list(dict.fromkeys(
    cell.strip()
    for cell in pending_videos_df.to_numpy().ravel()
    if isinstance(cell, str) and cell.strip()
))
len(videos)

1

In [19]:
# Display the list of pending video IDs built in the previous cell.
videos

['uretrfMA-Io']

In [20]:
# Fetch and save the comments of every pending video.
# The API key is read from the environment instead of being hardcoded, so it
# is never stored in the notebook or its outputs.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable before running this cell."
    )

print(len(videos))
# Iterate over a snapshot: removing items from the very list being iterated
# makes the loop skip every other video.
for video in list(videos):
    print(video)
    # NOTE(review): the YouTube Data API documents maxResults for
    # commentThreads.list as 1-100; confirm that 2000 is intended.
    fetcher = VideoComment(maxResults=2000, videoId=video, key=api_key)
    if fetcher.get_video_comments() is False:
        # Keep failed videos in the pending list so they can be retried.
        print(f"failed: {video}")
        continue
    videos.remove(video)
    print(len(videos))

INFO:__main__:Fetching data


1
uretrfMA-Io


INFO:__main__:Found paginated response
INFO:__main__:Fetching paginated response & parsing


QURTSl9pM0FyYS1CYWFvQk5DR1pZNnhreVNQSDFDTnpGM2ZmOTlEaTE2SHlCaGpjaFRUdVd0TXJldVBLQWZod0owSnQ0NUVscllvbkV6dw==


INFO:__main__:Saving data as CSV at C:\Users\David\OneDrive\Documents\Social Media Analysis\Amber Heard Case\Youtube\SNA-AH-Case-YouTube\scraping/output/video-comments-csv
INFO:__main__:Saved data successfully


0


In [161]:
# Take a real (shallow) copy. Plain assignment would only bind a second name
# to the same list, so any later change to `videos` would also appear here.
videos_copy = list(videos)

In [166]:
# Wrap the remaining video IDs in a one-column DataFrame, one row per ID.
vid_save = pd.DataFrame(data=videos)

In [167]:
# Persist the remaining IDs as a single comma-separated row, without the index
# column and the "0" header that to_csv() writes by default, so the file holds
# nothing but video IDs. One row is used because the first row of a CSV is
# what read_csv() turns into column labels when called with its defaults.
vid_save.T.to_csv('not get comments yet.csv', index=False, header=False)