In [1]:
import time
import pandas as pd
from googleapiclient.discovery import build
from kaggle_secrets import UserSecretsClient

In [2]:
comments_count = pd.read_csv("/kaggle/input/comments-count-part1/part1.csv")
comments_count.shape

(400, 3)

In [3]:
comments_count.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Video ID       400 non-null    object 
 1   Comments       0 non-null      float64
 2   Comment Count  400 non-null    int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 9.5+ KB


In [4]:
comments_count.describe()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Comments,Comment Count
count,0.0,400.0
mean,,88.18
std,,95.71636
min,,1.0
25%,,11.0
50%,,46.0
75%,,137.25
max,,367.0


In [5]:
comments_count.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Video ID,Comments,Comment Count
0,gqht0qzWWko,,1
1,cX3VILC_4Qg,,1
2,LzdjTEFuc8o,,1
3,BH3t8GSR9OE,,1
4,ZlgxWwgTNNU,,1


## Total Comments to Fetch Across All Videos

In [6]:
comments_count["Comment Count"].sum()

35272

## Videos with a Comment Count of at most 100
* Each of these can be fetched with a single API request

In [7]:
comments_count[comments_count["Comment Count"]<=100].count()

Video ID         263
Comments           0
Comment Count    263
dtype: int64

In [8]:
# Sum only the numeric column: summing the whole filtered frame would also
# concatenate every 'Video ID' string into one meaningless value.
comments_count.loc[comments_count["Comment Count"] <= 100, "Comment Count"].sum()

7589

## Set the API Key

In [9]:
user_secrets = UserSecretsClient()
API_KEY = user_secrets.get_secret("youtube_api_key")

## Initialize the YouTube API client

In [10]:
youtube = build('youtube', 'v3', developerKey=API_KEY)

In [11]:
# 'Comments' loads as an all-NaN float64 column (see .info() above); cast it to
# str so comment text can be written into it later. The cast turns every NaN
# into the literal string 'nan', which process_videos checks for to decide
# whether a video still needs fetching.
comments_count['Comments'] = comments_count['Comments'].astype(str)
comments_count.head()

Unnamed: 0,Video ID,Comments,Comment Count
0,gqht0qzWWko,,1
1,cX3VILC_4Qg,,1
2,LzdjTEFuc8o,,1
3,BH3t8GSR9OE,,1
4,ZlgxWwgTNNU,,1


## Adjust Batch Size and Sleep Time Accordingly

In [None]:
# Function to fetch comments for a video
def fetch_comments(video_id, max_results=100):
    """Return the text of every top-level comment of a YouTube video.

    Calls ``commentThreads().list`` on the module-level ``youtube`` client and
    keeps requesting pages until a response carries no ``nextPageToken``.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Requested page size. It is clamped to 1..100 before being
            sent as ``maxResults``, the range the API accepts for this call.

    Returns:
        A list with the ``textDisplay`` of each thread's top-level comment, in
        the order the API returned them. If any request fails, the error is
        printed and the comments collected so far are returned (possibly an
        empty list), so the result may be incomplete.
    """
    comments = []
    page_token = None
    try:
        # Clamp inside the try so a bad value is reported like any other failure.
        page_size = max(1, min(max_results, 100))
        while True:
            params = {
                'part': 'snippet',
                'videoId': video_id,
                'textFormat': 'plainText',
                'maxResults': page_size,
            }
            # The first request is sent without a page token.
            if page_token:
                params['pageToken'] = page_token
            response = youtube.commentThreads().list(**params).execute()

            # A response without 'items' contributes no comments.
            for item in response.get('items', []):
                comments.append(item['snippet']['topLevelComment']['snippet']['textDisplay'])

            page_token = response.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        # Best effort: report the failure and keep whatever was already collected.
        print(f"Error fetching comments for video {video_id}: {e}")
    return comments

# Helper: decide whether a 'Comments' cell still needs fetching
def _needs_fetch(value):
    """Return True when a 'Comments' cell holds no fetched text yet.

    Pending cells are the strings 'nan' / 'None' (what ``astype(str)`` makes of
    missing values) as well as real missing values (None, NaN).
    """
    if isinstance(value, str):
        return value in ('nan', 'None')
    return value is None or bool(pd.isna(value))


# Function to process videos in batches
def process_videos(df, batch_size=2, sleep_seconds=1, output_path='comments_count_updated.csv'):
    """Fetch comments for every pending row of ``df`` and checkpoint to CSV.

    Rows are visited in batches of ``batch_size``. For each row whose
    'Comments' cell is still pending, ``fetch_comments`` is called and the
    result is written back into ``df`` in place as one ', '-joined string
    (individual comments containing ', ' cannot be split apart again later).
    After every batch the whole of ``df`` is saved to ``output_path``.

    Args:
        df: DataFrame with 'Video ID', 'Comments' and 'Comment Count' columns.
            It is modified in place.
        batch_size: Number of rows processed between two CSV checkpoints.
        sleep_seconds: Pause after each fetch to stay under API rate limits;
            0 disables the pause.
        output_path: CSV file the progress is written to after each batch.

    Returns:
        None.
    """
    for start in range(0, len(df), batch_size):
        batch = df.iloc[start:start + batch_size]
        for index, row in batch.iterrows():
            video_id = row['Video ID']
            comment_count = row['Comment Count']

            if not _needs_fetch(row['Comments']):
                print(f"Skipping video {video_id} (comments already fetched)")
                continue

            print(f"Fetching comments for video: {video_id} (Comment Count: {comment_count})")
            # fetch_comments pages through all results itself, so one call
            # covers videos with any number of comments.
            comments = fetch_comments(video_id, max_results=100)

            if comments or comment_count == 0:
                df.at[index, 'Comments'] = ', '.join(comments)  # Store comments as a single string
            else:
                # fetch_comments returns an empty list when its request failed.
                # Keep the row pending so a later run retries it instead of
                # recording an empty result as "already fetched".
                print(f"No comments returned for video {video_id}; leaving it pending for a retry")

            if sleep_seconds:
                time.sleep(sleep_seconds)  # Add a delay to avoid rate limits

        # Save progress after each batch
        df.to_csv(output_path, index=False)
        print(f"Processed batch {start // batch_size + 1}")

## Try It on the First 5 Rows

In [13]:
# Smoke test on the first five rows before the full run.
# NOTE(review): process_videos writes into the slice it receives and saves only
# that slice to its CSV, so the file holds five rows until the full run
# overwrites it. The next cell skips these five videos, so in this run the
# writes did reach comments_count; under pandas Copy-on-Write a slice would not
# propagate its writes to the parent frame. Confirm before upgrading pandas.
process_videos(comments_count[:5])


Fetching comments for video: gqht0qzWWko (Comment Count: 1)
Fetching comments for video: cX3VILC_4Qg (Comment Count: 1)
Fetching comments for video: LzdjTEFuc8o (Comment Count: 1)
Fetching comments for video: BH3t8GSR9OE (Comment Count: 1)
Fetching comments for video: ZlgxWwgTNNU (Comment Count: 1)
Processed batch 1


In [14]:
process_videos(comments_count)
# fetch_comments only prints its errors, so check the stored result before
# claiming success: count the rows that still hold no comment text.
pending = int(comments_count['Comments'].isin(['nan', 'None', '']).sum())
if pending:
    print(f"Finished, but {pending} of {len(comments_count)} videos have no stored comments; check the errors above.")
else:
    print("All comments fetched and CSV updated successfully!")

Skipping video gqht0qzWWko (comments already fetched)
Skipping video cX3VILC_4Qg (comments already fetched)
Skipping video LzdjTEFuc8o (comments already fetched)
Skipping video BH3t8GSR9OE (comments already fetched)
Skipping video ZlgxWwgTNNU (comments already fetched)
Fetching comments for video: aXjeCpxdO_8 (Comment Count: 1)
Fetching comments for video: CqZeutIEjcs (Comment Count: 1)
Fetching comments for video: 85uPKxL6RhA (Comment Count: 1)
Fetching comments for video: DV4_4t5pwcA (Comment Count: 1)
Fetching comments for video: Vr2Fh4srIvQ (Comment Count: 1)
Fetching comments for video: AB220rGjvjw (Comment Count: 1)
Fetching comments for video: iDobc1umpUc (Comment Count: 1)
Fetching comments for video: PKDquwfSoJw (Comment Count: 1)
Fetching comments for video: CVldSgFnZyY (Comment Count: 1)
Fetching comments for video: bNB1CdKzzzk (Comment Count: 1)
Fetching comments for video: pEuVNxFeqpA (Comment Count: 2)
Fetching comments for video: Gh0sUcXuuJg (Comment Count: 2)
Fetching c