### Imports

In [None]:
from googleapiclient.discovery import build
import pandas as pd
from google.colab import files, drive
import getpass
from pd_replicator import replicator

## User Input

In [None]:
# Prompt for the API key interactively so the key itself is never written into the notebook source.
api_key = getpass.getpass('Please enter your YouTube API key: ')
# IDs of the playlists whose videos will be scanned for comments; add more IDs to this list as needed.
playlist_ids = ['PLqZX_SGiDGNvoZSDlTsqcPdyKN_yVct-Y']


Please enter your YouTube API key: ··········


In [None]:
# Build the YouTube Data API v3 client, authenticated with the API key entered above
youtube = build('youtube', 'v3', developerKey=api_key)

## Get Video IDs for Playlist

In [None]:
def get_all_video_ids_from_playlists(youtube, playlist_ids):
    """Collect the video IDs of every item in the given playlists.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_ids: Iterable of playlist IDs to walk, in order.

    Returns:
        A single flat list of video IDs, in playlist order and then page
        order. IDs are not de-duplicated.
    """
    collected_ids = []

    for current_playlist in playlist_ids:
        page_token = None
        has_more_pages = True

        # Request pages of up to 50 items until the API stops returning a token.
        while has_more_pages:
            page = youtube.playlistItems().list(
                part='contentDetails',
                playlistId=current_playlist,
                maxResults=50,
                pageToken=page_token,
            ).execute()

            collected_ids.extend(
                entry['contentDetails']['videoId'] for entry in page['items']
            )

            page_token = page.get('nextPageToken')
            has_more_pages = page_token is not None

    return collected_ids

# Fetch all video IDs from the specified playlists
video_ids = get_all_video_ids_from_playlists(youtube, playlist_ids)

# video_ids feeds the comment-collection loops further down the notebook,
# which call get_comments_for_video once per ID.

## Get All Comments

In [None]:
# Function to get replies for a specific comment
def get_replies(youtube, parent_id, video_id):
    """Fetch every reply posted under one top-level comment.

    Args:
        youtube: API client exposing ``comments().list(...).execute()``.
        parent_id: ID of the top-level comment whose replies are wanted.
        video_id: ID of the video the thread belongs to; copied into each row.

    Returns:
        A list of dicts with the keys ``Timestamp``, ``Username``,
        ``VideoID``, ``Comment`` and ``Date``, one per reply, in API order.
    """
    collected = []
    page_token = None
    more_pages = True

    while more_pages:
        page = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=page_token,
        ).execute()

        for entry in page['items']:
            snippet = entry['snippet']
            row = {
                'Timestamp': snippet['publishedAt'],
                'Username': snippet['authorDisplayName'],
                'VideoID': video_id,
                'Comment': snippet['textDisplay'],
            }
            # Use the edit time when the API reports one, otherwise the publish time.
            date_field = 'updatedAt' if 'updatedAt' in snippet else 'publishedAt'
            row['Date'] = snippet[date_field]
            collected.append(row)

        page_token = page.get('nextPageToken')
        more_pages = bool(page_token)

    return collected

# Function to get all comments (including replies) for a single video
def get_comments_for_video(youtube, video_id):
    """Collect every comment of one video, replies included, as a flat list.

    Each top-level comment is appended first; when its thread reports at
    least one reply, the rows returned by ``get_replies`` follow it directly.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are fetched.

    Returns:
        A list of dicts with the keys ``Timestamp``, ``Username``,
        ``VideoID``, ``Comment`` and ``Date``.
    """
    rows = []
    page_token = None

    while True:
        page = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            pageToken=page_token,
            textFormat="plainText",
            maxResults=100,
        ).execute()

        for thread in page['items']:
            thread_info = thread['snippet']
            top = thread_info['topLevelComment']['snippet']

            row = {
                'Timestamp': top['publishedAt'],
                'Username': top['authorDisplayName'],
                'VideoID': video_id,
                'Comment': top['textDisplay'],
            }
            # Use the edit time when the API reports one, otherwise the publish time.
            date_field = 'updatedAt' if 'updatedAt' in top else 'publishedAt'
            row['Date'] = top[date_field]
            rows.append(row)

            # Only spend an extra request on threads that actually have replies.
            if thread_info['totalReplyCount'] > 0:
                rows += get_replies(youtube, thread_info['topLevelComment']['id'], video_id)

        page_token = page.get('nextPageToken')
        if not page_token:
            return rows

# Accumulate the comment records of every video into one flat list
all_comments = []

for video_id in video_ids:
    video_comments = get_comments_for_video(youtube, video_id)
    all_comments += video_comments

# One DataFrame row per collected comment record
comments_df = pd.DataFrame(data=all_comments)


In [None]:
# Display the DataFrame built from all_comments (top-level comments and their replies).
comments_df

Unnamed: 0,Timestamp,Username,VideoID,Comment,Date
0,2024-07-07T23:03:59Z,@Jul1a.fxw5,QPF1bCC2BFw,Encontro de dois gigantes ❤❤❤\nFlamengo & a to...,2024-07-07T23:03:59Z
1,2024-07-07T11:56:42Z,@NoemiOliveira-kf7cl,QPF1bCC2BFw,atlético campeão mineiro 2024\n X...,2024-07-07T11:56:42Z
2,2024-07-07T07:14:16Z,@gabrielgripp5883,QPF1bCC2BFw,Atlético fez estádio pra apanhar em casa decorada,2024-07-07T07:14:16Z
3,2024-07-07T02:26:04Z,@cezar.bastos3593,QPF1bCC2BFw,Porque vocês não pois o replay do lance do car...,2024-07-07T02:26:04Z
4,2024-07-07T00:00:14Z,@reginaldoferreiradasilva2191,QPF1bCC2BFw,O Flamengo está e fudido. com CBF. E Glob...,2024-07-07T00:04:01Z
...,...,...,...,...,...
4763,2024-07-04T02:54:57Z,@Gustavo_Lyra,QPF1bCC2BFw,Eu só vi o segundo tempo do jogo,2024-07-04T02:54:57Z
4764,2024-07-04T03:01:10Z,@Danzinhogm,QPF1bCC2BFw,"Pelo menos viu dois gols ainda, e o golaço do ...",2024-07-04T03:01:10Z
4765,2024-07-04T03:04:27Z,@fnzinnxd6712,QPF1bCC2BFw,Eu perdi.. acabei dormindo tava cansadasso,2024-07-04T03:04:27Z
4766,2024-07-04T03:04:31Z,@torcedor2737,QPF1bCC2BFw,@@RyanGMN o Bruno Henrique joga muito hein amigão,2024-07-04T03:04:31Z


In [None]:
# The 30 most frequent comment texts and how many times each one appears.
comments_df['Comment'].value_counts().head(30)

Comment
we                                                                                         18
er                                                                                         18
qa                                                                                         14
ws                                                                                         14
qw                                                                                         13
rt                                                                                         13
ed                                                                                         13
uj                                                                                         12
bg                                                                                         12
rf                                                                                         12
yh                                                  

In [None]:
# Per-username count of non-null values in each column, sorted so the users with the most comments come first.
comments_df.groupby('Username').count().reset_index().sort_values(by='Comment', ascending=False)

Unnamed: 0,Username,Timestamp,VideoID,Comment,Date
1426,@margo2308,233,233,233,233
1290,@katia10276,231,231,231,231
1704,@souzaaca,140,140,140,140
367,@Lucci_nerim,132,132,132,132
838,@cortesz1,109,109,109,109
...,...,...,...,...,...
718,@antoniomartins7168,1,1,1,1
717,@antoniomarcosbergamin1428,1,1,1,1
716,@antonijeff2159,1,1,1,1
715,@anthonyryan7201,1,1,1,1


In [None]:
# Frequency of each distinct comment text posted by the user @margo2308.
margo = comments_df[comments_df['Username'] == "@margo2308"]['Comment'].value_counts()
# Persist that frequency table in the working directory.
margo.to_csv('margo_comments.csv')

### Output to CSV

In [None]:
# Export whole dataset to the local machine as CSV File
csv_file = 'comments_data.csv'  # Name your file
comments_df.to_csv(csv_file, index=False)

# Trigger a download to your local machine.
# `files` comes from the imports cell at the top of the notebook, so it is not re-imported here.
files.download(csv_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
## Função para pegar respostas


## Function to fetch replies



In [None]:
def get_replies(youtube, parent_id, video_id):
    """Return all replies to a single top-level comment as row dicts.

    Args:
        youtube: API client exposing ``comments().list(...).execute()``.
        parent_id: ID of the top-level comment whose replies are requested.
        video_id: ID of the video the comment belongs to; stored in each row.

    Returns:
        A list of dicts keyed by ``Timestamp``, ``Username``, ``VideoID``,
        ``Comment`` and ``Date``, in the order the API returned the replies.
    """
    def _to_row(snippet):
        # Use the edit time when the API reports one, otherwise the publish time.
        date_field = 'updatedAt' if 'updatedAt' in snippet else 'publishedAt'
        record = {
            'Timestamp': snippet['publishedAt'],
            'Username': snippet['authorDisplayName'],
            'VideoID': video_id,
            'Comment': snippet['textDisplay'],
        }
        record['Date'] = snippet[date_field]
        return record

    gathered = []
    token = None

    # Keep requesting pages of up to 100 replies until no further page token is returned.
    while True:
        response = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=token,
        ).execute()

        gathered.extend(_to_row(entry['snippet']) for entry in response['items'])

        token = response.get('nextPageToken')
        if not token:
            return gathered

# Function to get the top-level comments and the replies of a single video, as two separate lists
def get_comments_for_video(youtube, video_id):
    """Fetch a video's top-level comments and replies, kept in separate lists.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are fetched.

    Returns:
        A ``(top_comments, replies)`` tuple. ``top_comments`` holds one dict
        per thread with the keys ``Timestamp``, ``Username``, ``VideoID``,
        ``Comment`` and ``Date``; ``replies`` holds whatever ``get_replies``
        returned for each thread that reports at least one reply.
    """
    thread_rows = []
    reply_rows = []
    token = None
    keep_going = True

    while keep_going:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            pageToken=token,
            textFormat="plainText",
            maxResults=100,
        ).execute()

        for thread in response['items']:
            thread_info = thread['snippet']
            head = thread_info['topLevelComment']['snippet']

            record = {
                'Timestamp': head['publishedAt'],
                'Username': head['authorDisplayName'],
                'VideoID': video_id,
                'Comment': head['textDisplay'],
            }
            # Use the edit time when the API reports one, otherwise the publish time.
            record['Date'] = head['updatedAt'] if 'updatedAt' in head else head['publishedAt']
            thread_rows.append(record)

            # Only spend an extra request on threads that actually have replies.
            if thread_info['totalReplyCount'] > 0:
                reply_rows += get_replies(youtube, thread_info['topLevelComment']['id'], video_id)

        token = response.get('nextPageToken')
        keep_going = bool(token)

    return thread_rows, reply_rows

# Accumulators for the top-level comments and the replies of every video
all_top_comments, all_replies = [], []

for video_id in video_ids:
    video_top_comments, video_replies = get_comments_for_video(youtube, video_id)
    all_top_comments += video_top_comments
    all_replies += video_replies

# One DataFrame for top-level comments, one for replies
top_comments_df = pd.DataFrame(data=all_top_comments)
replies_df = pd.DataFrame(data=all_replies)

In [None]:
# Display the DataFrame built from all_replies.
replies_df

Unnamed: 0,Timestamp,Username,VideoID,Comment,Date
0,2024-07-05T01:21:12Z,@danielesilva3293,QPF1bCC2BFw,😂,2024-07-05T01:21:12Z
1,2024-07-04T18:00:46Z,@wuesleymota5166,QPF1bCC2BFw,É um cassino que está com um bug e geral ta ap...,2024-07-04T18:00:46Z
2,2024-07-04T18:01:08Z,@wuesleymota5166,QPF1bCC2BFw,↖️↖️↖️↖️⬅️⬅️⬅️fiz um vídeo explicando,2024-07-04T18:01:08Z
3,2024-07-04T18:24:41Z,@triviafamilly1.5.75,QPF1bCC2BFw,Chorão kkkkkkkkkkkkkkkiiiiiiiiiiiiiiikkkkk os ...,2024-07-04T18:24:41Z
4,2024-07-04T17:34:25Z,@eurico633,QPF1bCC2BFw,chola mais varmeirense 😭 bué bué bué,2024-07-04T17:34:25Z
...,...,...,...,...,...
2739,2024-07-04T02:54:57Z,@Gustavo_Lyra,QPF1bCC2BFw,Eu só vi o segundo tempo do jogo,2024-07-04T02:54:57Z
2740,2024-07-04T03:01:10Z,@Danzinhogm,QPF1bCC2BFw,"Pelo menos viu dois gols ainda, e o golaço do ...",2024-07-04T03:01:10Z
2741,2024-07-04T03:04:27Z,@fnzinnxd6712,QPF1bCC2BFw,Eu perdi.. acabei dormindo tava cansadasso,2024-07-04T03:04:27Z
2742,2024-07-04T03:04:31Z,@torcedor2737,QPF1bCC2BFw,@@RyanGMN o Bruno Henrique joga muito hein amigão,2024-07-04T03:04:31Z


In [None]:
# The 30 most frequent reply texts and how many times each one appears.
replies_df['Comment'].value_counts().head(30)



Comment
er    18
we    18
ws    14
qa    14
ed    13
rt    13
qw    13
uj    12
rf    12
yh    12
bg    12
nh    12
dc    11
tg    11
sx    11
cd    10
za    10
sw    10
fr    10
ik    10
ty    10
az    10
fv    10
aq     9
de     9
xs     9
mj     9
vf     9
hn     8
ol     8
Name: count, dtype: int64

In [None]:
# For each distinct reply text, count its rows (column "Conta"), most frequent first.
comentarios_comuns = replies_df.groupby('Comment')['Username'].count().reset_index(name="Conta").sort_values(by='Conta', ascending=False)
comentarios_comuns

Unnamed: 0,Comment,Conta
1507,er,18
1691,we,18
1636,qa,14
1694,ws,14
1502,ed,13
...,...,...
702,Fa5,1
701,FLAMENGO PAI DA ARENA DAS GALINHAS 😂,1
700,FEXXXTA NO GALINHEIRO🥳,1
699,F77,1


In [None]:
def categorize_comment(freq):
    """Map how often a comment text occurs to a descriptive category label.

    Args:
        freq: Number of times the comment text appears.

    Returns:
        ``"Not repeated commentary"`` for exactly 1, ``"Repeated comment"``
        for values strictly between 1 and 10, ``"Comment repeated more than
        10 times"`` for 10 or more, and ``None`` for anything else.
    """
    if freq >= 10:
        return "Comment repeated more than 10 times"
    if freq == 1:
        return "Not repeated commentary"
    if freq > 1:
        return "Repeated comment"
    return None

# Apply categorize_comment to the 'Conta' column to create the new 'category' column
comentarios_comuns['category'] = comentarios_comuns['Conta'].apply(categorize_comment)
# Show how many distinct comment texts fall into each category
comentarios_comuns['category'].value_counts()

category
Not repeated commentary                1844
Repeated comment                        209
Comment repeated more than 10 times      23
Name: count, dtype: int64

In [None]:
# Save the comment-frequency table to CSV in the working directory.
comentarios_comuns.to_csv('comentarios_comuns.csv')

In [None]:
# Reply texts that occur exactly once.
comentarios_comuns[comentarios_comuns['Conta'] == 1 ]

Unnamed: 0,Comment,Conta
1404,W4,1
1082,"O do Pedro não foi, mas o do Pulgar foi muito.",1
1080,O WESLEY TBM.👍🏽,1
1375,Vc e anti flamenguista e estar na Live eu sei ...,1
1382,"Vdd, mas contra tudo e contra todos, Mengão se...",1
...,...,...
702,Fa5,1
701,FLAMENGO PAI DA ARENA DAS GALINHAS 😂,1
700,FEXXXTA NO GALINHEIRO🥳,1
699,F77,1


In [None]:
# Count how many replies each user posted (column "Quantidade de comentários"), most active users first.
teste = replies_df.groupby(['Username'])['Comment'].count().reset_index(name="Quantidade de comentários").sort_values(by='Quantidade de comentários', ascending=False)
# Keep `teste` as the last expression so the cell displays it.
teste

Unnamed: 0,Username,Quantidade de comentários
493,@margo2308,233
448,@katia10276,231
578,@souzaaca,140
159,@Lucci_nerim,132
321,@cortesz1,109
...,...,...
266,@Zyech1,1
265,@Zunzas,1
264,@ZeerojhoN,1
261,@YOSHI_ZIN,1


In [None]:
def categorize_comment(freq):
    """Map a per-user reply count to a descriptive category label.

    This definition replaces any earlier ``categorize_comment`` in the kernel.

    Args:
        freq: Count to classify.

    Returns:
        ``"User did not repeated replies"`` for exactly 1, ``"User repeated
        comment a few times"`` for values strictly between 1 and 10, ``"User
        repeated the comment more than 10 times"`` for 10 or more, and
        ``None`` for anything else.
    """
    if freq == 1:
        label = "User did not repeated replies"
    elif freq >= 10:
        label = "User repeated the comment more than 10 times"
    elif freq > 1:
        label = "User repeated comment a few times"
    else:
        label = None
    return label

# Apply categorize_comment to the 'Quantidade de comentários' column to create the new 'category' column
teste['category'] = teste['Quantidade de comentários'].apply(categorize_comment)
# Show how many users fall into each category
teste['category'].value_counts()

category
User did not repeated replies                   433
User repeated comment a few times               178
User repeated the comment more than 10 times     27
Name: count, dtype: int64

In [None]:
# Save the per-user reply counts to CSV in the working directory.
teste.to_csv('perfis_comentarios.csv')

In [None]:
# Users with strictly more than 10 replies. Note: categorize_comment's top bucket starts at 10 (>=), so a user with exactly 10 is excluded here.
teste[teste['Quantidade de comentários'] > 10]

Unnamed: 0,Username,Quantidade de comentários
493,@margo2308,233
448,@katia10276,231
578,@souzaaca,140
159,@Lucci_nerim,132
321,@cortesz1,109
163,@LunaGroove-jn9iq,95
479,@mag-ey7ms,94
288,@aoaoooa7044,90
243,@TheHolyWhisper,84
519,@negociosonlinehoje9129,72


In [None]:
# Frequency of each distinct reply text posted by the user @margo2308.
replies_df[replies_df['Username'] == "@margo2308"]['Comment'].value_counts()




Comment
qa     11
ed     10
uj      9
qw      8
er      8
tg      8
az      8
dc      8
bg      8
cd      8
za      7
aq      7
gb      7
de      6
we      6
df      5
jm      5
ol      5
rt      5
ur      4
ye      4
mj      4
gh      4
ty      4
ki      4
gt      4
ju      4
hy      4
as      3
fr      3
sw      3
pç      3
ws      3
rf      3
yh      3
tw      3
ik      2
çp      2
jk      2
it      2
lo      2
bn      2
sa      2
yu      2
re      2
q       2
cv      2
k,      1
a       1
,k      1
ui      1
z       1
zx      1
sxa     1
xz      1
oy      1
wq      1
hn      1
dew     1
Name: count, dtype: int64

In [None]:
# Export the replies dataset to the local machine as a CSV file.
# A dedicated file name keeps this export from overwriting 'comments_data.csv',
# which the earlier export cell writes from comments_df.
csv_file = 'replies_data.csv'  # Name your file
replies_df.to_csv(csv_file, index=False)

# Trigger a download to your local machine.
# `files` comes from the imports cell at the top of the notebook, so it is not re-imported here.
files.download(csv_file)