In [1]:
import os

# `src` is imported below as a top-level package, so the working directory must be
# the folder that contains it (this resolves "ImportError: attempted relative import
# with no known parent package"). Only step up when `src/` is not already visible:
# an unconditional os.chdir('..') climbs one level further on every re-run of this
# cell, which breaks the import and the relative paths used later in the notebook.
# NOTE(review): assumes the notebook sits exactly one level below the project root - confirm.
if not os.path.isdir("src"):
    os.chdir("..")

from dotenv import load_dotenv
load_dotenv()  # the API_KEY environment variable is read further down via os.environ

import googleapiclient.discovery
import pandas as pd
from src.api.get_youtube_data import get_video_ids, get_video_data, get_top_level_comments
from tqdm.notebook import tqdm
import time

# show full cell contents (titles, descriptions, comment text) instead of truncating them
pd.set_option('display.max_colwidth', None)

In [4]:
# set parameters for creating a youtube service object
api_service_name = "youtube"
api_version = "v3"
API_KEY = os.environ.get("API_KEY")

# os.environ.get returns None when the variable is missing, and that None would be
# handed to the client as developerKey. Stop here with an explicit message instead
# of letting the problem surface later in the notebook.
if not API_KEY:
    raise RuntimeError(
        "API_KEY is not set. Define it in the environment or in the .env file "
        "before creating the YouTube service object."
    )

# create a youtube service object
youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=API_KEY)

In [5]:
# Search configuration passed to get_video_ids in the next cell.

# keyword the search is restricted to
search_term = "tekken"

# publication window bounds, written as UTC timestamps
published_after = "2023-11-01T00:00:00Z"
published_before = "2023-12-21T00:00:00Z"

# id of the channel to search
bandai_namco_america_id = "UC_ntXHv-XdKCD7CPynVvnQw"

In [7]:
# Collect the ids of the channel's videos matching the search term and date window.
video_ids = get_video_ids(
    youtube_service_object=youtube,
    channel_id=bandai_namco_america_id,
    search_term=search_term,
    published_after=published_after,
    published_before=published_before,
)

# print the ids, then how many were returned (one per line)
print(video_ids, len(video_ids), sep="\n")

['UgnPG2bScVQ', '9jJiNa4HoD0', 'X1dgCe1jDYg', 'EMZkmjE8wdw', 'ToKJfywbe1o', '9D5vq-zq9y4', 'y8JGUIF2pu4', 'oeFfzCWif-Q', 'bSCANspTDeE', 'ucesGynb2Yk', 'UcBcNOSoFzI', '8DVlK_QrZ-A', 'Zc-yMi05vBA', 'e1N4juHVqNc', 'QH6s_o3dIic', '7skTtnpSb58', 'bjzYbEjE-C4', 'weVrUBszFIM', 'Gw5nQaSF0CI', '3pGxqOFmIN4', 'w0IqzD-gUOI', 'PsCpewoF2E4', 'cHnxJplTQuY', 'qbUnCiTMCGE', 'cIDK50IaVpg', 'rDxrpSqYHD8']
26


In [8]:
# Build a DataFrame of per-video details for the collected ids and preview the first rows.
df_video_data = get_video_data(
    video_ids=video_ids,
    youtube_service_object=youtube,
)
df_video_data.head()

0it [00:00, ?it/s]

Batch 1 start: 0
Batch 1 end: 26


Unnamed: 0,channelTitle,channelId,videoId,publishedAt,title,description,tags,viewCount,likeCount,commentCount,favoriteCount
0,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,rDxrpSqYHD8,2023-11-01 16:09:18+00:00,TEKKEN 8 – THE RETURN OF LEGENDS - NEW CHARACT...,Five legends return in #TEKKEN8 for the next K...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",867777,24594,2809,0
1,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,cIDK50IaVpg,2023-11-02 13:22:49+00:00,TEKKEN 8 – Victor Chevalier Reveal & Gameplay ...,"With him, violence is à la carte.\nVictor Chev...","[Bandai Namco, Bandai Namco Entertainment, Vid...",1323928,42405,7290,0
2,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,PsCpewoF2E4,2023-11-13 05:03:26+00:00,TEKKEN 8 — Reina Reveal & Gameplay Trailer,It's time for them to learn their place.\nRein...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",1889207,57277,7538,0
3,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,QH6s_o3dIic,2023-11-22 14:00:30+00:00,TEKKEN 8 — Leo Reveal & Gameplay Trailer,It's time to punch the truth out of them. Leo ...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",822368,32381,5153,0
4,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,Zc-yMi05vBA,2023-11-29 14:00:33+00:00,TEKKEN 8 – Steve Fox Reveal & Gameplay Trailer,Time to knock out the competition🥊\nSteve will...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",915878,37277,4430,0


In [9]:
# Print the frame summary: row count, column dtypes and non-null counts.
df_video_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   channelTitle   11 non-null     object             
 1   channelId      11 non-null     object             
 2   videoId        11 non-null     object             
 3   publishedAt    11 non-null     datetime64[ns, UTC]
 4   title          11 non-null     object             
 5   description    11 non-null     object             
 6   tags           11 non-null     object             
 7   viewCount      11 non-null     int64              
 8   likeCount      11 non-null     int64              
 9   commentCount   11 non-null     int64              
 10  favoriteCount  11 non-null     int64              
dtypes: datetime64[ns, UTC](1), int64(4), object(6)
memory usage: 1.1+ KB


In [12]:
# Compact view of every video: title, number of comments and id.
# Plain column selection is used here; the previous `.loc[:]` followed by `[[...]]`
# was a redundant full-frame slice plus chained indexing and displayed the same table.
df_video_data[["title", "commentCount", "videoId"]]

Unnamed: 0,title,commentCount,videoId
0,TEKKEN 8 – THE RETURN OF LEGENDS - NEW CHARACTERS REVEAL TRAILER,2809,rDxrpSqYHD8
1,TEKKEN 8 – Victor Chevalier Reveal & Gameplay Trailer,7290,cIDK50IaVpg
2,TEKKEN 8 — Reina Reveal & Gameplay Trailer,7538,PsCpewoF2E4
3,TEKKEN 8 — Leo Reveal & Gameplay Trailer,5153,QH6s_o3dIic
4,TEKKEN 8 – Steve Fox Reveal & Gameplay Trailer,4430,Zc-yMi05vBA
5,TEKKEN 8 — Dragunov Reveal & Gameplay Trailer,3290,ucesGynb2Yk
6,TEKKEN 8 — Yoshimitsu Reveal & Gameplay Trailer,4815,y8JGUIF2pu4
7,TEKKEN 8 Exclusive Story Demo Showcase,361,9D5vq-zq9y4
8,TEKKEN 8 – Official Story Trailer,4038,ToKJfywbe1o
9,TEKKEN 8 – Ultimate Edition Trailer,1219,9jJiNa4HoD0


In [18]:
# define video ids we want to get comments for
# filter dataframe to keep only the row(s) whose title contains "characters reveal"
# (case-insensitive, literal text match) - i.e. the new characters reveal trailer
df_filtered = df_video_data.loc[
    df_video_data["title"].str.contains("characters reveal", case=False, regex=False)
]

df_filtered.head()

Unnamed: 0,channelTitle,channelId,videoId,publishedAt,title,description,tags,viewCount,likeCount,commentCount,favoriteCount
0,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,rDxrpSqYHD8,2023-11-01 16:09:18+00:00,TEKKEN 8 – THE RETURN OF LEGENDS - NEW CHARACTERS REVEAL TRAILER,"Five legends return in #TEKKEN8 for the next King of Iron Fist tournament!\n\nAnd we're not done yet. 👊 http://tekken.com\n\nTake a look at the latest fighters newly confirmed as playable in Tekken 8: Devil Jin, Zafina, Alisa Bosconovich, and Lee Chaolan (with a bonus appearance by Panda). The final characters will be revealed on November 2 and then November 12.","[Bandai Namco, Bandai Namco Entertainment, Video, Games, video games, namco bandai, United States, PS5, PS4, Xbox Series X]",867777,24594,2809,0


In [19]:
# Take the first matching row by position. df_filtered keeps the index labels of
# df_video_data, so a label lookup with `[0]` only works when the match happens to
# be the row labelled 0 and raises KeyError for any other matching row.
new_characters_reveal_id = df_filtered["videoId"].iloc[0]
new_characters_reveal_id

'rDxrpSqYHD8'

In [20]:
# Fetch the top-level comments for the selected video.
# The id is held in `new_characters_reveal_id`; the name `characters_revealed_id`
# is never assigned in this notebook and raises NameError on a fresh kernel.
# NOTE(review): a single id string is passed to `video_ids` - confirm the helper
# accepts a bare string as well as a list of ids.
df_new_character_reveal_comments = get_top_level_comments(
    youtube_service_object=youtube,
    video_ids=new_characters_reveal_id,
)
df_new_character_reveal_comments.head()

Unnamed: 0,videoId,authorDisplayName,publishedAt,updatedAt,likeCount,totalReplyCount,textDisplay
0,rDxrpSqYHD8,@silveriver9,2023-11-01 16:09:58+00:00,2023-11-01 16:10:43+00:00,4,4,First. Now where is LEI WULONG?!
1,rDxrpSqYHD8,@faizaanjaved7150,2023-11-01 16:10:05+00:00,2023-11-01 16:10:05+00:00,1,1,Already seen it. Ur getting less view&#39;s now bamco
2,rDxrpSqYHD8,@TS-rw4lk,2023-11-01 16:10:05+00:00,2023-11-01 16:10:05+00:00,0,0,wow
3,rDxrpSqYHD8,@ALONCAK,2023-11-01 16:10:06+00:00,2023-11-01 16:10:06+00:00,0,0,Oww yeaah
4,rDxrpSqYHD8,@Rough_Estimates,2023-11-01 16:10:06+00:00,2023-11-01 16:10:06+00:00,135,14,I hope we get an angel version of Jin
...,...,...,...,...,...,...,...
2036,rDxrpSqYHD8,@muhammadrafaythaheem9731,2023-12-24 07:57:43+00:00,2023-12-24 07:57:43+00:00,0,0,Maaaarveroooosssse 🤣🤣🤣
2037,rDxrpSqYHD8,@helikoptergezgini9728,2023-12-25 21:27:41+00:00,2023-12-25 21:27:41+00:00,0,0,what you call new is in the game for almost all Tekken games. Are you kiddin me?!?! Nothin new here. Where is Eddy! Where are all the other great characters. I m just watching a sinking game. Too bad after Tekken 7 we got this. Its must be a joke...
2038,rDxrpSqYHD8,@pureOwarrior,2023-12-26 19:34:16+00:00,2023-12-26 19:34:16+00:00,0,0,Wished if this was Lee actual rage art :(
2039,rDxrpSqYHD8,@369dakuza,2023-12-27 20:24:12+00:00,2023-12-27 20:24:12+00:00,0,0,marduk? armor king? common... my mains aint in my main game? like wtf im not gonna buy until ltheir dlc come out.... very disappointed


In [21]:
# Save the comments to CSV without the index column. The path is relative to the
# current working directory. The target folder is created first because to_csv
# does not create missing directories and would fail after the comments were fetched.
comments_csv_path = "data/raw/new_character_reveal_comments.csv"
os.makedirs(os.path.dirname(comments_csv_path), exist_ok=True)
df_new_character_reveal_comments.to_csv(comments_csv_path, index=False)

# Combine dataframes
- Combine the comments data with the video data so that we can house all the data about a video together
- This is a nice-to-have, so we can come back to it once the comment cleaning and the NLP machine learning are done.

In [None]:
# TODO: join the comments with the video data on videoId to add the title, like count, etc.