# YouTube

In [2]:
import os

import pandas as pd
import requests
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser

# The API key is read from the YOUTUBE_API_KEY environment variable so the secret
# is never stored in the notebook. When the variable is unset this falls back to
# an empty string, which is the placeholder value the notebook used before.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_SERVICE_VERSION = 'v3'

# Client object through which the cells below issue their API requests.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_SERVICE_VERSION, developerKey=API_KEY)

In [None]:
# Ask the search endpoint for the single most relevant result for "Netflix".
search_params = {
    'q': 'Netflix',
    'part': 'snippet',
    'order': 'relevance',
    'maxResults': 1,
}
search_something = youtube.search().list(**search_params).execute()

In [None]:
# Show the full search response.
search_something

In [None]:
# Show only the first entry of the response's 'items' list.
search_something['items'][0]

In [None]:
# Channel ID value.
# NOTE(review): no other cell visible here reads `channelId`; the channel lookup
# uses a separate `id` variable with a different value — confirm which is intended.
channelId = 'UC920m3pMPH45qztdhppZhwA'

In [None]:
# ID of the channel to summarise.
# NOTE(review): `id` shadows the Python builtin of the same name; the name is kept
# here in case other cells read it.
id = 'UCWOA1ZGywLbqmigxE4Qlvuw'

request = youtube.channels().list(part="snippet,contentDetails,statistics", id=id)
response = request.execute()

# Fail with a clear message when the lookup returns no channel, instead of an
# opaque KeyError/IndexError from indexing a missing or empty 'items' list.
channel_items = response.get('items') or []
if not channel_items:
    raise ValueError(f"No channel found for id '{id}'")

channel = channel_items[0]
channel_snippet = channel['snippet']
channel_stats = channel['statistics']

# One flat record with the channel's descriptive fields, its counters, and the
# ID of its uploads playlist (used later to enumerate the channel's videos).
channel_overview = {
    'title': channel_snippet['title'],
    'description': channel_snippet['description'],
    'publishedAt': channel_snippet['publishedAt'],
    # Counters are read with .get so a missing statistic becomes None rather than
    # aborting the cell (presumably possible when a channel hides a count — confirm).
    'viewCount': channel_stats.get('viewCount'),
    'subscriberCount': channel_stats.get('subscriberCount'),
    'videoCount': channel_stats.get('videoCount'),
    'uploads': channel['contentDetails']['relatedPlaylists']['uploads'],
}

df_channel_overview = pd.DataFrame([channel_overview])
df_channel_overview

nextPageToken
- nextPageToken은 Google API에서 페이지네이션(결과를 여러 페이지로 분할하여 반환)을 구현하는 데 사용되는 토큰입니다. 이 토큰은 이전 페이지에서 반환된 결과의 다음 페이지를 요청할 때 사용됩니다.

In [None]:
# Collect the ID of every video in the channel's uploads playlist.
playlistId = df_channel_overview['uploads'].iloc[0]
video_ids = []

# Arguments shared by every page request. 'pageToken' is added only after the
# API has returned a token for a following page, so the first request is sent
# without it.
page_params = {
    'part': "snippet,contentDetails",
    'playlistId': playlistId,
    'maxResults': 50,
}

while True:
    request = youtube.playlistItems().list(**page_params)
    response = request.execute()
    video_ids.extend(item['contentDetails']['videoId'] for item in response['items'])

    # No token in the response means this was the last page.
    nextPageToken = response.get('nextPageToken')
    if nextPageToken is None:
        break
    page_params['pageToken'] = nextPageToken

In [None]:
# Fetch the details of every collected video and assemble them into a DataFrame.
videos = []
channel_title = df_channel_overview['title'].iloc[0]

# The IDs are sent to videos().list in slices of 50.
for start in range(0, len(video_ids), 50):
    id_batch = video_ids[start:start + 50]
    request = youtube.videos().list(part="snippet,contentDetails,statistics", id=id_batch)
    response = request.execute()

    for item in response['items']:
        snippet = item['snippet']
        stats = item['statistics']
        videos.append({
            'channelTitle': channel_title,
            'videoId': item['id'],
            'categoryId': snippet['categoryId'],
            'publishedAt': snippet['publishedAt'],
            'title': snippet['title'],
            'description': snippet['description'],
            # Videos without tags get the placeholder string 'no_tags'.
            'tags': snippet.get('tags', 'no_tags'),
            'duration': item['contentDetails']['duration'],
            # Missing counters default to 0.
            'viewCount': stats.get('viewCount', 0),
            'likeCount': stats.get('likeCount', 0),
            'commentCount': stats.get('commentCount', 0),
        })

df_videos = pd.DataFrame(videos)
len(df_videos)

In [None]:
# https://sddev.tistory.com/88
import isodate

# Parse the duration strings (e.g. "PT2M11S") and store their length in seconds.
parsed_durations = df_videos['duration'].map(isodate.parse_duration)
df_videos['duration_sec'] = parsed_durations.map(lambda parsed: parsed.total_seconds())

# Make the counter and duration columns numeric; values that cannot be parsed
# become NaN (errors='coerce').
numeric_columns = ['viewCount', 'likeCount', 'commentCount', 'duration_sec']
for column in numeric_columns:
    df_videos[column] = pd.to_numeric(df_videos[column], errors='coerce')

# Parse the publish timestamp once, then derive the publication year from it.
published = pd.to_datetime(df_videos['publishedAt'])
df_videos['publishedAt'] = published
df_videos['year'] = published.dt.year

In [None]:
# Preview the first rows of the video table.
df_videos.head()

In [None]:
# Rename the columns to their Korean labels through an explicit old -> new mapping.
# Unlike positional assignment to `.columns`, a mapping cannot attach a label to
# the wrong data if the column order changes, and re-running the cell is a no-op.
# NOTE(review): 'tags' is labelled '출연자_정보' — the label is kept exactly as the
# original cell had it; confirm it describes the tag data as intended.
df_videos = df_videos.rename(columns={
    'channelTitle': '채널명',
    'videoId': '영상ID',
    'categoryId': '카테고리ID',
    'publishedAt': '게시일',
    'title': '제목',
    'description': '설명',
    'tags': '출연자_정보',
    'duration': '재생시간',
    'viewCount': '조회수',
    'likeCount': '좋아요수',
    'commentCount': '댓글수',
    'duration_sec': '재생시간(초)',
    'year': '년도',
})

In [None]:
# Count how many videos fall into each value of the '년도' column.
df_videos['년도'].value_counts()

In [None]:
# CSV export, currently disabled:
# df_videos.to_csv('../data/df_videos.csv', sep=",", encoding="utf-8")

# Strip the timezone from the publish timestamps before writing the workbook.
published_naive = pd.to_datetime(df_videos['게시일']).dt.tz_localize(None)
df_videos['게시일'] = published_naive

# NOTE(review): the URL-encoded text captured after this cell looks like a writer
# warning about a description string being treated as a URL — confirm.
df_videos.to_excel('../data/df_videos.xlsx', index=False)

http://www.netflix.com
Iconic%20musician%20Tony%20Bennett%20looks%20back%20on%20his%20decades-long%20career,%20sharing%20thoughts%20on%20music,%20art,%20and%20life%20in%20The%20Zen%20of%20Bennett.%20Exclusively%20on%20Netflix,%20the%20documentary%20includes%20appearances%20by%20Amy%20Winehouse,%20Lady%20Gaga,%20Carrie%20Underwood,%20Andrea%20Bocelli,%20John%20Mayer,%20and%20Michael%20Buble.

With%20more%20than%2030%20million%20streaming%20members%20in%20the%20United%20States,%20Canada,%20Latin%20America,%20the%20United%20Kingdom,%20Ireland%20and%20the%20Nordics,%20Netflix,%20Inc.%20(NASDAQ:%20NFLX)%20is%20the%20world's%20leading%20
Internet%20subscription%20service%20for%20enjoying%20movies%20and%20TV%20programs.%20For%20one%20low%20monthly%20price,%20Netflix%20members%20can%20instantly%20watch%20movies%20and%20TV%20programs%20streamed%20over%20the%20Internet%20to%20PCs,%20Macs%20and%20TVs.%20

Cast:%20Tony%20Bennett,%20Amy%20Winehouse,%20Lady%20Gaga,%20Carrie%20Underwood,%20Andrea%20B

In [None]:
# This cell repeats the Excel export cell earlier in the notebook.
# CSV export, currently disabled:
# df_videos.to_csv('../data/df_videos.csv', sep=",", encoding="utf-8")

# Strip the timezone from the publish timestamps before writing the workbook.
published_naive = pd.to_datetime(df_videos['게시일']).dt.tz_localize(None)
df_videos['게시일'] = published_naive

df_videos.to_excel('../data/df_videos.xlsx', index=False)

---

In [9]:
# Reload the video table from CSV; the first CSV column becomes the index.
# NOTE(review): the visible export cells only write '../data/df_videos.xlsx' (the
# CSV export is commented out) — confirm where this file comes from.
videos_csv = 'df_videos.csv'
df_videos = pd.read_csv(videos_csv, index_col=0)

In [13]:
# Keep only the videos whose title mentions "trailer" in any letter case.
# `case=False` already covers every capitalisation, so one pattern is enough;
# `na=False` treats a missing title as "no match" instead of putting NaN into
# the boolean mask, which would make the row selection fail.
trailer = df_videos[df_videos['제목'].str.contains('trailer', case=False, na=False)]

In [11]:
# Spot check: show the videos whose title contains "JIVA" (case-sensitive).
df_videos[df_videos['제목'].str.contains('JIVA')]

Unnamed: 0,채널명,영상ID,카테고리ID,게시일,제목,설명,출연자_정보,재생시간,조회수,좋아요수,댓글수,재생시간(초),년도
2860,Netflix,zGYOt0ZmifQ,24,2021-06-07 08:00:00+00:00,JIVA! | Official Trailer | Netflix,A talented street dancer struggles while juggl...,"['Netflix series', 'dance', 'jiva', 'netflix 2...",PT2M11S,316747,6552,335,131.0,2021
2892,Netflix,Y9LaKjImxBA,24,2021-05-24 08:00:00+00:00,JIVA! | Official Teaser | Netflix,"""This is our chance to shine."" As'hambeni. HAY...","['Netflix series', 'dance', 'jiva', 'netflix 2...",PT59S,70957,2208,195,59.0,2021


---

In [12]:
# Load the IMDb title table from Excel, using the first column as the index,
# and preview its first rows.
imdb_xlsx = '../data/imdb_final_3.xlsx'
df = pd.read_excel(imdb_xlsx, index_col=0)
df.head()

Unnamed: 0,title,english_title,year,total_season_num,runtime,genre,age_rating,age_miss,justwatch_us_age,production_country,...,season_13,season_14,season_15,season_16,season_17,director,writer,actor,imdb_title,imdb_url
0,Ojingeo Geim,Squid Game,2021,1,55,"액션, 드라마, 스릴러",19.0,18,TV-MA,대한민국,...,,,,,,['Hwang Dong-hyuk'],['Hwang Dong-hyuk'],"['Lee Jung-jae', 'Park Hae-soo', 'Yasushi Iwak...",Squid Game (TV Series 2021),https://www.imdb.com/title/tt10919420/
1,12인의 심판자,The Twelve,2019,2,54,"드라마, 스릴러",,18,TV-MA,벨기에,...,,,,,,"['Kaat Beels', 'Wouter Bouvijn']","['Bert Van Dael', 'Nele Meirhaeghe', 'Roel Mon...","['Luc De Ruelle', 'Maaike Neuville', 'Tom Verm...",The Twelve (TV Series 2019–2023),https://www.imdb.com/title/tt7605396/
2,희생자 게임,The Victims' Game,2020,1,60,"드라마, 범죄, 스릴러",19.0,18,TV-MA,대만,...,,,,,,"['David Chuang', 'Kuan-Chung Chen']","['Joyce Liu', 'Jui-Liang Hsu', 'Shih-Keng Chie...","['Wei-Ning Hsu', 'Hsiao-chuan Chang', 'Shih-Si...",The Victims' Game (TV Series 2020),https://www.imdb.com/title/tt12079212/
3,스위트 투스: 사슴뿔을 가진 소년,Sweet Tooth,2021,2,52,"SF, 드라마, 판타지, 액션",15.0,15,TV-14,미국,...,,,,,,"['Alexis Ostrander', 'Carol Banker', 'Ciarán F...","['Beth Schwartz', 'Bo Yeon Kim', 'Carly Woodwo...","['Nonso Anozie', 'Christian Convery', 'Stefani...",Sweet Tooth (TV Series 2021),https://www.imdb.com/title/tt12809988/
4,Sweet Home,Sweet Home,2020,2,60,"스릴러, 드라마, 판타지, 공포, SF, 범죄",,18,TV-MA,대한민국,...,,,,,,"['Jang Young-woo', 'Lee Eung-bok', 'Park So-hy...","['Hong So-ri', 'Hwang Young-Chan', 'Kim Hyung-...","['Song Kang', 'Lee Jin-wook', 'Lee Si-young', ...",Sweet Home (TV Series 2020),https://www.imdb.com/title/tt11612120/


In [14]:
# Summarise the trailer subset: column dtypes and non-null counts.
trailer.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2947 entries, 0 to 7323
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   채널명      2947 non-null   object 
 1   영상ID     2947 non-null   object 
 2   카테고리ID   2947 non-null   int64  
 3   게시일      2947 non-null   object 
 4   제목       2947 non-null   object 
 5   설명       2947 non-null   object 
 6   출연자_정보   2947 non-null   object 
 7   재생시간     2947 non-null   object 
 8   조회수      2947 non-null   int64  
 9   좋아요수     2947 non-null   int64  
 10  댓글수      2947 non-null   int64  
 11  재생시간(초)  2947 non-null   float64
 12  년도       2947 non-null   int64  
dtypes: float64(1), int64(5), object(7)
memory usage: 322.3+ KB


In [18]:
import re  # used to escape titles before using them as search patterns

# Match every catalogue title against the trailer titles and report the outcome
# per title. Matches are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop would copy the growing frame on every pass.
matched_frames = []

for title in df['english_title']:
    # A missing (NaN) title is not a string, and a blank pattern would match
    # every trailer, so both are skipped explicitly.
    if not isinstance(title, str) or not title.strip():
        print(f"Skipping unusable title: {title!r}")
        continue

    # Escape the title so that characters with a regex meaning are matched literally.
    escaped_title = re.escape(title)
    # na=False: a trailer row without a title counts as "no match".
    filtered_df = trailer[trailer['제목'].str.contains(escaped_title, case=False, na=False)]

    if filtered_df.empty:
        print(f"No data found for '{title}' Trailer")
    else:
        matched_frames.append(filtered_df)
        print(f"Filtered data for '{title}' Trailer:")
        print(filtered_df)

# A video that matches several titles appears once per matching title; rows are
# not de-duplicated. With no matches at all the result is an empty DataFrame.
youtube_df = pd.concat(matched_frames, ignore_index=True) if matched_frames else pd.DataFrame()

# Show the combined result.
print("\nFinal filtered dataframe:")
print(youtube_df)

Filtered data for 'Squid Game' Trailer:
          채널명         영상ID  카테고리ID                        게시일  \
514   Netflix  W-Cc6XaneLs      24  2023-12-01 15:00:04+00:00   
515   Netflix  OzJ1aS317mg      24  2023-12-01 15:00:00+00:00   
652   Netflix  O61C8zc8Znk      24  2023-10-23 15:00:01+00:00   
2587  Netflix  oqxAJKy0ii4      24  2021-09-02 00:00:02+00:00   

                                                     제목  \
514   Squid Game: The Challenge | Who Will Win the 4...   
515   Squid Game: The Challenge | Who Will Win the 4...   
652   Squid Game: The Challenge | Official Trailer |...   
2587            Squid Game | Official Trailer | Netflix   

                                                     설명  \
514   The game that started with 456 players competi...   
515   The game that started with 456 players competi...   
652   Make friends. Make enemies. Make millions. Squ...   
2587  A Netflix Series | Squid Game \nSurvive or die...   

                                          

In [19]:
# Split the catalogue titles into those with at least one matching trailer
# (their trailer rows go into youtube_df) and those with none (listed in
# no_data_df). Both results are built from plain lists and turned into
# DataFrames once, instead of growing DataFrames with pd.concat inside the loop.
matched_frames = []
unmatched_titles = []

for title in df['english_title']:
    # A missing (NaN) title is not a string, and a blank pattern would match
    # every trailer, so both are skipped explicitly.
    if not isinstance(title, str) or not title.strip():
        print(f"Skipping unusable title: {title!r}")
        continue

    # Escape the title so that characters with a regex meaning are matched literally.
    escaped_title = re.escape(title)
    # na=False: a trailer row without a title counts as "no match".
    filtered_df = trailer[trailer['제목'].str.contains(escaped_title, case=False, na=False)]

    if filtered_df.empty:
        unmatched_titles.append(title)
    else:
        matched_frames.append(filtered_df)

# A video that matches several titles appears once per matching title; rows are
# not de-duplicated. With no matches at all the result is an empty DataFrame.
youtube_df = pd.concat(matched_frames, ignore_index=True) if matched_frames else pd.DataFrame()
no_data_df = pd.DataFrame(unmatched_titles, columns=['Title'])

In [20]:
# Print the dataframe holding the trailer rows that matched a title.
print("Filtered dataframe with data:")
print(youtube_df)

Filtered dataframe with data:
          채널명         영상ID  카테고리ID                        게시일  \
0     Netflix  W-Cc6XaneLs      24  2023-12-01 15:00:04+00:00   
1     Netflix  OzJ1aS317mg      24  2023-12-01 15:00:00+00:00   
2     Netflix  O61C8zc8Znk      24  2023-10-23 15:00:01+00:00   
3     Netflix  oqxAJKy0ii4      24  2021-09-02 00:00:02+00:00   
4     Netflix  Os4R9IEBOA8      24  2024-05-21 16:00:03+00:00   
...       ...          ...     ...                        ...   
1269  Netflix  5yxjRgwYymg      24  2019-05-17 03:30:01+00:00   
1270  Netflix  W-Cc6XaneLs      24  2023-12-01 15:00:04+00:00   
1271  Netflix  OzJ1aS317mg      24  2023-12-01 15:00:00+00:00   
1272  Netflix  iDvPvqImb-4      24  2021-09-30 09:00:00+00:00   
1273  Netflix  KmtAZKXb6kU      24  2020-08-12 18:00:00+00:00   

                                                     제목  \
0     Squid Game: The Challenge | Who Will Win the 4...   
1     Squid Game: The Challenge | Who Will Win the 4...   
2     Squid 

In [21]:
# Print the titles for which no matching trailer was found.
print("\nTitles with no data found:")
print(no_data_df)



Titles with no data found:
                                 Title
0                           The Twelve
1                    The Victims' Game
2                      The Good Doctor
3                     The Walking Dead
4                            The Flash
..                                 ...
501   Close Your Eyes Before It's Dark
502                 Bhaag Beanie Bhaag
503                  Million Yen Women
504                            Hasmukh
505  Elite Short Stories: Nadia Guzmán

[506 rows x 1 columns]


### 쇼츠와 예고편 제거

In [None]:
# Keep only the videos longer than 60 seconds.
# The duration column '재생시간(초)' is created on the YouTube video table
# (df_videos); `df` is the IMDb table loaded from Excel, so filtering `df` on
# this column would raise a KeyError.
# NOTE(review): confirm df_videos (rather than trailer or youtube_df) is the
# frame this filter is meant to apply to.
short_cut_df = df_videos[df_videos['재생시간(초)'] > 60]