In [4]:

# -*- coding: utf-8 -*-

import os

import google.oauth2.credentials

import google_auth_oauthlib.flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow

# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret.
CLIENT_SECRETS_FILE = "src_client_secret.json"

# This OAuth 2.0 access scope allows for full read/write access to the
# authenticated user's account and requires requests to use an SSL connection.
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

def get_authenticated_service():
  flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRETS_FILE, SCOPES)
  credentials = flow.run_console()
  return build(API_SERVICE_NAME, API_VERSION, credentials = credentials)

# Build a resource based on a list of properties given as key-value pairs.
# Leave properties with empty values out of the inserted resource.
def build_resource(properties):
  resource = {}
  for p in properties:
    # Given a key like "snippet.title", split into "snippet" and "title", where
    # "snippet" will be an object and "title" will be a property in that object.
    prop_array = p.split('.')
    ref = resource
    for pa in range(0, len(prop_array)):
      is_array = False
      key = prop_array[pa]

      # For properties that have array values, convert a name like
      # "snippet.tags[]" to snippet.tags, and set a flag to handle
      # the value as an array.
      if key[-2:] == '[]':
        key = key[0:len(key)-2:]
        is_array = True

      if pa == (len(prop_array) - 1):
        # Leave properties without values out of inserted resource.
        if properties[p]:
          if is_array:
            ref[key] = properties[p].split(',')
          else:
            ref[key] = properties[p]
      elif key not in ref:
        # For example, the property is "snippet.title", but the resource does
        # not yet have a "snippet" object. Create the snippet object here.
        # Setting "ref = ref[key]" means that in the next time through the
        # "for pa in range ..." loop, we will be setting a property in the
        # resource's "snippet" object.
        ref[key] = {}
        ref = ref[key]
      else:
        # For example, the property is "snippet.description", and the resource
        # already has a "snippet" object.
        ref = ref[key]
  return resource

# Remove keyword arguments that are not set
def remove_empty_kwargs(**kwargs):
  good_kwargs = {}
  if kwargs is not None:
    for key, value in kwargs.items():
      if value:
        good_kwargs[key] = value
  return good_kwargs

def videos_list_my_rated_videos(client, **kwargs):
  # See full sample for function
  kwargs = remove_empty_kwargs(**kwargs)

  response = client.videos().list(
    **kwargs
  ).execute()

  return response


if __name__ == '__main__':
  # When running locally, disable OAuthlib's HTTPs verification. When
  # running in production *do not* leave this option enabled.
  os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
  client = get_authenticated_service()
    

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=93523413584-548o4150r16vuh2a3etsbm7ea10v3mj7.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.force-ssl&state=ePe5ed1c2OG2n81STphSP8oCCPNo5e&prompt=consent&access_type=offline
Enter the authorization code: 4/BQEzRx4wMv2esrSEhvITznAIWO5HXBI19Fx72Nvsu7DVicCyRqGUGZ0


In [5]:
r = videos_list_my_rated_videos(client,
part='snippet,contentDetails,statistics',
myRating='like')
  

In [6]:
r['pageInfo']


{'totalResults': 752, 'resultsPerPage': 5}

### let's extract all the liked videos, store in a df and explore!

In [7]:
import pandas as pd
columns=['id','title','publishedAt','categoryId','viewCount','likeCount','dislikeCount']
df = pd.DataFrame(columns=columns)
# df.loc[len(df)] = [2, 4, 5,6]
# df.loc[len(df)] = [2, 4, 5,7]
# df.loc[len(df)] = [2, 4, 5,6]

df

Unnamed: 0,id,title,publishedAt,categoryId,viewCount,likeCount,dislikeCount


In [8]:
r = videos_list_my_rated_videos(client,
    part='snippet,contentDetails,statistics',
    myRating='like')

i = 0
# nextpageToken gets pointer to next page
while 'nextPageToken' in r.keys() and r['nextPageToken']!="" :
    i = i+1
    if i%10==0:
        print(i)
    
    r = videos_list_my_rated_videos(client,
    part='snippet,contentDetails,statistics',
    pageToken=r['nextPageToken'], 
    myRating='like')
    
    for item in r['items']:
        try:
            df.loc[len(df)] = [
                item['id'],
                item['snippet']['title'],
                item['snippet']['publishedAt'][:10],
                item['snippet']['categoryId'],
                item['statistics']['viewCount'],
                item['statistics']['likeCount'],
                item['statistics']['dislikeCount']
            ]
        except Exception:
            df.loc[len(df)] = [
                item['id'],
                item['snippet']['title'],
                item['snippet']['publishedAt'][:10],
                item['snippet']['categoryId'],
                item['statistics']['viewCount'],
                0,
                0
            ]

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150


In [15]:
df.describe()
# interesting, we have about 371 songs (category 10), let's extarct them out and save it to the other channel

Unnamed: 0,id,title,publishedAt,categoryId,viewCount,likeCount,dislikeCount
count,690,690,690,690,690,690,690
unique,690,688,542,15,690,676,482
top,Eww37rpEyfs,White Iverson,2018-04-27,10,49785626,55,5
freq,1,2,6,371,1,3,9


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 690 entries, 0 to 689
Data columns (total 7 columns):
id              690 non-null object
title           690 non-null object
publishedAt     690 non-null object
categoryId      690 non-null object
viewCount       690 non-null object
likeCount       690 non-null object
dislikeCount    690 non-null object
dtypes: object(7)
memory usage: 43.1+ KB


In [17]:
df.head()

Unnamed: 0,id,title,publishedAt,categoryId,viewCount,likeCount,dislikeCount
0,s9WQbnaMR0c,習譜予 - 等下一個他「 官方MV版 」♪Karendaidai♪,2018-05-08,10,30336,500,13
1,hyqEI9N-ya0,馮提莫 waiting him《等下一個他》,2019-01-03,22,2475,49,4
2,KGrulNiJ6Uc,Kimberley陳芳語《愛你AINI》Official MV(HD),2012-04-23,10,97566381,236914,7322
3,Oayma-ggbUk,宇多田ヒカル - 「First Love」ジャケット写真撮影風景(1998年）,2015-02-07,10,270780,1246,91
4,AnjSjJOEfSc,"Japan’s Master Inventor Has Over 3,500 Patents",2019-03-01,22,988508,33449,620


In [28]:
music_df = df.loc[df['categoryId'] == '10']

In [30]:
music_df.to_csv('liked_videos.csv', sep=',', encoding='utf-8',index=False)

# let's explore the video categories


In [50]:
def video_categories_list(client, **kwargs):
  # See full sample for function
  kwargs = remove_empty_kwargs(**kwargs)

  response = client.videoCategories().list(
    **kwargs
  ).execute()

  return response

In [52]:
vid_cat_repsonse= video_categories_list(client,
    part='snippet',
    regionCode='US')

In [60]:
video_category_map = {}
for item in vid_cat_repsonse['items']:
    video_category_map[item['id']]=item['snippet']['title']
video_category_map    

{'1': 'Film & Animation',
 '2': 'Autos & Vehicles',
 '10': 'Music',
 '15': 'Pets & Animals',
 '17': 'Sports',
 '18': 'Short Movies',
 '19': 'Travel & Events',
 '20': 'Gaming',
 '21': 'Videoblogging',
 '22': 'People & Blogs',
 '23': 'Comedy',
 '24': 'Entertainment',
 '25': 'News & Politics',
 '26': 'Howto & Style',
 '27': 'Education',
 '28': 'Science & Technology',
 '29': 'Nonprofits & Activism',
 '30': 'Movies',
 '31': 'Anime/Animation',
 '32': 'Action/Adventure',
 '33': 'Classics',
 '34': 'Comedy',
 '35': 'Documentary',
 '36': 'Drama',
 '37': 'Family',
 '38': 'Foreign',
 '39': 'Horror',
 '40': 'Sci-Fi/Fantasy',
 '41': 'Thriller',
 '42': 'Shorts',
 '43': 'Shows',
 '44': 'Trailers'}

I wonder if youtube music only contains category id 10?

In [61]:
# let's test this out by parsing our liked videos first

### let's save the df to a csv, then add the videos in the csv to our other youtube channel