## This notebook explores the data (TFRecord format) using a subsample of the YouTube-8M video-level data.
## To work with the entire dataset, please refer to the Starter code on the [YouTube-8M github repo](https://github.com/google/youtube-8m).

## Invoke necessary dependencies

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

# linear algebra
import numpy as np 

# data processing, CSV file I/O (e.g. pd.read_csv)
import pandas as pd 

#Loading libraries & datasets
import tensorflow as tf

# Input data files should be available in the "/input/" directory.
import os
import sys

from urllib.request import urlopen
import youtube_dl

# Any results you write to the current directory are saved as output.

## Create helper functions

In [2]:
# This function collects the data provided by youtube-dl, such as rendition tables, number of views, etc.
def get_metadata(video_id: str) -> "dict | None":
    """Fetch the youtube-dl metadata of a YouTube video without downloading it.

    Args:
        video_id: The real YouTube video id (the value of the ``v=`` URL
            parameter), not the anonymized id stored in the dataset.

    Returns:
        Whatever ``YoutubeDL.extract_info`` returns for the video (the caller
        indexes it like a dict: 'title', 'formats', ...), or ``None`` when
        youtube-dl raises ``DownloadError`` (e.g. the video is unavailable or
        blocked in the current country).
    """
    url = 'https://www.youtube.com/watch?v=' + video_id
    ydl = youtube_dl.YoutubeDL({'outtmpl': '%(id)s%(ext)s'})
    try:
        # The context manager lets youtube-dl release its resources on exit.
        with ydl:
            # download=False: only the metadata is extracted, no media is saved.
            return ydl.extract_info(url, download=False)
    except youtube_dl.utils.DownloadError:
        return None

In [3]:
# For privacy reasons, the video IDs in the dataset are provided in an encoded form.
# Instructions and further information are available here:
#      https://research.google.com/youtube8m/video_id_conversion.html
def get_real_id(random_id: str) -> str:
    """Translate an anonymized YouTube-8M id into the real YouTube video id.

    The lookup file is fetched from data.yt8m.org; its path is built from the
    first two characters of ``random_id`` followed by the full id. The payload
    is expected to look like ``i("<random_id>","<real_id>");`` and the text
    between the quotes after the first comma is returned.

    Args:
        random_id: The id stored in the TFRecord 'id' feature.

    Returns:
        The real YouTube video id.

    Raises:
        ValueError: If the payload does not contain a quoted id between the
            first ',' and the first ')'.
        urllib.error.URLError: Propagated from ``urlopen`` when the lookup
            request fails.
    """
    url = 'http://data.yt8m.org/2/j/i/{}/{}.js'.format(random_id[0:2], random_id)
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        payload = response.read().decode()

    comma = payload.find(',')
    paren = payload.find(')')
    # Skip the comma and the opening quote; stop before the closing quote.
    start, end = comma + 2, paren - 1
    if comma == -1 or paren == -1 or end <= start:
        raise ValueError(
            'Unexpected id-conversion payload for {!r}: {!r}'.format(random_id, payload))
    return payload[start:end]

In [4]:
# We need this function to filter out the fields of metadata we won't be using about each video
def without_keys(d, keys=None):
    """Return a copy of ``d`` restricted to the entries whose key is in ``keys``.

    Despite its name, the function keeps the listed keys and drops all others.

    Args:
        d: Mapping to filter.
        keys: Container of keys to keep. When omitted, the module-level
            ``wanted_data`` list is used, so ``wanted_data`` must be defined
            before the function is called that way.

    Returns:
        A new dict containing only the entries of ``d`` whose key is in ``keys``.
    """
    if keys is None:
        keys = wanted_data
    return {x: d[x] for x in d if x in keys}

## Bring in the TensorFlow records

These records are organized into chunks, or "shards", on the YT8M website. 
Instructions on how to get them are available here: https://research.google.com/youtube8m/download.html
As they take up almost 96GB of our valuable hard disk, we have only run experiments on one of them for this article.

In [5]:
# Path (relative to the working directory) of the TFRecord shard that the
# record iterator below reads.
video_lvl_record = "input/train00.tfrecord"

## Iterate over the records to obtain the video-level labels

In [6]:
vid_ids = []
labels = []    # after the loop: the labels of the last example that was parsed
records = []   # one dict per video whose metadata could be fetched

wanted_data = ['format', 'quality']


def _mp4_ladder(formats):
    """Return one {format name: 'tbr' value} dict per mp4 entry of `formats`.

    Entries lacking any of the 'ext', 'format' or 'tbr' keys are skipped.
    """
    return [
        {fmt['format']: fmt['tbr']}
        for fmt in formats
        if fmt.get('ext') == 'mp4' and 'format' in fmt and 'tbr' in fmt
    ]


# Iterate the contents of the TensorFlow record
for example in tf.python_io.tf_record_iterator(video_lvl_record):

    # A TensorFlow Example is a mostly-normalized data format for storing data for
    # training and inference.  It contains a key-value store (features); where
    # each key (string) maps to a Feature message (which is oneof packed BytesList,
    # FloatList, or Int64List). Features for this data set are:
    #     -id
    #     -labels
    #     -mean_audio
    #     -mean_rgb
    tf_example = tf.train.Example.FromString(example)
    features = tf_example.features.feature

    # Once we have the structured data, we can extract the relevant features (id and labels)
    pseudo_id = features['id'].bytes_list.value[0].decode(encoding='UTF-8')
    labels = list(features['labels'].int64_list.value)
    vid_ids.append(pseudo_id)

    # The id provided from the TensorFlow example needs some processing in order to build
    # a valid link to a YouTube video. The lookup is best-effort: a failure skips this
    # video only, so that stale metadata from the previous iteration is never reused.
    try:
        real_id = get_real_id(pseudo_id)

        # Get the youtube-dl valuable metadata
        data_video = get_metadata(real_id)
    except Exception as err:  # not a bare except: Ctrl-C must still stop the loop
        print('Skipping {}: {!r}'.format(pseudo_id, err), file=sys.stderr)
        continue

    if not data_video:
        # get_metadata() returns None when youtube-dl could not access the video
        continue

    # We are interested in expanding the labels information with features such as title,
    # creator, number of views and duration.
    # youtube-dl supplies data regarding formats mixed for both audio and video.
    # We are only interested in mp4 inputs, so they are filtered out by _mp4_ladder().
    records.append({'id': real_id,
                    'ladder': _mp4_ladder(data_video.get('formats') or []),
                    'title': data_video.get('title'),
                    'creator': data_video.get('creator'),
                    'views': data_video.get('view_count'),
                    'duration': data_video.get('duration'),
                    'labels': labels})

# Build the frame once instead of appending row by row (quadratic, and
# DataFrame.append no longer exists in pandas 2.x). The explicit column list
# keeps the alphabetical column order of the saved output.
data = pd.DataFrame(records,
                    columns=['creator', 'duration', 'id', 'labels',
                             'ladder', 'title', 'views'])


[youtube] eguZ69v_vlQ: Downloading webpage
[youtube] eguZ69v_vlQ: Downloading video info webpage
[youtube] eguZ69v_vlQ: Downloading MPD manifest
[youtube] eguZ69v_vlQ: Downloading MPD manifest
[youtube] ER9Hdp04tWs: Downloading webpage
[youtube] ER9Hdp04tWs: Downloading video info webpage
[youtube] ETF2-Zz3J18: Downloading webpage
[youtube] ETF2-Zz3J18: Downloading video info webpage
[youtube] jtvbLq9bYRc: Downloading webpage
[youtube] jtvbLq9bYRc: Downloading video info webpage
[youtube] 6BPXQMxdHog: Downloading webpage
[youtube] 6BPXQMxdHog: Downloading video info webpage
[youtube] -j989rqetQE: Downloading webpage
[youtube] -j989rqetQE: Downloading video info webpage
[youtube] F-4h2WwVr3g: Downloading webpage
[youtube] F-4h2WwVr3g: Downloading video info webpage
[youtube] UZt7rP0poxs: Downloading webpage
[youtube] UZt7rP0poxs: Downloading video info webpage
[youtube] UZt7rP0poxs: Downloading MPD manifest
[youtube] UZt7rP0poxs: Downloading MPD manifest
[youtube] kGFuNGexHJY: Downloadi

[youtube] 6HaTNVSnHfM: Downloading video info webpage
[youtube] 6HaTNVSnHfM: Downloading MPD manifest
[youtube] 6JsyAfDtky8: Downloading webpage
[youtube] 6JsyAfDtky8: Downloading video info webpage
[youtube] rwQ_qJFooOE: Downloading webpage
[youtube] rwQ_qJFooOE: Downloading video info webpage
[youtube] rzjq73_Ll_k: Downloading webpage
[youtube] rzjq73_Ll_k: Downloading video info webpage
[youtube] s-yJ5j2wQzo: Downloading webpage
[youtube] s-yJ5j2wQzo: Downloading video info webpage
[youtube] pi3O5jFJq9o: Downloading webpage
[youtube] pi3O5jFJq9o: Downloading video info webpage
[youtube] pjNbWAZpUck: Downloading webpage
[youtube] pjNbWAZpUck: Downloading video info webpage
[youtube] w9gXURtwDLc: Downloading webpage
[youtube] w9gXURtwDLc: Downloading video info webpage
[youtube] w9HnJB8ay-A: Downloading webpage
[youtube] w9HnJB8ay-A: Downloading video info webpage
[youtube] Ccw33PBVIsQ: Downloading webpage
[youtube] Ccw33PBVIsQ: Downloading video info webpage
[youtube] T3kz36fWQjc: Do

[youtube] 1VyOFOuMLYw: Downloading video info webpage
[youtube] 1WIrQW2W4DA: Downloading webpage
[youtube] 1WIrQW2W4DA: Downloading video info webpage
[youtube] -kFzddE-hio: Downloading webpage
[youtube] -kFzddE-hio: Downloading video info webpage
[youtube] -kFzddE-hio: Downloading MPD manifest
[youtube] -kFzddE-hio: Downloading MPD manifest
[youtube] 3J4f-oWRx9s: Downloading webpage
[youtube] 3J4f-oWRx9s: Downloading video info webpage
[youtube] 4yG0tE0HgR0: Downloading webpage
[youtube] 4yG0tE0HgR0: Downloading video info webpage
[youtube] 4yG0tE0HgR0: Downloading MPD manifest
[youtube] 4yG0tE0HgR0: Downloading MPD manifest
[youtube] 4zd5MrPBpUg: Downloading webpage
[youtube] 4zd5MrPBpUg: Downloading video info webpage
[youtube] vR53CHmA-5U: Downloading webpage
[youtube] vR53CHmA-5U: Downloading video info webpage
[youtube] SOmPJOu5uI4: Downloading webpage
[youtube] SOmPJOu5uI4: Downloading video info webpage
[youtube] SOmPJOu5uI4: Downloading MPD manifest
[youtube] SOmPJOu5uI4: Down

[youtube] yQkwdH2xPJc: Downloading webpage
[youtube] yQkwdH2xPJc: Downloading video info webpage
[youtube] yQSw6ANRjfA: Downloading webpage
[youtube] yQSw6ANRjfA: Downloading video info webpage
[youtube] yRBlnLXzGsE: Downloading webpage
[youtube] yRBlnLXzGsE: Downloading video info webpage
[youtube] CORIT_Xa9-E: Downloading webpage
[youtube] CORIT_Xa9-E: Downloading video info webpage
[youtube] CPfMdyr6yxc: Downloading webpage
[youtube] CPfMdyr6yxc: Downloading video info webpage
[youtube] 8xlw3-_0nS0: Downloading webpage
[youtube] 8xlw3-_0nS0: Downloading video info webpage
[youtube] IsgCubwjiEw: Downloading webpage
[youtube] IsgCubwjiEw: Downloading video info webpage
[youtube] s6c7SUVZ0qU: Downloading webpage
[youtube] s6c7SUVZ0qU: Downloading video info webpage
[youtube] sChHFi9Jek8: Downloading webpage
[youtube] sChHFi9Jek8: Downloading video info webpage
[youtube] JTYXMjaeNMc: Downloading webpage
[youtube] JTYXMjaeNMc: Downloading video info webpage
[youtube] JVAqi47JIIY: Downloa

ERROR: This video contains content from WMG, who has blocked it in your country on copyright grounds.


[youtube] CZ16hxTWOQE: Downloading webpage
[youtube] CZ16hxTWOQE: Downloading video info webpage
[youtube] C_ceO9OfKWY: Downloading webpage
[youtube] C_ceO9OfKWY: Downloading video info webpage
[youtube] CazSMXyXnRo: Downloading webpage
[youtube] CazSMXyXnRo: Downloading video info webpage
[youtube] G013p3a0-io: Downloading webpage
[youtube] G013p3a0-io: Downloading video info webpage
[youtube] G013p3a0-io: Downloading MPD manifest
[youtube] G01Vqs-9DOA: Downloading webpage
[youtube] G01Vqs-9DOA: Downloading video info webpage
[youtube] G1K9nSkLVCA: Downloading webpage
[youtube] G1K9nSkLVCA: Downloading video info webpage
[youtube] oWOl4v2Fd_U: Downloading webpage
[youtube] oWOl4v2Fd_U: Downloading video info webpage
[youtube] oZSgPEO1vq8: Downloading webpage
[youtube] oZSgPEO1vq8: Downloading video info webpage
[youtube] oZbdxjsYa1U: Downloading webpage
[youtube] oZbdxjsYa1U: Downloading video info webpage
[youtube] EZKnGjV2yxo: Downloading webpage
[youtube] EZKnGjV2yxo: Downloading v

ERROR: This video is not available.


[youtube] EieKS8B9308: Downloading webpage
[youtube] EieKS8B9308: Downloading video info webpage
[youtube] EkIfOh-qoCU: Downloading webpage
[youtube] EkIfOh-qoCU: Downloading video info webpage
[youtube] EkWbb5uWfM4: Downloading webpage
[youtube] EkWbb5uWfM4: Downloading video info webpage
[youtube] Ems8epLlmuo: Downloading webpage
[youtube] Ems8epLlmuo: Downloading video info webpage
[youtube] M9scNjLDrlc: Downloading webpage
[youtube] M9scNjLDrlc: Downloading video info webpage
[youtube] MDYspQej9g0: Downloading webpage
[youtube] MDYspQej9g0: Downloading video info webpage
[youtube] U0qUDci54ow: Downloading webpage
[youtube] U0qUDci54ow: Downloading video info webpage
[youtube] U0qUDci54ow: Downloading MPD manifest
[youtube] U0qUDci54ow: Downloading MPD manifest
[youtube] U2It7kUpcQk: Downloading webpage
[youtube] U2It7kUpcQk: Downloading video info webpage
[youtube] BNJqdBdgp_w: Downloading webpage
[youtube] BNJqdBdgp_w: Downloading video info webpage
[youtube] BOJpCGi0GmY: Download

[youtube] 9IBGQqhusiQ: Downloading webpage
[youtube] 9IBGQqhusiQ: Downloading video info webpage
[youtube] Zm86vnVL9uE: Downloading webpage
[youtube] Zm86vnVL9uE: Downloading video info webpage
[youtube] Zm86vnVL9uE: Downloading MPD manifest
[youtube] Zm86vnVL9uE: Downloading MPD manifest
[youtube] ZmKwRzeOJ2k: Downloading webpage
[youtube] ZmKwRzeOJ2k: Downloading video info webpage
[youtube] WuwDByLsiXs: Downloading webpage
[youtube] WuwDByLsiXs: Downloading video info webpage
[youtube] Wu_YmVKqJJE: Downloading webpage
[youtube] Wu_YmVKqJJE: Downloading video info webpage
[youtube] WvrMf1YBNEQ: Downloading webpage
[youtube] WvrMf1YBNEQ: Downloading video info webpage
[youtube] zCMFpPqoZCA: Downloading webpage
[youtube] zCMFpPqoZCA: Downloading video info webpage
[youtube] O1U3jKLLek4: Downloading webpage
[youtube] O1U3jKLLek4: Downloading video info webpage
[youtube] 9vWVqelWvG0: Downloading webpage
[youtube] 9vWVqelWvG0: Downloading video info webpage
[youtube] 9vhooTZYo-4: Download

ERROR: This video contains content from [Merlin] IDJDigital, who has blocked it in your country on copyright grounds.


[youtube] iM-ItnDVXHI: Downloading webpage
[youtube] iM-ItnDVXHI: Downloading video info webpage
[youtube] As8QSSO9cSA: Downloading webpage
[youtube] As8QSSO9cSA: Downloading video info webpage
[youtube] M0ISvGtfLc8: Downloading webpage
[youtube] M0ISvGtfLc8: Downloading video info webpage
[youtube] M1O7VjAcvUs: Downloading webpage
[youtube] M1O7VjAcvUs: Downloading video info webpage
[youtube] M1O7VjAcvUs: Downloading MPD manifest
[youtube] AVCI8N05Ur8: Downloading webpage
[youtube] AVCI8N05Ur8: Downloading video info webpage
[youtube] AXfKZjsldK8: Downloading webpage
[youtube] AXfKZjsldK8: Downloading video info webpage
[youtube] AZf-3sLVd2k: Downloading webpage
[youtube] AZf-3sLVd2k: Downloading video info webpage
[youtube] iQCmf57n1QA: Downloading webpage
[youtube] iQCmf57n1QA: Downloading video info webpage
[youtube] iTo05B-KJQo: Downloading webpage
[youtube] iTo05B-KJQo: Downloading video info webpage
[youtube] u01ZYV-Rj7I: Downloading webpage
[youtube] u01ZYV-Rj7I: Downloading v

[youtube] YUJ4coZHI4M: Downloading video info webpage
[youtube] 3dtGKPHUXrI: Downloading webpage
[youtube] 3dtGKPHUXrI: Downloading video info webpage
[youtube] 3gehXju5FTw: Downloading webpage
[youtube] 3gehXju5FTw: Downloading video info webpage
[youtube] w-dzgASEII4: Downloading webpage
[youtube] w-dzgASEII4: Downloading video info webpage
[youtube] w3nfzB-H-qM: Downloading webpage
[youtube] w3nfzB-H-qM: Downloading video info webpage
[youtube] w5rl3GVIJbs: Downloading webpage
[youtube] w5rl3GVIJbs: Downloading video info webpage
[youtube] w7ZFhcJoFdo: Downloading webpage
[youtube] w7ZFhcJoFdo: Downloading video info webpage
[youtube] w75z31IBxpc: Downloading webpage
[youtube] w75z31IBxpc: Downloading video info webpage
[youtube] _uKeD8SSnnE: Downloading webpage
[youtube] _uKeD8SSnnE: Downloading video info webpage
[youtube] _x_pLAazxL8: Downloading webpage
[youtube] _x_pLAazxL8: Downloading video info webpage
[youtube] _zW9P8EJXug: Downloading webpage
[youtube] _zW9P8EJXug: Downloa

ERROR: The uploader has not made this video available in your country.


[youtube] 9K59v-lPTEU: Downloading webpage
[youtube] 9K59v-lPTEU: Downloading video info webpage
[youtube] 2hWc3SCKrXs: Downloading webpage
[youtube] 2hWc3SCKrXs: Downloading video info webpage
[youtube] UOi6VVNO6Wk: Downloading webpage
[youtube] UOi6VVNO6Wk: Downloading video info webpage
[youtube] 1f5OSEPQQV0: Downloading webpage
[youtube] 1f5OSEPQQV0: Downloading video info webpage
[youtube] 1f5OSEPQQV0: Downloading MPD manifest
[youtube] 1f5OSEPQQV0: Downloading MPD manifest
[youtube] 1hfkxZnOVBQ: Downloading webpage
[youtube] 1hfkxZnOVBQ: Downloading video info webpage
[youtube] zyqpAEdX0tQ: Downloading webpage
[youtube] zyqpAEdX0tQ: Downloading video info webpage
[youtube] zzxtPC3BawA: Downloading webpage
[youtube] zzxtPC3BawA: Downloading video info webpage
[youtube] EuhizqbdaO4: Downloading webpage
[youtube] EuhizqbdaO4: Downloading video info webpage
[youtube] EuhizqbdaO4: Downloading MPD manifest
[youtube] EyEUd_lh014: Downloading webpage
[youtube] EyEUd_lh014: Downloading vi

[youtube] WwaCrfKjHEc: Downloading MPD manifest
[youtube] fN2u_iCfPlQ: Downloading webpage
[youtube] fN2u_iCfPlQ: Downloading video info webpage
[youtube] 12meIAwCQI4: Downloading webpage
[youtube] 12meIAwCQI4: Downloading video info webpage
[youtube] 12meIAwCQI4: Downloading MPD manifest
[youtube] 12meIAwCQI4: Downloading MPD manifest
[youtube] 156IbBF4-rs: Downloading webpage
[youtube] 156IbBF4-rs: Downloading video info webpage
[youtube] 156IbBF4-rs: Downloading MPD manifest
[youtube] 156IbBF4-rs: Downloading MPD manifest
[youtube] 1AG3jjhHQcs: Downloading webpage
[youtube] 1AG3jjhHQcs: Downloading video info webpage
[youtube] 5iPNvZUSN-E: Downloading webpage
[youtube] 5iPNvZUSN-E: Downloading video info webpage
[youtube] hZ3nJI8y80g: Downloading webpage
[youtube] hZ3nJI8y80g: Downloading video info webpage
[youtube] hZh5DbnRYlc: Downloading webpage
[youtube] hZh5DbnRYlc: Downloading video info webpage
[youtube] ha_XaTYeL1Y: Downloading webpage
[youtube] ha_XaTYeL1Y: Downloading vid

ERROR: This video contains content from Xilam Animation, who has blocked it in your country on copyright grounds.


[youtube] Z3FFQVhYhXw: Downloading webpage
[youtube] Z3FFQVhYhXw: Downloading video info webpage
[youtube] 25APBqJBAek: Downloading webpage
[youtube] 25APBqJBAek: Downloading video info webpage
[youtube] 2Bbov7iif60: Downloading webpage
[youtube] 2Bbov7iif60: Downloading video info webpage
[youtube] 2Bbov7iif60: Downloading MPD manifest
[youtube] 2Bbov7iif60: Downloading MPD manifest
[youtube] 2Cly-MnjyWM: Downloading webpage
[youtube] 2Cly-MnjyWM: Downloading video info webpage
[youtube] P9uKyXepk3M: Downloading webpage
[youtube] P9uKyXepk3M: Downloading video info webpage
[youtube] YFFrXsWtRtM: Downloading webpage
[youtube] YFFrXsWtRtM: Downloading video info webpage
[youtube] YFFrXsWtRtM: Downloading MPD manifest
[youtube] YFFrXsWtRtM: Downloading MPD manifest
[youtube] YI9CYOMJgnk: Downloading webpage
[youtube] YI9CYOMJgnk: Downloading video info webpage
[youtube] 5tg-g1vxf4M: Downloading webpage
[youtube] 5tg-g1vxf4M: Downloading video info webpage
[youtube] ueYu2ELSxrE: Downloadi

ERROR: The uploader has not made this video available in your country.


[youtube] BW1ijtaThNo: Downloading webpage
[youtube] BW1ijtaThNo: Downloading video info webpage
[youtube] BYb0vAJI9jI: Downloading webpage
[youtube] BYb0vAJI9jI: Downloading video info webpage
[youtube] BYb0vAJI9jI: Downloading MPD manifest
[youtube] BYb0vAJI9jI: Downloading MPD manifest
[youtube] BdT6sEMUtio: Downloading webpage
[youtube] BdT6sEMUtio: Downloading video info webpage
[youtube] BeXp3OZ8Hc4: Downloading webpage
[youtube] BeXp3OZ8Hc4: Downloading video info webpage
[youtube] goucsRZV0VM: Downloading webpage
[youtube] goucsRZV0VM: Downloading video info webpage
[youtube] jFMl3q3ziCg: Downloading webpage
[youtube] jFMl3q3ziCg: Downloading video info webpage
[youtube] jJC1S1ZKuBk: Downloading webpage
[youtube] jJC1S1ZKuBk: Downloading video info webpage
[youtube] uBBpoRONa_Q: Downloading webpage
[youtube] uBBpoRONa_Q: Downloading video info webpage
[youtube] n-yo0x3-Xuc: Downloading webpage
[youtube] n-yo0x3-Xuc: Downloading video info webpage
[youtube] n-yo0x3-Xuc: Download

[youtube] RX7oSlS5zhM: Downloading MPD manifest
[youtube] RXkxn5qjMJc: Downloading webpage
[youtube] RXkxn5qjMJc: Downloading video info webpage
[youtube] ajOiniZ6Kl0: Downloading webpage
[youtube] ajOiniZ6Kl0: Downloading video info webpage
[youtube] akOWz3LjPFY: Downloading webpage
[youtube] akOWz3LjPFY: Downloading video info webpage
[youtube] L3FEIkYmtHA: Downloading webpage
[youtube] L3FEIkYmtHA: Downloading video info webpage
[youtube] L3IvVJ-lSBg: Downloading webpage
[youtube] L3IvVJ-lSBg: Downloading video info webpage
[youtube] wIfGoP4mbBs: Downloading webpage
[youtube] wIfGoP4mbBs: Downloading video info webpage
[youtube] wI_Qt0zI75Q: Downloading webpage
[youtube] wI_Qt0zI75Q: Downloading video info webpage
[youtube] dIhNoljSREo: Downloading webpage
[youtube] dIhNoljSREo: Downloading video info webpage
[youtube] gnf43OS_am4: Downloading webpage
[youtube] gnf43OS_am4: Downloading video info webpage
[youtube] LiTJ1N7tIpY: Downloading webpage
[youtube] LiTJ1N7tIpY: Downloading v

[youtube] Yp8h7r22AlU: Downloading video info webpage
[youtube] e8RC7PbcBz8: Downloading webpage
[youtube] e8RC7PbcBz8: Downloading video info webpage
[youtube] YdsFj5VZxso: Downloading webpage
[youtube] YdsFj5VZxso: Downloading video info webpage
[youtube] LTi1RLM39fo: Downloading webpage
[youtube] LTi1RLM39fo: Downloading video info webpage
[youtube] LUvnBGg_lO0: Downloading webpage
[youtube] LUvnBGg_lO0: Downloading video info webpage
[youtube] LVsD6WHHYIU: Downloading webpage
[youtube] LVsD6WHHYIU: Downloading video info webpage
[youtube] f-zyHaNsfkk: Downloading webpage
[youtube] f-zyHaNsfkk: Downloading video info webpage
[youtube] 1Te5WS1Pri0: Downloading webpage
[youtube] 1Te5WS1Pri0: Downloading video info webpage
[youtube] ncPlB2ufcxQ: Downloading webpage
[youtube] ncPlB2ufcxQ: Downloading video info webpage
[youtube] ncPlB2ufcxQ: Downloading MPD manifest
[youtube] ncPlB2ufcxQ: Downloading MPD manifest
[youtube] 2Oyg5TdDMC0: Downloading webpage
[youtube] 2Oyg5TdDMC0: Download

[youtube] HWFT1_jCaF4: Downloading video info webpage
[youtube] zYsvZt_16uw: Downloading webpage
[youtube] zYsvZt_16uw: Downloading video info webpage
[youtube] zYsvZt_16uw: Downloading MPD manifest
[youtube] mnCWkAf_Ehc: Downloading webpage
[youtube] mnCWkAf_Ehc: Downloading video info webpage
[youtube] 9tj2587rezs: Downloading webpage
[youtube] 9tj2587rezs: Downloading video info webpage
[youtube] cVoYWlch6Uo: Downloading webpage
[youtube] cVoYWlch6Uo: Downloading video info webpage
[youtube] cW0s_y7cjnc: Downloading webpage
[youtube] cW0s_y7cjnc: Downloading video info webpage
[youtube] 396pzztCYng: Downloading webpage
[youtube] 396pzztCYng: Downloading video info webpage
[youtube] G3_isPfugjo: Downloading webpage
[youtube] G3_isPfugjo: Downloading video info webpage
[youtube] G4kwiGMsqbE: Downloading webpage
[youtube] G4kwiGMsqbE: Downloading video info webpage
[youtube] OX2lDaUYZpg: Downloading webpage
[youtube] OX2lDaUYZpg: Downloading video info webpage
[youtube] utCEx8QNoL4: Do

ERROR: This video contains content from TV TOKYO Corporation, who has blocked it in your country on copyright grounds.


[youtube] nvwttVXcD98: Downloading webpage
[youtube] nvwttVXcD98: Downloading video info webpage
[youtube] nw0jenHJHZs: Downloading webpage
[youtube] nw0jenHJHZs: Downloading video info webpage
[youtube] nyWEVhHZkvY: Downloading webpage
[youtube] nyWEVhHZkvY: Downloading video info webpage
[youtube] kzchmVmzX6Q: Downloading webpage
[youtube] kzchmVmzX6Q: Downloading video info webpage
[youtube] l1KzWlcZIEc: Downloading webpage
[youtube] l1KzWlcZIEc: Downloading video info webpage
[youtube] l2pnyW6Gzic: Downloading webpage
[youtube] l2pnyW6Gzic: Downloading video info webpage
[youtube] W0xXnxcsvVA: Downloading webpage
[youtube] W0xXnxcsvVA: Downloading video info webpage
[youtube] W5RWngSDZAI: Downloading webpage
[youtube] W5RWngSDZAI: Downloading video info webpage
[youtube] H6Yizv1gwho: Downloading webpage
[youtube] H6Yizv1gwho: Downloading video info webpage
[youtube] H8be90NTn8Y: Downloading webpage
[youtube] H8be90NTn8Y: Downloading video info webpage
[youtube] HDroQNDekbA: Downloa

## Check the data before using it

In [10]:
# Summary statistics of the collected frame; the cell's last expression is
# rendered as its output.
data.describe()

Unnamed: 0,duration,views
count,1008.0,1008.0
mean,255.155754,73762.75
std,99.960183,754857.9
min,119.0,1011.0
25%,171.75,2356.75
50%,237.0,5223.5
75%,318.0,17225.25
max,501.0,16514710.0


In [9]:
# Show only the first rows: rendering the whole frame floods the notebook
# without adding information.
display(data.head(10))

Unnamed: 0,creator,duration,id,labels,ladder,title,views
0,,210.0,eguZ69v_vlQ,"[26, 466]","[{'160 - 192x144 (DASH video)': 108.0}, {'133 ...",Muerte por amor cap 1 part 2 LPS,1058.0
1,Basshunter,216.0,ER9Hdp04tWs,"[12, 237]","[{'160 - 192x144 (144p)': 119.682}, {'133 - 32...",Best Football Stadiums 2008,19212.0
2,,177.0,ETF2-Zz3J18,[375],"[{'160 - 256x144 (144p)': 111.657}, {'133 - 42...",Law of Reflection Practical Activity for Students,146545.0
3,,301.0,jtvbLq9bYRc,[2607],"[{'160 - 236x144 (144p)': 37.789}, {'133 - 394...","10.2 (1 of 4) Arcs in a Circle, Basics",2394.0
4,,207.0,6BPXQMxdHog,"[2, 7, 1000]","[{'160 - 256x144 (144p)': 112.108}, {'133 - 42...",Tutorials with Twan - How to install the Brake...,14749.0
5,,145.0,-j989rqetQE,"[4, 9, 10, 37, 1523]","[{'160 - 256x144 (144p)': 114.595}, {'133 - 42...",Blues Guitar Epiphone Riviera!!! (slide playi...,2470.0
6,,281.0,F-4h2WwVr3g,"[0, 5, 16]","[{'160 - 216x144 (144p)': 116.82}, {'133 - 360...",Retrospectiva Branca de Neve Making Off,10209.0
7,Flexy,492.0,UZt7rP0poxs,"[0, 12, 91]","[{'160 - 192x144 (DASH video)': 108.0}, {'133 ...",أكاديمية الفتى العربي : عبد الله كوليبالي,9586.0
8,,204.0,kGFuNGexHJY,"[0, 1, 69, 236, 589]","[{'160 - 192x144 (DASH video)': 108.0}, {'133 ...",Nyoronoru (Fei Long) vs. GCYoshi (Cody) EVO 20...,1714.0
9,,480.0,kKZBuy8kaj8,"[5, 16]","[{'160 - 192x144 (144p)': 114.622}, {'133 - 19...",Dino Rey capitulo 47 parte 2/3,25590.0


## Export the data for others to use

In [8]:
# Write the collected frame to a CSV file in the current working directory.
data.to_csv('yt8m_data-3.csv')