In [4]:
import pandas as pd
import numpy as np
from apiclient.discovery import build
from pymongo import MongoClient
import os
from dotenv import load_dotenv

In [18]:
# testing the youtube data api

In [19]:
# my public api key, not sure if you can use it, else set up one for yourself

load_dotenv()

key = os.getenv('GOOGLE_API_KEY')

In [20]:
# setting up the api client

client = build('youtube', 'v3', developerKey=key)

query = "Veritasium"

In [21]:
# hit a search query

search = client.search().list(q=query, part='snippet', type='video', maxResults=50).execute()

In [22]:
# filtering out the videoIds from the search query

videoIds = [v['id']['videoId'] for v in search['items']]

In [10]:
# fetching video content data for all videoIds

data = client.videos().list(id=videoIds, part="snippet").execute()

data['items'][0]['snippet']

{'publishedAt': '2022-05-12T14:52:13Z',
 'channelId': 'UCHnyfMqiRRG1u-2MsSQLbXA',
 'title': "A Picture of the Milky Way's Supermassive Black Hole",
 'description': "This is an image of the supermassive black hole, Sagittarius A*, at the center of our Milky Way galaxy.\nVisit https://www.kiwico.com/veritasium30 to get 30% off your first month of any crate!\n\n▀▀▀\nImage of Sgr A* from EHT collaboration\nEvent Horizon Telescope collaboration: https://ve42.co/EHT\n\nAnimations from The Relativistic Astrophysics group, Institute for Theoretical Physics,  Goethe-Universität Frankfurt. Massive thanks to Prof. Luciano Rezzolla, Dr Christian Fromm and Dr Alejandro Cruz-Osorio.\n\nA huge thanks to Prof. Peter Tuthill and Dr Manisha Caleb for feedback on earlier versions of this video and helping explain VLBI.\n\nGreat video by Thatcher Chamberlin about VLBI here – https://youtu.be/Y8rAHTvpJbk\n\nAnimations and simulations with English text:\nL. R. Weih & L. Rezzolla (Goethe University Frankfurt

In [11]:
# comparing video category ids with actual titles

for e in data['items']:
    print(e['snippet']['categoryId'], e['snippet']['title'])

27 A Picture of the Milky Way's Supermassive Black Hole
27 How Electricity Actually Works
27 Future Computers Will Be Radically Different
27 The Man Who Accidentally Killed The Most People In History
27 Celsius Made His Thermometer Upside Down
27 Is Success Luck or Hard Work?
27 Why Life Seems to Speed Up as We Age
27 The Most Powerful Computers You've Never Heard Of
27 The Absurdity of Detecting Gravitational Waves
27 The Surprising Secret of Synchronization
27 The Big Misconception About Electricity
27 Is Glass a Liquid?
27 This Particle Breaks Time Symmetry
27 This equation will change how you see the world (the logistic map)
27 How Horses Save Humans From Snake Bites
27 Why Gravity is NOT a Force
27 Parallel Worlds Probably Exist. Here’s Why
27 How Imaginary Numbers Were Invented
27 Most People Don't Know How Bikes Work
27 Why Einstein Thought Nuclear Weapons Impossible
27 Math's Fundamental Flaw
27 How Trees Bend the Laws of Physics
27 How Were the Pyramids Built?
27 This is why w

In [12]:
# fetching all possible categories for region 'DE' (Germany), you can try other regions as well

categories = client.videoCategories().list(part='snippet', regionCode='US').execute()
for c in categories['items']:
    print("ID: {0}, Title: {1}".format(c['id'], c['snippet']['title']))

ID: 1, Title: Film & Animation
ID: 2, Title: Autos & Vehicles
ID: 10, Title: Music
ID: 15, Title: Pets & Animals
ID: 17, Title: Sports
ID: 18, Title: Short Movies
ID: 19, Title: Travel & Events
ID: 20, Title: Gaming
ID: 21, Title: Videoblogging
ID: 22, Title: People & Blogs
ID: 23, Title: Comedy
ID: 24, Title: Entertainment
ID: 25, Title: News & Politics
ID: 26, Title: Howto & Style
ID: 27, Title: Education
ID: 28, Title: Science & Technology
ID: 29, Title: Nonprofits & Activism
ID: 30, Title: Movies
ID: 31, Title: Anime/Animation
ID: 32, Title: Action/Adventure
ID: 33, Title: Classics
ID: 34, Title: Comedy
ID: 35, Title: Documentary
ID: 36, Title: Drama
ID: 37, Title: Family
ID: 38, Title: Foreign
ID: 39, Title: Horror
ID: 40, Title: Sci-Fi/Fantasy
ID: 41, Title: Thriller
ID: 42, Title: Shorts
ID: 43, Title: Shows
ID: 44, Title: Trailers


In [23]:
# connect to mongo cloud db and select collection

uri = os.getenv('MONGO_URI')
client = MongoClient(uri)
db = client['youtube-db']
collection = db['video-information']



In [14]:
# clean data to insert

cleaned_data = [{'title': e['snippet']['title'], 'description': e['snippet']['description'], 'tags': e['snippet']['tags'], 'category_id':  int(e['snippet']['categoryId'])} for e in data['items']]

In [15]:
# insert data in db

collection.insert_many(cleaned_data)

<pymongo.results.InsertManyResult at 0x7fa37018a800>

In [16]:
collection.find_one()

{'_id': ObjectId('62866cd8a675906c1b0ad811'),
 'title': "A Picture of the Milky Way's Supermassive Black Hole",
 'description': "This is an image of the supermassive black hole, Sagittarius A*, at the center of our Milky Way galaxy.\nVisit https://www.kiwico.com/veritasium30 to get 30% off your first month of any crate!\n\n▀▀▀\nImage of Sgr A* from EHT collaboration\nEvent Horizon Telescope collaboration: https://ve42.co/EHT\n\nAnimations from The Relativistic Astrophysics group, Institute for Theoretical Physics,  Goethe-Universität Frankfurt. Massive thanks to Prof. Luciano Rezzolla, Dr Christian Fromm and Dr Alejandro Cruz-Osorio.\n\nA huge thanks to Prof. Peter Tuthill and Dr Manisha Caleb for feedback on earlier versions of this video and helping explain VLBI.\n\nGreat video by Thatcher Chamberlin about VLBI here – https://youtu.be/Y8rAHTvpJbk\n\nAnimations and simulations with English text:\nL. R. Weih & L. Rezzolla (Goethe University Frankfurt)\nhttps://youtu.be/jvftAadCFRI\n\nV

In [17]:
# fetch and insert data for multiple queries

queries = ["numberphile", "in a nutshell", "ted-ed", "Daniel Jung", "math explained", "learn a new language", "learn spanish", "learn english"]

for q in queries:
    # hit a search query
    search = client.search().list(q=q, part='snippet', type='video', maxResults=50).execute()
    
    # filtering out the videoIds from the search query
    videoIds = [v['id']['videoId'] for v in search['items']]
    
    # fetching video content data for all videoIds
    data = client.videos().list(id=videoIds, part="snippet").execute()
    
    cleaned_data = [{'title': e['snippet']['title'], 'description': e['snippet']['description'], 'category_id':  int(e['snippet']['categoryId'])} for e in data['items']]
    
    collection.insert_many(cleaned_data)


TypeError: 'Database' object is not callable. If you meant to call the 'search' method on a 'MongoClient' object it is failing because no such method exists.

In [None]:
educational = list(collection.find({"category_id": 27}))

In [None]:
len(educational)

337