In [10]:
import json
import os

import pandas as pd
from googleapiclient.discovery import build
from pymongo import MongoClient, UpdateMany

#### Importing datasets

In [3]:
# Load the two CSV files this notebook works from into DataFrames.
videos_csv_path = './csvs/modified_videos_xx.csv'   # last updated videos collection
categories_csv_path = './csvs/categories.csv'

videos_df = pd.read_csv(videos_csv_path)
categories_df = pd.read_csv(categories_csv_path)

In [None]:
# Print a summary of the loaded videos frame (columns, dtypes, non-null counts).
videos_df.info()

#### Getting the list of `youtube_id` values for videos with no category in the live dataset

In [5]:
# Boolean mask selecting the rows whose `category` value is missing.
has_no_category = videos_df['category'].isna()

# Collect the `youtube_id` of every such row as a plain Python list.
videos_YTids_noCategory_list = list(videos_df.loc[has_no_category, 'youtube_id'])

# Preview the first five ids.
videos_YTids_noCategory_list[:5]

['k8TJcbRauzM', 'VQ-DpvVA-T4', 'K_8EuyzTkOs', 'rDBj779ERt0', 'Qw-LTlrT7qQ']

### YouTube API key

In [2]:
# The API key is read from the environment rather than written into the notebook.
# os.environ.get returns None when DEVELOPER_KEY is not set.
developer_key = os.environ.get('DEVELOPER_KEY')

# YouTube Data API v3 client (project-0127 Data Cleaning).
youtube = build(
    'youtube',
    'v3',
    developerKey=developer_key,
)

### Connection to MongoDB

In [None]:
# Connection settings are read from the environment rather than written into the notebook.
mongodb_uri = os.environ.get('MONGODB_URI')
database_name = os.environ.get('DATABASE_NAME')

# Fail fast on missing settings: MongoClient(None) silently falls back to the
# default localhost server, so the update cells further down would write to the
# wrong deployment, and client[None] raises an unhelpful TypeError.
missing_settings = [
    name
    for name, value in (('MONGODB_URI', mongodb_uri), ('DATABASE_NAME', database_name))
    if not value
]
if missing_settings:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(missing_settings)
    )

client = MongoClient(mongodb_uri)
database = client[database_name]

#### Connecting to videos and categories collection

In [9]:
# Handles to the two collections used by the aggregation and update cells.
videos_collection_name = 'modified_videos_xx'   # last updated videos collection
categories_collection_name = 'categories'

videos = database[videos_collection_name]
categories = database[categories_collection_name]

#### Updating category in videos collection

In [11]:
# Stage 1: keep only the videos whose youtube_id is in the no-category list.
match_uncategorised = {
    "$match": {"youtube_id": {"$in": videos_YTids_noCategory_list}}
}

# Stage 2: join each video to the `categories` collection on category_id.
# Despite its name, the `videoDoc` array holds the matching category documents.
lookup_category = {
    "$lookup": {
        "from": "categories",
        "localField": "category_id",
        "foreignField": "category_id",
        "as": "videoDoc",
    }
}

# Stage 3: flatten `videoDoc` so each output document carries one category.
# Videos whose lookup matched nothing are dropped by $unwind.
unwind_category = {"$unwind": "$videoDoc"}

videoDocs = videos.aggregate([match_uncategorised, lookup_category, unwind_category])

In [12]:
# Map each video's youtube_id to the title of its joined category document.
# videoDocs is the cursor returned by aggregate(); iterating it here consumes it,
# so re-running this cell without re-running the aggregation yields an empty dict.
videos_category_dict = {
    video['youtube_id']: video['videoDoc']['title']
    for video in videoDocs
}

In [13]:
# Keep a local record of the youtube_id -> category title mapping.
# The file is opened in append mode, so every run adds one more record. Each
# record goes on its own line (JSON Lines) so the file stays parseable after
# repeated runs, and `with` closes the handle even if the write raises.
with open('./youtube_api/videos_category.txt', "a") as category:
    category.write(json.dumps(videos_category_dict) + "\n")

In [14]:
# Write the resolved category title back onto the matching video documents.
# One UpdateMany request is built per youtube_id and all of them are sent in a
# single bulk_write call instead of one server round trip per video.
category_updates = [
    UpdateMany({"youtube_id": youtube_id}, {"$set": {"category": category_title}})
    for youtube_id, category_title in videos_category_dict.items()
]

try:
    # bulk_write rejects an empty request list, so skip the call when nothing was resolved.
    if category_updates:
        # ordered=False: a failure on one request does not stop the remaining updates.
        bulk_result = videos.bulk_write(category_updates, ordered=False)
        print(f"matched {bulk_result.matched_count}, modified {bulk_result.modified_count} video documents")
except Exception as e:
    # Best-effort, as before: report the failure instead of stopping the notebook.
    print(e)

#### Setting `category` to null for videos where no category was found

In [15]:
# Give every video document that lacks a `category` field an explicit null value.
missing_category_filter = {"category": {'$exists': False}}
null_category_update = {"$set": {"category": None}}

try:
    videos.update_many(missing_category_filter, null_category_update)
except Exception as e:
    # Best-effort: the error is printed and the notebook keeps running.
    print(e)