In [None]:
import os
import pandas as pd
import json
from googleapiclient.discovery import build
from pymongo import MongoClient

#### Importing datasets

In [None]:
# Load the two local CSV snapshots this notebook works from.
# NOTE(review): the wishlist file name matches the MongoDB collection name used
# further down, so this is presumably an export of that collection — confirm it
# is up to date before running the update cells.
wishlist_df = pd.read_csv('./csvs/modified_wish_list_xx.csv')     # last updated wishlist videos collection
categories_df = pd.read_csv('./csvs/categories.csv')

In [None]:
# Print the column list with non-null counts and dtypes; the non-null count of
# `category` shows how many rows are still missing a category.
wishlist_df.info()

#### Getting the list of `youtube_id` values for wishlist videos with no category in the live dataset

In [None]:
# Rows whose `category` is missing (NaN) in the CSV snapshot.
category_is_missing = wishlist_df['category'].isna()

# youtube_id of every such row, as a plain Python list so it can be passed to
# the MongoDB `$in` filter below.
wishlist_YTids_noCategory_list = list(
    wishlist_df.loc[category_is_missing, 'youtube_id']
)

# Peek at the first few ids as a sanity check.
wishlist_YTids_noCategory_list[:5]

### YouTube API key

In [None]:
# The API key is read from the environment so it never lives in the notebook.
# `os.getenv` yields None when DEVELOPER_KEY is unset.
developer_key = os.getenv('DEVELOPER_KEY')

# YouTube Data API v3 client.
youtube = build(
    'youtube',
    'v3',
    developerKey=developer_key,
)  # project-0127 Data Cleaning

### Connection to MongoDB

In [None]:
# Connection settings come from the environment; both are None when unset.
mongodb_uri = os.getenv('MONGODB_URI')
database_name = os.getenv('DATABASE_NAME')

# Open the client and select the working database by name.
client = MongoClient(mongodb_uri)
database = client[database_name]

#### Connecting to the wishlist videos and categories collections

In [None]:
# Handles to the two collections this notebook reads from and writes to.
wishlist, categories = (
    database['modified_wish_list_xx'],   # last updated wishlist videos collection
    database['categories'],
)

#### Updating category in wishlist videos collection

In [None]:
# Pipeline: for every wishlist video that has no category in the CSV snapshot,
# join the matching document from the `categories` collection on `category_id`.
category_lookup_pipeline = [
    # Restrict to the videos identified as missing a category.
    {
        "$match": {"youtube_id": {"$in": wishlist_YTids_noCategory_list}}
    },
    # Attach the matching category document(s) as the array field `wishlistDoc`.
    {
        "$lookup": {
            "from": "categories",
            "localField": "category_id",
            "foreignField": "category_id",
            "as": "wishlistDoc"
        }
    },
    # Flatten the array to one output document per matched category. Videos
    # whose lookup matched nothing are dropped here.
    {
        "$unwind": "$wishlistDoc"
    }
]

# `aggregate` returns a single-pass cursor. Materialise it into a list so the
# cells below can be re-run without silently iterating an exhausted cursor.
wishlistDocs = list(wishlist.aggregate(category_lookup_pipeline))

In [None]:
# Map each youtube_id to the title of its joined category document.
# If the same youtube_id appears more than once, the last one wins.
wishlist_category_dict = {
    joined['youtube_id']: joined['wishlistDoc']['title']
    for joined in wishlistDocs
}

In [None]:
# Persist the youtube_id -> category title mapping to a text file.
# The file is opened in append mode, so every run adds one JSON object. Each
# object is newline-terminated so repeated runs stay parseable line by line
# instead of being glued together as `{...}{...}`.
# The `with` block guarantees the handle is closed even if serialisation or
# the write fails.
with open('./youtube_api/wishlist_category.txt', "a") as category:
    category.write(json.dumps(wishlist_category_dict) + "\n")

In [None]:
# Write each resolved category title back onto the wishlist documents that
# share the youtube_id.
# The first failure stops the loop (remaining ids are NOT updated). The report
# below states where it stopped, so a partially applied run is visible rather
# than hidden behind a bare error message.
ids_processed = 0
current_youtube_id = None
try:
    for current_youtube_id, category_title in wishlist_category_dict.items():
        wishlist.update_many(
            {"youtube_id": current_youtube_id},
            {"$set": {"category": category_title}},
        )
        ids_processed += 1
except Exception as e:
    print(e)
    print(
        f"Category update stopped at youtube_id {current_youtube_id!r}: "
        f"{ids_processed} of {len(wishlist_category_dict)} ids processed."
    )

#### Setting category to null for videos where no category was found

In [None]:
# Documents that still have no `category` field at all ...
no_category_field = {"category": {'$exists': False}}
# ... get the field added with an explicit null value.
set_category_null = {"$set": {"category": None}}

try:
    wishlist.update_many(no_category_field, set_category_null)
except Exception as err:
    # Best-effort: report the failure and let the notebook continue.
    print(err)