In [2]:
import os
from pymongo import MongoClient
import pandas as pd
import json
from googleapiclient.discovery import build

#### Importing videos csv

In [7]:
# Source CSV for the videos data: point this at the videos or modified_videos csv,
# whichever one was updated most recently.
videos_df = pd.read_csv(
    './csvs/modified_videos.csv'
)

In [8]:
# Print the column names, non-null counts and dtypes of the loaded frame.
videos_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7355 entries, 0 to 7354
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   _id                 7355 non-null   object 
 1   audio_descriptions  7355 non-null   object 
 2   category            3631 non-null   object 
 3   category_id         3631 non-null   float64
 4   created_at          7353 non-null   float64
 5   description         5376 non-null   object 
 6   duration            3631 non-null   float64
 7   legacy_video_id     1282 non-null   float64
 8   tags                7005 non-null   object 
 9   title               7309 non-null   object 
 10  updated_at          7355 non-null   int64  
 11  views               7355 non-null   int64  
 12  youtube_id          7355 non-null   object 
 13  youtube_status      7355 non-null   object 
dtypes: float64(4), int64(2), object(8)
memory usage: 804.6+ KB


#### Getting list of youtube_id for all videos in live dataset 

In [None]:
# Extract the youtube_id column as a plain Python list, one entry per row.
videos_YTids_list = videos_df['youtube_id'].tolist()

### YouTube API key

In [None]:
# Read the YouTube Data API key from the environment so it is never stored in the notebook.
developer_key = os.environ.get('DEVELOPER_KEY')
if not developer_key:
    # Fail fast: os.environ.get returns None for an unset variable, and a client built
    # without a key is expected to have its lookups rejected later on.
    raise RuntimeError("DEVELOPER_KEY environment variable is not set")

youtube = build('youtube', 'v3', developerKey=developer_key) # project-0127 Data Cleaning


##### Fetching youtube status of videos

In [None]:
# IDs to look up: an alias of the list extracted from the CSV (same object, not a copy).
videos_youtube_id = videos_YTids_list
# Resource parts requested from videos.list for every ID.
part_string = ['status', 'contentDetails']

In [None]:
# Considering videos youtube status available when
#     - embeddable = True
#     - privacy status = public / unlisted
#     - upload status = processed / uploaded
# Any other answer from the API is 'unavailable'; a lookup that raises is stored as ''.

def _classify_youtube_status(response):
    """Map a videos.list response to 'available' or 'unavailable'.

    Returns 'unavailable' when ``items`` is empty, or when the first item's
    ``status`` is missing or fails any of the availability criteria above.
    Raises KeyError when ``items`` or the first item's ``status`` is absent.
    """
    items = response['items']
    if not items:
        return 'unavailable'

    status = items[0]['status']
    is_available = (
        status.get('embeddable') is True
        and status.get('privacyStatus') in ('public', 'unlisted')
        and status.get('uploadStatus') in ('processed', 'uploaded')
    )
    return 'available' if is_available else 'unavailable'


videos_youtube_status_dict = {}
for video_id in videos_youtube_id:
    try:
        response = youtube.videos().list(
            part=part_string,
            id=video_id,
        ).execute()
        # The helper always returns a value, so a video that exists but fails the
        # criteria can no longer inherit the status of the previously processed video.
        videos_youtube_status = _classify_youtube_status(response)

    except Exception as e:
        # Best effort: report the failure and record an empty status for this ID.
        print("An exception occured")
        print(e)
        videos_youtube_status = ''

    videos_youtube_status_dict[video_id] = videos_youtube_status
        

In [None]:
# Persist the collected statuses as JSON text. The context manager closes the file even
# if the write raises. Mode "a" appends, so whatever the file already holds is kept.
with open('./youtube_api/videos_youtube_status.txt', "a") as status_file:
    status_file.write(json.dumps(videos_youtube_status_dict))

### Connection to MongoDB

In [None]:
# Connection settings come from the environment so credentials stay out of the notebook.
mongodb_uri = os.environ.get('MONGODB_URI')
database_name = os.environ.get('DATABASE_NAME')

# Fail fast on missing settings: os.environ.get returns None for an unset variable.
# NOTE(review): MongoClient(None) is documented to fall back to the driver's default
# host, which would silently target the wrong server; confirm against the PyMongo docs.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ('MONGODB_URI', mongodb_uri),
        ('DATABASE_NAME', database_name),
    )
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

client = MongoClient(mongodb_uri)
database = client[database_name]

#### Connecting to videos collection

In [None]:
# Collection handle used for the youtube_status updates.
# Use database['videos'] instead if the modified_videos collection does not exist.
videos = database['modified_videos']

##### Updating youtube_status for videos

In [None]:
# Write each collected status to every document that carries the matching youtube_id.
for youtube_id, status in videos_youtube_status_dict.items():
    match_filter = {"youtube_id": youtube_id}
    status_change = {"$set": {"youtube_status": status}}
    videos.update_many(match_filter, status_change)