In [306]:
import os
import pandas as pd
pd.options.display.max_columns = None
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
from utils import searchVideosByListOfIds, fillCategoryIds
from datetime import datetime, timedelta
import sys, traceback

# YouTube Data API key. It is taken from the YT_API environment variable so a
# real key never has to be saved inside the notebook; "XYZ" is only the
# placeholder used when the variable is unset or empty.
api_key = os.environ.get('YT_API') or "XYZ"

# Earlier-stage datasets (currently disabled, kept for reference).
# df = pd.read_csv("../../our_data/Etap4/GB_US-rdy-to-learn-properly-V2.csv")
# df = pd.read_csv("../../our_data/Etap3/dfGB_merged.csv")
# df = pd.read_csv("../../our_data/Etap1/GB_US.csv")

# Region selector: `download` decides which "<region>_to_download.csv" list is
# processed and which "<region>_new_data.csv" file receives the results.
todownload = {0:"US",1:"GB"}
download = todownload[0] #HERE CHOSE WHICH ONE TO DOWNLOAD
# Passed as `maxResults` to the per-channel search request.
NUMBER_OF_VIDEOS = 10

# Publication window forwarded as publishedAfter / publishedBefore.
start_datetime = "2017-11-14T23:59:59Z"
end_datetime = "2018-06-14T23:59:59Z"


# Column layout of every frame produced by this notebook.
# NOTE(review): 'description ' ends with a space - presumably to match the
# header of the source dataset; confirm before renaming it.
COLUMNS =['video_id', 'trending_date', 'title', 'channel_title', 'category_id',
       'publish_time', 'tags', 'views', 'likes', 'dislikes', 'comment_count',
       'thumbnail_link', 'comments_disabled', 'ratings_disabled',
       'video_error_or_removed', 'description ']

## CREATING THE LIST OF CHANNEL NAMES (RUN THIS ONLY IF YOU NEED A NEW LIST)

In [294]:
# GBdf = pd.read_csv("./../../data/youtube_data/GB_videos_5p.csv", sep=';', encoding='utf_16')
# USdf = pd.read_csv("./../../data/youtube_data/US_videos_5p.csv", sep=';')
# GB_names = pd.DataFrame(set(GBdf.channel_title))
# US_names = pd.DataFrame(set(USdf.channel_title))
# GB_names.to_csv("../../our_data/Etap5/GB_to_download.csv", index=False)
# US_names.to_csv("../../our_data/Etap5/US_to_download.csv", index=False)

### Functions communicating with the YouTube API

In [257]:
def searchByQuery(query):
    """Run a YouTube search for ``query`` and return the raw API response.

    Only one result is requested (``maxResults=1``) and only the ``snippet``
    part of it. Whatever ``execute()`` returns is handed back unchanged.
    """
    # Lets OAuthlib run without HTTPS verification for local use.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    client = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    search_params = {"part": "snippet", "maxResults": 1, "q": query}
    return client.search().list(**search_params).execute()

def searchChannelIdByName(channel_name):
    """Resolve a YouTube username to its channel id.

    The lookup uses ``channels().list`` with ``forUsername=channel_name``.
    The raw response is printed. Returns the ``id`` of the first item, or
    ``-1`` (after printing "NO channel ID") when the response reports zero
    total results.
    """
    # Lets OAuthlib run without HTTPS verification for local use.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    client = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    lookup = client.channels().list(part="id", forUsername=channel_name).execute()
    print(lookup)

    found_any = lookup['pageInfo']['totalResults'] != 0
    if found_any:
        return lookup['items'][0]['id']

    print("NO channel ID")
    return -1


def getVideosOfChannelBetweenDates(channel_name, start_datetime, end_datetime):
    """Search for the videos a channel published inside a time window.

    Parameters
    ----------
    channel_name : str
        Username that ``searchChannelIdByName`` resolves to a channel id.
    start_datetime, end_datetime : str
        Window bounds, forwarded as ``publishedAfter`` / ``publishedBefore``
        (timestamps such as "2017-11-14T23:59:59Z").

    Returns
    -------
    dict or int
        The raw ``search().list`` response (at most ``NUMBER_OF_VIDEOS``
        items), or ``-1`` when the channel id could not be resolved.
    """
    # Resolve the channel first: no API client is needed when it is unknown.
    cID = searchChannelIdByName(channel_name)
    if cID == -1:
        return -1

    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=api_key)

    request = youtube.search().list(
        part=["snippet"],
        channelId=cID,
        # Without this filter the search may also return channel and playlist
        # hits, whose `id` carries no `videoId` for the caller to use.
        type="video",
        publishedBefore=end_datetime,
        publishedAfter=start_datetime,
        maxResults=NUMBER_OF_VIDEOS
    )

    return request.execute()

def getStatisticsForVideoID(videoID):
    """Fetch the ``statistics`` part for ``videoID``.

    Issues ``videos().list`` with ``maxResults=1`` and returns the raw
    response of ``execute()`` unchanged.
    """
    client = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    stats_request = client.videos().list(part='statistics', maxResults=1, id=videoID)
    return stats_request.execute()
    
def getFeaturesForChannel(channel_name,start_datetime,end_datetime):
    """Build one row per video a channel published inside a time window.

    Parameters
    ----------
    channel_name : str
        Channel to look up; also stored in the ``channel_title`` column.
    start_datetime, end_datetime : str
        Window bounds passed on to ``getVideosOfChannelBetweenDates``.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns listed in ``COLUMNS`` and a fresh 0..n-1
        index. It is empty when the channel cannot be resolved or no item is
        returned ("NO MOVIES" is printed in the latter case). Columns the API
        calls do not provide (``trending_date``, ``category_id``, ``tags``,
        ``comments_disabled``, ``ratings_disabled``,
        ``video_error_or_removed``) are filled with ``None``.

    Notes
    -----
    Search hits without a ``videoId`` (channels, playlists) are skipped, and
    statistics the API omits (for example like/dislike/comment counts) become
    ``None`` instead of raising, so one incomplete video no longer discards
    the whole channel.
    """
    response_videos = getVideosOfChannelBetweenDates(channel_name, start_datetime, end_datetime)
    if response_videos == -1:
        return pd.DataFrame(columns=COLUMNS)

    items = response_videos.get('items') or []
    if not items:
        print("NO MOVIES")
        return pd.DataFrame(columns=COLUMNS)

    rows = []
    for vid in items:
        vid_id = (vid.get('id') or {}).get('videoId')
        if vid_id is None:
            # Not a video hit - there is nothing to ask statistics for.
            continue

        stats_items = getStatisticsForVideoID(vid_id).get('items') or []
        # An empty statistics response is treated as "all counters unknown".
        statistics = stats_items[0].get('statistics', {}) if stats_items else {}
        snippet = vid.get('snippet') or {}
        thumbnail = ((snippet.get('thumbnails') or {}).get('high') or {}).get('url')

        rows.append({
            'video_id': vid_id,
            'trending_date': None,
            'title': snippet.get('title'),
            'channel_title': channel_name,
            'category_id': None,
            'publish_time': snippet.get('publishedAt'),
            'tags': None,
            'views': statistics.get('viewCount'),
            'likes': statistics.get('likeCount'),
            'dislikes': statistics.get('dislikeCount'),
            'comment_count': statistics.get('commentCount'),
            'thumbnail_link': thumbnail,
            'comments_disabled': None,
            'ratings_disabled': None,
            'video_error_or_removed': None,
            'description ': snippet.get('description'),
        })

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, columns=COLUMNS)


## DOWNLOADING DATA

In [304]:
# Lists of channel names that still have to be downloaded, one per region.
GB_names_to_download =  pd.read_csv("../../our_data/Etap5/GB_to_download.csv")
US_names_to_download =  pd.read_csv("../../our_data/Etap5/US_to_download.csv")

# Names handled in this run; they are subtracted from the to-download list
# when the results are saved.
names_to_delete = []
names = pd.DataFrame()

if download == 'GB':
    names = GB_names_to_download
elif download == "US":
    names = US_names_to_download


# Per-channel frames are collected here and concatenated once after the loop.
downloaded_frames = []

for _, row in names.iterrows():
    # The channel name is the first (only) column; `.iloc` reads it by
    # position regardless of how the column is labelled.
    name = row.iloc[0]
    print(name)
    try:
        downloaded_frames.append(getFeaturesForChannel(name,start_datetime,end_datetime))
    except googleapiclient.errors.HttpError:
        # Any HTTP error from the API is treated as an exhausted quota: stop
        # here and leave this channel on the list for the next run.
        print("YT API LIMIT ACHIVED")
        break
    except Exception as e:
        # Other failures are reported; the channel is still marked as handled
        # below, so it is not retried on the next run.
        print("ERROR: ", e)
    names_to_delete.append(name)

newDataDf = pd.concat([pd.DataFrame(columns=COLUMNS)] + downloaded_frames, axis=0)
        
        
# removing names
# What is still pending = every name that was loaded minus the ones handled in this run.
new_names = set(names.iloc[:, 0])
new_names = new_names.difference(set(names_to_delete))


if download in ('GB', 'US'):
    names_path = "../../our_data/Etap5/" + download + "_to_download.csv"
    data_path = "../../our_data/Etap5/" + download + "_new_data.csv"

    # A set cannot be passed to the DataFrame constructor in current pandas,
    # so the names are written from a list. Sorting by `str` keeps the file
    # deterministic, and the explicit '0' column keeps the header the loader
    # already expects, even when no name is left.
    remaining_names = pd.DataFrame({'0': sorted(new_names, key=str)})
    remaining_names.to_csv(names_path, index=False)

    if os.path.isfile(data_path):
        # The file is written with its index as the first column, so read it
        # back as the index; otherwise every run would add one more
        # "Unnamed: 0" column to the data.
        oldDataDF = pd.read_csv(data_path, index_col=0)
        # Drop index columns accumulated by files saved before this fix.
        leftover_index = [c for c in oldDataDF.columns if str(c).startswith("Unnamed:")]
        oldDataDF = oldDataDF.drop(columns=leftover_index)
        newDataDf = pd.concat([newDataDf, oldDataDF], axis=0)

    # A fresh 0..n-1 index keeps the saved index column meaningful.
    newDataDf = newDataDf.reset_index(drop=True)
    newDataDf.to_csv(data_path)
    

Nicki Minaj
YT API LIMIT ACHIVED
