In [2]:
import os
import datetime as datetime
import pytz
import pandas as pd
import csv
import json
import time

from retrying import retry
from retrying import RetryError
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser

# API key is read from the API_CODE environment variable. os.environ.get
# returns None when the variable is unset, and that None is passed straight
# through to build() below.
DEVELOPER_KEY= os.environ.get('API_CODE')
# Service name and version handed to the discovery client.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Shared client object: the request helpers defined in later cells call
# service.search() and service.channels() on it.
service= build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

In [3]:
def channel_date_keyword(searchKey,pageToken,dt,dt2):
    """Execute one search().list request for channels matching a keyword.

    Parameters
    ----------
    searchKey : value sent as the ``q`` query parameter.
    pageToken : token of the result page to request.
    dt, dt2 : values sent as ``publishedAfter`` and ``publishedBefore``.

    Returns
    -------
    Whatever ``execute()`` returns for the request (at most 50 results,
    ordered by relevance, restricted to type "channel").
    """
    query = {
        "q": searchKey,
        "part": "id,snippet",
        "maxResults": 50,
        "order": "relevance",
        "type": "channel",
        "publishedAfter": dt,
        "publishedBefore": dt2,
        "pageToken": pageToken,
    }
    request = service.search().list(**query)
    return request.execute()

In [6]:
@retry(
    stop_max_attempt_number=7,
    retry_on_exception=is_503_error,
    wrap_exception=True,
    wait_random_min=1000,
    wait_random_max=2000,
)
def channel_by_id(search_id):
    """Fetch the channels().list record for a single channel id.

    The call is wrapped by ``retry``: at most 7 attempts, retrying only when
    ``is_503_error`` accepts the raised exception, with a random wait bounded
    by 1000 and 2000 between attempts. ``wrap_exception=True`` is why the
    caller in this notebook catches ``RetryError``.

    Parameters
    ----------
    search_id : value sent as the ``id`` parameter.

    Returns
    -------
    Whatever ``execute()`` returns for the request.
    """
    requested_parts = "id, snippet, brandingSettings, contentDetails, invideoPromotion, statistics, topicDetails"
    request = service.channels().list(
        id=search_id,
        part=requested_parts,
        maxResults=50,
    )
    return request.execute()

In [7]:
# get basic channel list with ids (years defined)
def get_channel_list(year,yearParts,day,searchKey,fileName,fileName2,prevQuota,stopPageToken='CLYHEAA'):
    """Harvest channels matching a keyword for one year, window by window.

    The year is split into ``yearParts`` date windows. For each window every
    result page of ``channel_date_keyword`` is fetched; each non-empty page is
    appended to ``fileName`` as one JSON line, and the ``channel_by_id``
    response for every listed channel is appended to ``fileName2`` as one
    JSON line.

    Parameters
    ----------
    year : calendar year to scan.
    yearParts : number of windows the year is divided into.
    day : sequence giving, per window, the day-of-month used for the window's
        end date (``day[x-1]`` for window ``x``).
    searchKey : keyword forwarded to ``channel_date_keyword``.
    fileName : path receiving the raw search pages (append mode).
    fileName2 : path receiving the per-channel responses (append mode).
    prevQuota : running quota estimate carried in from earlier calls.
    stopPageToken : paging stops as soon as the API hands back this token as
        the next page. The default is the token the notebook was originally
        hard-wired to stop at. Pass ``None`` to page until the API stops
        returning a next-page token.

    Returns
    -------
    The updated quota estimate: ``prevQuota`` plus 100 per search request and
    15 per successful channel lookup.

    Notes
    -----
    Paging also stops when a response carries no ``nextPageToken``; the
    previous version indexed that key directly and raised ``KeyError`` on the
    last page of a window.

    NOTE(review): each window ends at 00:00 of ``day[x-1]`` in its last month
    and the next one starts at 00:00 on the 1st of the following month, so
    anything published during a window's final day falls outside every
    window. Confirm whether that is intended.
    """
    # Figures used for the running quota estimate.
    SEARCH_COST = 100
    CHANNEL_COST = 15

    newQuota = prevQuota

    for x in range(1, yearParts + 1):
        # Window x runs from the 1st of its first month to day[x-1] of its last month.
        first_month = int(1 + (12 / yearParts) * (x - 1))
        last_month = int((12 / yearParts) * x)
        d = datetime.datetime(year, first_month, 1, 0, 0).isoformat() + 'Z'
        d2 = datetime.datetime(year, last_month, day[x - 1], 0, 0).isoformat() + 'Z'

        pageToken = ""
        while True:
            channelList = channel_date_keyword(searchKey, pageToken, d, d2)
            newQuota += SEARCH_COST

            items = channelList.get("items", [])
            if items:
                # Keep the raw search page (one JSON document per line).
                _append_json_line(fileName, channelList)

                # Save the detailed record of every channel on the page.
                for channel in items:
                    channel_id = channel["id"]["channelId"]
                    try:
                        channelData = channel_by_id(channel_id)
                    except RetryError:
                        print("max unsuccessful attempts reached"+" id:"+channel_id)
                        continue
                    newQuota += CHANNEL_COST
                    _append_json_line(fileName2, channelData)
                    time.sleep(1)

            # .get() instead of [...]: the key is absent on the last page.
            pageToken = channelList.get("nextPageToken")
            time.sleep(1)
            if not pageToken or pageToken == stopPageToken:
                break

        time.sleep(1)

    return newQuota


def _append_json_line(path, payload):
    """Append ``payload`` to ``path`` as one JSON document followed by a newline."""
    with open(path, 'a') as fp:
        fp.write(json.dumps(payload) + '\n')


In [None]:
# Harvest channel data for the selected search keys.
# Every year from 2007 through 2017 is split into `yearParts` windows, and
# day[k] is the day-of-month that closes window k.
yearParts = 3
day = [30, 31, 31]
searchKeys = [
    "minecraft",
    "roblox",
    "call of duty",
    "overwatch",
]
fileNames = [
    'minecraft_ch_list.json',
    'roblox_ch_list.json',
    'callOD_ch_list.json',
    'overwatch_ch_list.json',
]
fileNames2 = [
    'minecraft_ch_data.json',
    'roblox_ch_data.json',
    'callOD_ch_data.json',
    'overwatch_ch_data.json',
]
prevQuota = 510

# range(3, 4) selects index 3 only ("overwatch").
for i in range(3, 4):
    for year in range(2007, 2018):
        quota = get_channel_list(
            year=year,
            yearParts=yearParts,
            day=day,
            searchKey=searchKeys[i],
            fileName=fileNames[i],
            fileName2=fileNames2[i],
            prevQuota=prevQuota,
        )
        # Carry the running quota estimate into the next year and report it.
        prevQuota = quota
        print(prevQuota)
        time.sleep(10)

25965
max unsuccessful attempts reached id:UC_YKw_NbRIrF_dHoQ_p8Phg
max unsuccessful attempts reached id:UCeaEfGFzvguYyHWObwNifUA
55035
80955
max unsuccessful attempts reached id:UC-CqnZSHD5UUfOQypw6kvzA
106425
131940
161670
max unsuccessful attempts reached id:UCPJb-LafXEyc_i3pV4C4nMQ
187410
213375
max unsuccessful attempts reached id:UCqsD102F-Vh2OxWM7Xw44Sw
239190
266550


In [5]:
def is_503_error(exception):
    """Return True only for an ``HttpError`` whose response status is 503.

    Any other exception type, or an ``HttpError`` with a different status,
    yields False. Used as the ``retry_on_exception`` predicate in this
    notebook.
    """
    return isinstance(exception, HttpError) and exception.resp.status == 503

-------------- SOME TEST LINES --------------------------

In [6]:
# Manual check: request the first result page for "minecraft" channels
# published between 2017-09-01 and 2017-12-31 (both at 00:00).
d = datetime.datetime(2017, 9, 1).isoformat() + 'Z'
d2 = datetime.datetime(2017, 12, 31).isoformat() + 'Z'
channelList = channel_date_keyword(searchKey='minecraft', pageToken='', dt=d, dt2=d2)

In [10]:
# Report whether the search response held in `channelList` contains any items.
# The test is for a NON-empty list so that it matches the printed message;
# the previous condition compared with == [] while still printing 'not empty'.
if channelList.get("items", []) != []:
    print('not empty')

not empty


In [61]:
# Manual check: look up every channel listed in `channelList`; "pp" is
# printed once per completed lookup.
for channel in channelList.get("items", []):
    channelData = channel_by_id(search_id=channel["id"]["channelId"])
    print("pp")

In [1]:
# def test(quota):
#     myQuota=quota
#     myQuota+=100
#     return (myQuota)
# q=test(500)
# print (q)


-------------------- END TEST LINES -----------------------

------------MINECRAFT 10 YEARS------------------
24510
max unsuccessful attempts reached id:UCL1tBPx1F9AR70_o_8Os69g
max unsuccessful attempts reached id:UC3mqPSF_ER-JQBrJ1AeZBiQ
53085
79185
103905
128790
158010
max unsuccessful attempts reached id:UCXG90a9uCLYrQ8P4KzcgvLA
184320
max unsuccessful attempts reached id:UCzI_WNuSqv7MpG3Fc5c8_Sw
210510
236505
265305

23970
50520
74880
99045
124365
max unsuccessful attempts reached id:UCiTotY7g2PnOSqkUAdkqrmg
154620
181605
208680
235410

-----call of duty 10 years--------------
25350
max unsuccessful attempts reached id:UCY6f5qHCaoQDaPtnAViiORQ
54090
max unsuccessful attempts reached id:UCSiheoc_gFYfqw_MNFavj5g
80445
max unsuccessful attempts reached id:UCezm-lopPPR6OFiDwgKzDkw
106380
132030
161415
187890
214950
max unsuccessful attempts reached id:UCa53gqLx3zxaAITSF0bgmkQ
241785
271095
max unsuccessful attempts reached id:UCafH9QUMrjWaJh2ip1EVBqA

------ overwatch 10 years --------------------
25965
max unsuccessful attempts reached id:UC_YKw_NbRIrF_dHoQ_p8Phg
max unsuccessful attempts reached id:UCeaEfGFzvguYyHWObwNifUA
55035
80955
max unsuccessful attempts reached id:UC-CqnZSHD5UUfOQypw6kvzA
106425
131940
161670
max unsuccessful attempts reached id:UCPJb-LafXEyc_i3pV4C4nMQ
187410
213375
max unsuccessful attempts reached id:UCqsD102F-Vh2OxWM7Xw44Sw
239190
266550

In [None]:
# Print the id of every channel stored in the channel-data file.
# The file is written one JSON document per line, so each line is parsed on
# its own (the previous bare `json.loads(line) for line in handle` was a
# syntax error).
with open('minecraft_ch_data.json', 'r') as handle:
    for line in handle:
        line = line.strip()
        if not line:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        json_dict = json.loads(line)
        for channel in json_dict.get("items", []):
            channel_id = channel["id"]
            # Search results nest the id as {"channelId": ...}; channel
            # records carry it as a plain string. Accept either shape.
            if isinstance(channel_id, dict):
                channel_id = channel_id["channelId"]
            print(channel_id)