In [22]:
import argparse
from datetime import datetime
import os
import pandas as pd
import youtube_dl


def get_playlist_urls(playlist_path, output_path='yt_list.txt'):
    """Append the page URL of every entry of a playlist to a text file.

    Args:
        playlist_path: URL handed to ``YoutubeDL.extract_info`` (metadata
            only, nothing is downloaded).
        output_path: Text file the URLs are appended to, one per line.
            Defaults to ``'yt_list.txt'``, the name previously hard-coded.

    Returns:
        The list of URLs that were appended (empty when the extracted info
        has no ``'entries'``).
    """
    ydl_opts = {"quiet": True}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        json_info = ydl.extract_info(playlist_path, download=False)

    # A single video has no 'entries' key; treat a null value the same way.
    entries = json_info.get('entries') or []
    urls = [entry['webpage_url'] for entry in entries]

    # Append mode: repeated runs accumulate URLs in the same file.
    with open(output_path, 'a') as f:
        for url in urls:
            f.write(url + "\n")
    return urls


def get_view_count(video_url):
    """Fetch title, view count, URL and upload date of a single video.

    Args:
        video_url: URL handed to ``YoutubeDL.extract_info`` (metadata only,
            nothing is downloaded).

    Returns:
        A dict with keys ``'title'``, ``'count'``, ``'url'``,
        ``'upload_date'`` (as int) and ``'timestamp'`` (string from
        ``get_date_time_string``). ``'count'`` stays at ``-1`` when the
        extractor reports no view count, which is the sentinel callers test.
    """
    ydl_opts = {"quiet": True}
    res = {'title': '', 'count': -1, 'url': ''}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        json_info = ydl.extract_info(video_url, download=False)

    res['title'] = json_info['title']
    # view_count is an optional field: keep the -1 sentinel instead of
    # propagating None (or raising KeyError), so `count > -1` checks work.
    view_count = json_info.get('view_count')
    if view_count is not None:
        res['count'] = view_count
    res['url'] = json_info['webpage_url']
    # NOTE(review): upload_date is presumably optional as well; int() raises
    # if it is missing/None - confirm the desired fallback before changing.
    res['upload_date'] = int(json_info['upload_date'])
    res['timestamp'] = get_date_time_string()
    return res

def get_date_time_string():
    """Return the current local time formatted like ``17-Oct-2020 12:22``."""
    return datetime.now().strftime("%d-%b-%Y %H:%M")

def write_report(vc_list, report_name):
    """Write the collected records to ``report_name`` as a ';'-separated CSV.

    ``vc_list`` is passed to ``pd.DataFrame`` unchanged; the frame's index
    is not written to the file.
    """
    pd.DataFrame(data=vc_list).to_csv(report_name, index=False, sep=';')

def get_parser():
    """Build the command-line parser for the view-count tool.

    Options: two boolean switches (``--save-playlist-urls``,
    ``--save-playlist-report``), the string option ``--update-report``, and
    a required, mutually exclusive choice between ``--video-url`` and
    ``--file-url-list``.
    """
    cli = argparse.ArgumentParser(description='Radio CICAP YouTube count views.')

    # Boolean switches, registered in the same order as they appear in --help.
    switches = (
        ('--save-playlist-urls',
         'Save playlist urls flag. Use --video-url to specify the playlist.'),
        ('--save-playlist-report',
         'Save playlist report flag. Use --file-url-list to specify the txt playlist.'),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, action='store_true', help=help_text)

    cli.add_argument(
        '--update-report',
        type=str,
        help='Update report with current views. Use --file-url-list to specify the txt playlist.',
    )

    # Exactly one input source must be given.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('--video-url', type=str, help='YouTube video URL.')
    source.add_argument('--file-url-list', type=str, help='TxT file of YouTube video URLs.')
    return cli


def _collect_view_counts(url_list_path):
    """Fetch view counts for every URL listed (one per line) in a text file.

    Each successful record is printed and returned as a dict whose
    ``'count'``/``'timestamp'`` keys are replaced by a single key named
    after the run timestamp, holding the view count.
    """
    # One timestamp for the whole run: it becomes a column name in the
    # report, so it must be identical for every video even if the requests
    # straddle a minute boundary.
    run_stamp = get_date_time_string()
    records = []
    with open(url_list_path, 'r') as f:
        for line in f:
            video_url = line.strip()
            if not video_url:
                # Skip blank lines instead of querying an empty URL.
                continue
            vc = get_view_count(video_url)
            if vc['count'] > -1:
                print("{}; {}; {}; {}".format(vc['title'], vc['url'],
                                              vc['timestamp'],
                                              vc['count']))
                vc[run_stamp] = vc.pop('count')
                vc.pop('timestamp')
                records.append(vc)
    return records


def main():
    """Command-line entry point.

    With ``--video-url``: either save the playlist's URLs
    (``--save-playlist-urls``) or print the view count of that video.
    With ``--file-url-list``: print the view count of every listed video,
    then either write ``yt_report.csv`` (``--save-playlist-report``) or
    merge the new counts with the report given by ``--update-report`` and
    print the result.
    """
    parser = get_parser()
    args = parser.parse_args()

    if args.video_url is not None:
        if args.save_playlist_urls:
            get_playlist_urls(args.video_url)
        else:
            # Query the URL given on the command line (it used to be
            # ignored in favour of a hard-coded video).
            vc = get_view_count(args.video_url)
            if vc['count'] > -1:
                note_str = "{}; {}; {}; {}".format(vc['title'], vc['url'],
                                                   vc['timestamp'],
                                                   vc['count'])
                print(note_str)
    if args.file_url_list is not None:
        vc_list = _collect_view_counts(args.file_url_list)

        if args.save_playlist_report:
            if vc_list:
                report_name = 'yt_report.csv'
                write_report(vc_list, report_name)
        elif args.update_report is not None and vc_list:
            df = pd.DataFrame(vc_list)
            # Read the report named on the command line; the bare name
            # `update_report` is not defined inside this function.
            origin = pd.read_csv(args.update_report, sep=';')
            # NOTE(review): the merged frame is only printed here, it is
            # not written back to the report file - confirm this is intended.
            a = origin.merge(df, on=['title', 'url', 'upload_date'])
            print(a)

In [49]:
# Inputs: text file with one video URL per line, and the ';'-separated
# report that gets one new view-count column per run.
file_url_list = "../yt_fest2020_list.txt"
update_report = "../yt_fest2020_report.csv"
if file_url_list is not None:
    # One timestamp for the whole run: it names the new column, so it must
    # be identical for every video even if the requests straddle a minute
    # boundary (a per-video timestamp could split the run over two columns).
    run_stamp = get_date_time_string()
    vc_list = []
    with open(file_url_list, 'r') as f:
        for item in f:
            video_url = item.strip()
            if not video_url:
                # Skip blank lines instead of querying an empty URL.
                continue
            vc = get_view_count(video_url)
            if vc['count'] > -1:
                note_str = "{}; {}; {}; {}".format(vc['title'], vc['url'],
                                                   vc['timestamp'],
                                                   vc['count'])
                # Replace 'count'/'timestamp' with a single key named after
                # the run timestamp; it ends up as the last column of df.
                vc[run_stamp] = vc.pop('count')
                vc.pop('timestamp')
                vc_list.append(vc)
                print(note_str)

    # Nothing to merge when no view count could be fetched.
    if update_report is not None and vc_list:
        df = pd.DataFrame(vc_list)
        origin = pd.read_csv(update_report, sep=';')
        # Right merge: keep every freshly fetched video, including ones the
        # stored report does not know yet (their old columns become NaN).
        # NOTE(review): if the report already has a column named run_stamp
        # (two runs within the same minute) merge adds _x/_y suffixes and
        # the lookup below raises KeyError - confirm whether to handle it.
        a = pd.merge(origin, df, how='right', on=['title', 'url', 'upload_date'])
        # Rewrite the report only when the newest counts differ from the
        # last stored column (NaN compares unequal, so new videos count too).
        if any(a[df.columns[-1]] != a[origin.columns[-1]]):
            a.to_csv(update_report, sep=';', index=False)
            print(a)

Conversazioni sulla scienza con Piero Bianucci; https://www.youtube.com/watch?v=c5iJ-cXApDc; 17-Oct-2020 12:22; 551
Conversazioni sulla scienza con Amedeo Balbi; https://www.youtube.com/watch?v=ob4Ne27PpiQ; 17-Oct-2020 12:22; 672
Conversazioni sulla scienza con Silvia Bencivelli; https://www.youtube.com/watch?v=B2ziw2WUblc; 17-Oct-2020 12:22; 338
Conversazioni sulla scienza con Armando De Vincentiis; https://www.youtube.com/watch?v=M3Pbhfkw2mg; 17-Oct-2020 12:22; 121
Conversazioni sulla scienza con Armando De Vincentiis; https://www.youtube.com/watch?v=HDoNSuR2ydQ; 17-Oct-2020 12:22; 150
Conversazioni sulla scienza con Salvo di Grazia; https://www.youtube.com/watch?v=aqTJjF4PGWY; 17-Oct-2020 12:22; 321
Conversazioni sulla scienza con Stefano Bigliardi; https://www.youtube.com/watch?v=s0Dm6ezCu-o; 17-Oct-2020 12:22; 525
Conversazioni sulla scienza con Lucia Votano; https://www.youtube.com/watch?v=NHtMtl9xeLE; 17-Oct-2020 12:22; 552
CONVERSAZIONI SULLA SCIENZA con Veronica Padovani; http

In [31]:
# Display the report frame previously loaded with pd.read_csv.
origin

Unnamed: 0,title,url,upload_date,17-Oct-2020 12:01,17-Oct-2020 12:03
0,Conversazioni sulla scienza con Piero Bianucci,https://www.youtube.com/watch?v=c5iJ-cXApDc,20200930,551,551
1,Conversazioni sulla scienza con Amedeo Balbi,https://www.youtube.com/watch?v=ob4Ne27PpiQ,20201005,672,672
2,Conversazioni sulla scienza con Silvia Bencivelli,https://www.youtube.com/watch?v=B2ziw2WUblc,20201007,338,338
3,Conversazioni sulla scienza con Armando De Vin...,https://www.youtube.com/watch?v=M3Pbhfkw2mg,20201007,121,121
4,Conversazioni sulla scienza con Armando De Vin...,https://www.youtube.com/watch?v=HDoNSuR2ydQ,20201007,150,150
5,Conversazioni sulla scienza con Salvo di Grazia,https://www.youtube.com/watch?v=aqTJjF4PGWY,20201008,321,321
6,Conversazioni sulla scienza con Stefano Bigliardi,https://www.youtube.com/watch?v=s0Dm6ezCu-o,20200929,525,525
7,Conversazioni sulla scienza con Lucia Votano,https://www.youtube.com/watch?v=NHtMtl9xeLE,20200928,552,552
8,CONVERSAZIONI SULLA SCIENZA con Veronica Padovani,https://www.youtube.com/watch?v=yNipdCaGi50,20201001,245,245


In [44]:
# True when, in the merged frame, the last column of df differs from the last
# column of origin for at least one row (NaN compares unequal to any value).
any(a[df.columns[-1]] != a[origin.columns[-1]])

True

In [42]:
# Show the last column of the stored report.
origin[origin.columns[-1]]

0    551
1    672
2    338
3    121
4    150
5    321
6    525
7    552
8    245
Name: 17-Oct-2020 12:03, dtype: int64

In [33]:
# Right merge on the identifying columns: every row of df is kept, and the
# columns coming from origin are NaN where origin has no matching row.
a = pd.merge(origin, df, how='right', on=['title', 'url', 'upload_date'])

In [34]:
# Display the merged frame.
a

Unnamed: 0,title,url,upload_date,17-Oct-2020 12:01,17-Oct-2020 12:03,17-Oct-2020 12:08
0,Conversazioni sulla scienza con Piero Bianucci,https://www.youtube.com/watch?v=c5iJ-cXApDc,20200930,551.0,551.0,551
1,Conversazioni sulla scienza con Amedeo Balbi,https://www.youtube.com/watch?v=ob4Ne27PpiQ,20201005,672.0,672.0,672
2,Conversazioni sulla scienza con Silvia Bencivelli,https://www.youtube.com/watch?v=B2ziw2WUblc,20201007,338.0,338.0,338
3,Conversazioni sulla scienza con Armando De Vin...,https://www.youtube.com/watch?v=M3Pbhfkw2mg,20201007,121.0,121.0,121
4,Conversazioni sulla scienza con Armando De Vin...,https://www.youtube.com/watch?v=HDoNSuR2ydQ,20201007,150.0,150.0,150
5,Conversazioni sulla scienza con Salvo di Grazia,https://www.youtube.com/watch?v=aqTJjF4PGWY,20201008,321.0,321.0,321
6,Conversazioni sulla scienza con Stefano Bigliardi,https://www.youtube.com/watch?v=s0Dm6ezCu-o,20200929,525.0,525.0,525
7,Conversazioni sulla scienza con Lucia Votano,https://www.youtube.com/watch?v=NHtMtl9xeLE,20200928,552.0,552.0,552
8,CONVERSAZIONI SULLA SCIENZA con Veronica Padovani,https://www.youtube.com/watch?v=yNipdCaGi50,20201001,245.0,245.0,245
9,Conversazioni sulla scienza con Francesco Grassi,https://www.youtube.com/watch?v=g-_8OcSQcSw,20201012,,,259
