<a href="https://colab.research.google.com/github/Laya303/Customize-YouTube-Recommendations/blob/main/YouTube_Recommendations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install google-api-python-client

In [2]:
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
from apiclient.discovery import build
from IPython.display import display, HTML
from tabulate import tabulate

# Read the key from the YOUTUBE_API_KEY environment variable when it is set, so
# the real key never has to be saved inside the notebook. Otherwise fall back
# to the placeholder, which must be replaced before running a search.
api_key = os.environ.get('YOUTUBE_API_KEY', 'your_api_key_here')

In [3]:
def get_start_date_string(search_period_days):
    """Return the start of the search period as a timestamp string.

    The start is midnight of the day `search_period_days` days before
    today (as reported by `datetime.today()`), formatted as
    'YYYY-MM-DDTHH:MM:SSZ'.

    NOTE(review): `datetime.today()` is local time while the trailing 'Z'
    denotes UTC - confirm this offset is acceptable for the search window.
    """
    window_start = datetime.today() - timedelta(search_period_days)
    # Truncate to midnight so the window always begins on a day boundary.
    midnight = window_start.replace(hour=0, minute=0, second=0, microsecond=0)
    return midnight.strftime('%Y-%m-%dT%H:%M:%SZ')

In [4]:
# Helpers that parse key information about videos

def find_title(item):
    """Return the title stored in a search result's snippet."""
    return item['snippet']['title']

def find_video_url(item):
    """Build the watch-page URL for a search result from its video id."""
    watch_prefix = "https://www.youtube.com/watch?v="
    return watch_prefix + item['id']['videoId']

def find_viewcount(item, youtube):
    """Look up a search result's view count through the videos endpoint.

    Args:
        item: search result; the video id is read from item['id']['videoId'].
        youtube: API client exposing `videos().list(...).execute()`.

    Returns:
        The view count as an int, or 0 when the response contains no items
        or the returned statistics carry no 'viewCount' entry.
    """
    video_id = item['id']['videoId']
    response = youtube.videos().list(id=video_id, part='statistics').execute()
    entries = response.get('items') or []
    if not entries:
        # Nothing came back for this id; report 0 views instead of raising so
        # one missing video does not abort the whole search.
        return 0
    statistics = entries[0].get('statistics', {})
    return int(statistics.get('viewCount', 0))

def find_channel_id(item):
    """Return the channel id recorded in a search result's snippet."""
    return item['snippet']['channelId']

def find_channel_url(item):
    """Build the channel-page URL from the channel id in a search result."""
    channel_prefix = "https://www.youtube.com/channel/"
    return channel_prefix + item['snippet']['channelId']

def find_channel_title(channel_id, youtube):
    """Fetch a channel's title from its branding settings.

    Args:
        channel_id: id passed to the channels endpoint.
        youtube: API client exposing `channels().list(...).execute()`.

    Returns:
        The 'title' found under brandingSettings -> channel of the first
        item in the response.
    """
    request = youtube.channels().list(id=channel_id, part='brandingSettings')
    first_channel = request.execute()['items'][0]
    return first_channel['brandingSettings']['channel']['title']

def find_num_subscribers(channel_id, youtube):
    """Fetch a channel's subscriber count from its statistics.

    Args:
        channel_id: id passed to the channels endpoint.
        youtube: API client exposing `channels().list(...).execute()`.

    Returns:
        The subscriber count as an int, or 1000000 when the channel's
        statistics flag the subscriber count as hidden.
    """
    response = youtube.channels().list(id=channel_id, part='statistics').execute()
    channel_stats = response['items'][0]['statistics']
    # A hidden count is replaced by a fixed stand-in of one million.
    if channel_stats['hiddenSubscriberCount']:
        return 1000000
    return int(channel_stats['subscriberCount'])

def view_to_sub_ratio(viewcount, num_subscribers):
    """Return views divided by subscribers, or 0 when there are no subscribers."""
    return viewcount / num_subscribers if num_subscribers != 0 else 0

def age(item):
    """Return how many whole days ago a video was published (never below 1).

    Args:
        item: search result; the timestamp is read from
            item['snippet']['publishedAt'] and is expected to look like
            'YYYY-MM-DDTHH:MM:SSZ', optionally with fractional seconds.

    Returns:
        int number of elapsed days, with a floor of 1 so callers can safely
        divide by it.
    """
    published_text = item['snippet']['publishedAt']
    # Drop the trailing 'Z' and any fractional seconds so both
    # '...T12:00:00Z' and '...T12:00:00.000Z' parse with one format.
    published_text = published_text.rstrip('Z').split('.')[0]
    published_at = datetime.strptime(
        published_text, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
    # Compare in UTC: the 'Z' suffix marks the timestamp as UTC, so measuring
    # against local time could give a negative age west of UTC.
    elapsed_days = (datetime.now(timezone.utc) - published_at).days
    return max(elapsed_days, 1)

def custom_score(viewcount, ratio, days_since_published):
    """Score a video as views times capped ratio, per day since publication.

    The view-to-subscriber ratio is capped at 5 before it is applied, and
    the result is rounded to the nearest integer.
    """
    capped_ratio = min(ratio, 5)
    return round(viewcount * capped_ratio / days_since_published)


In [5]:
def find_videos(search_terms, api_key, views_threshold, uploaded_since):
    """Run one search and return its results as a DataFrame.

    Args:
        search_terms: query string handed to search_api.
        api_key: developer key handed to search_api.
        views_threshold: view-count cut-off handed to populate_dataframe.
        uploaded_since: start-date string handed to search_api.

    Returns:
        The DataFrame returned by populate_dataframe, with the columns
        Title, Link, Score, Views, Channel, Subscribers,
        View-Subscriber Ratio and Channel Link.
    """
    search_results, youtube_api = search_api(search_terms, api_key, uploaded_since)

    # Start from an empty, pre-labelled frame that populate_dataframe fills in.
    result_columns = ('Title', 'Link', 'Score',
                      'Views', 'Channel', 'Subscribers',
                      'View-Subscriber Ratio', 'Channel Link')
    empty_frame = pd.DataFrame(columns=result_columns)

    return populate_dataframe(search_results, youtube_api, empty_frame,
                              views_threshold)

def search_api(search_terms, api_key, uploaded_since):
    """Build a YouTube v3 client and run a video search with it.

    The search requests up to 50 videos matching `search_terms`, ordered by
    view count and published after `uploaded_since`.

    Returns:
        A (results, youtube_api) tuple: the executed search response and the
        client that produced it, so callers can issue follow-up requests.
    """
    youtube_api = build('youtube', 'v3', developerKey=api_key)

    search_request = youtube_api.search().list(
        q=search_terms,
        part='snippet',
        type='video',
        order='viewCount',
        maxResults=50,
        publishedAfter=uploaded_since,
    )

    return search_request.execute(), youtube_api


def populate_dataframe(results, youtube_api, df, views_threshold):
    """Add one row to `df` per search result whose views exceed the threshold.

    Args:
        results: search response; its 'items' list is iterated.
        youtube_api: API client used for the per-video and per-channel lookups.
        df: DataFrame to fill; it is modified in place and also returned.
        views_threshold: only videos with strictly more views are kept.

    Returns:
        The same `df`, with rows labelled by each kept item's 1-based
        position in the search results.
    """
    for row_label, item in enumerate(results['items'], start=1):
        viewcount = find_viewcount(item, youtube_api)
        if not viewcount > views_threshold:
            continue

        # Fields read straight from the search result.
        title = find_title(item)
        video_url = find_video_url(item)
        channel_url = find_channel_url(item)
        channel_id = find_channel_id(item)

        # Fields that need extra channel lookups.
        channel_name = find_channel_title(channel_id, youtube_api)
        num_subs = find_num_subscribers(channel_id, youtube_api)

        ratio = view_to_sub_ratio(viewcount, num_subs)
        score = custom_score(viewcount, ratio, age(item))

        df.loc[row_label] = [title, video_url, score, viewcount, channel_name,
                             num_subs, f"{ratio:.1f}", channel_url]
    return df


In [6]:
def search_each_term(search_terms, api_key, uploaded_since=None,
                        views_threshold=5000, num_to_print=5):
    """Search for each term, print the top videos and display every result.

    Results from all search terms are combined, sorted by 'Score' (highest
    first) and de-duplicated by video link before anything is shown.

    Args:
        search_terms: a single search string or an iterable of search strings.
        api_key: developer key passed through to find_videos.
        uploaded_since: start-date string ('YYYY-MM-DDTHH:MM:SSZ') passed to
            find_videos. When omitted, get_start_date_string(7) is computed
            at call time, so the window stays current in a long-lived kernel.
        views_threshold: view-count cut-off passed through to find_videos.
        num_to_print: number of top-scoring videos printed as plain text.

    Raises:
        ValueError: if `search_terms` contains no search term.
    """
    if uploaded_since is None:
        uploaded_since = get_start_date_string(7)

    if isinstance(search_terms, str):
        search_terms = [search_terms]
    search_terms = list(search_terms)
    if not search_terms:
        raise ValueError("search_terms must contain at least one search term")

    # Collect one frame per term so no term's results are thrown away.
    per_term_frames = [
        find_videos(search_term, api_key, views_threshold=views_threshold,
                    uploaded_since=uploaded_since)
        for search_term in search_terms
    ]
    non_empty_frames = [frame for frame in per_term_frames if len(frame) > 0]
    if non_empty_frames:
        df = pd.concat(non_empty_frames, ignore_index=True)
    else:
        # Every search came back empty; keep one empty frame for its columns.
        df = per_term_frames[0]
    # The same video can match several terms; keep its best-scoring row only.
    df = df.sort_values('Score', ascending=False).drop_duplicates(subset='Link')

    print("\n\nHi Sreelaya\nHere are the videos you should watch this week\n\n")
    print_top_videos(df, num_to_print)

    print("\n\nThese are all the results fetched...\n")
    pd.set_option('display.max_colwidth', 400)

    # Turn the Title and Channel columns into hyperlinks on a copy, leaving
    # the combined results frame untouched.
    display_df = df.assign(
        Title='<a href="' + df['Link'] + '"><div>' + df['Title'] + '</div></a>',
        Channel='<a href="' + df['Channel Link'] + '"><div>' + df['Channel'] + '</div></a>',
    ).drop(columns=['Link', 'Channel Link'])

    # Chain the styling calls on one Styler so the left alignment of Title is
    # not lost when the centre alignment is applied to the other columns.
    styler = (
        display_df.style
        .set_properties(subset=['Title'], **{'text-align': 'left'})
        .set_properties(subset=['Score', 'Views', 'Channel', 'Subscribers',
                                'View-Subscriber Ratio'],
                        **{'text-align': 'center'})
        .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
    )

    # hide_index() and render() were removed in pandas 2.0; use their
    # replacements when available and fall back on older pandas.
    if hasattr(styler, 'hide'):
        styler = styler.hide(axis='index')
    else:
        styler = styler.hide_index()
    html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()

    display(HTML(html))

def print_top_videos(df, num_to_print):
    """Print rank, title and URL for the first rows of `df`.

    At most `num_to_print` rows are printed, in the frame's existing order,
    reading the 'Title' and 'Link' columns. When that leaves nothing to
    print, a "No video results found" message is printed instead.
    """
    shown = min(num_to_print, len(df))
    if shown == 0:
        print("No video results found")
        return
    for position in range(shown):
        row = df.iloc[position]
        print(f"#{position + 1}\nTitle: '{row['Title']}' \nURL: {row['Link']} \n")



In [7]:
# Example run: top "Machine Learning" videos using the default search window.
# To search from an explicit start date instead, pass it as the third argument:
#   search_each_term("Data Science", api_key, '2021-01-11T00:00:00Z')
search_each_term("Machine Learning", api_key, num_to_print=5)
 



Hi Sreelaya
Here are the videos you should watch this week


#1
Title: 'Linear Regression with Scikit Learn | Lesson 1 of 6 | Machine Learning with Python: Zero to GBMs' 
URL: https://www.youtube.com/watch?v=CVszSgTWODE 

#2
Title: 'XCiT: Cross-Covariance Image Transformers (Facebook AI Machine Learning Research Paper Explained)' 
URL: https://www.youtube.com/watch?v=g08NkNWmZTA 

#3
Title: '#55 Self-Supervised Vision Models (Dr. Ishan Misra - FAIR).' 
URL: https://www.youtube.com/watch?v=EXJmodhu4_4 

#4
Title: 'Künstliche Intelligenz: Wie Machine Learning unsere Gesellschaft verändert' 
URL: https://www.youtube.com/watch?v=xhOeuqL2Pro 

#5
Title: 'Sign Language Detection using ACTION RECOGNITION with Python | LSTM Deep Learning Model' 
URL: https://www.youtube.com/watch?v=doDUihpj6ro 



These are all the results fetched...



Title,Score,Views,Channel,Subscribers,View-Subscriber Ratio
Linear Regression with Scikit Learn | Lesson 1 of 6 | Machine Learning with Python: Zero to GBMs,1595,10752,Jovian,14500,0.7
XCiT: Cross-Covariance Image Transformers (Facebook AI Machine Learning Research Paper Explained),429,8865,Yannic Kilcher,91500,0.1
#55 Self-Supervised Vision Models (Dr. Ishan Misra - FAIR).,400,5153,Machine Learning Street Talk,16600,0.3
Künstliche Intelligenz: Wie Machine Learning unsere Gesellschaft verändert,304,14721,Florian Homm,238000,0.1
Sign Language Detection using ACTION RECOGNITION with Python | LSTM Deep Learning Model,208,5380,Nicholas Renotte,23200,0.2
White Box Vs Black Box Models In Machine Learning- Data Science Interview Question,47,8469,Krish Naik,384000,0.0
Deep Learning with Python Tutorial | Deep Learning Training | Edureka | DL Rewind - 1,5,5252,edureka!,2710000,0.0
