# Importing Libraries

In [1]:
# Importing Packages
import pandas as pd
from google.cloud import storage
import os
from dotenv import load_dotenv
import io
from googleapiclient.discovery import build
from datetime import timedelta, datetime
import datetime
import io

In [2]:
# Load settings from a local .env file into the process environment.
load_dotenv()

# Developer key for the YouTube Data API; None when API_KEY is not set.
api_key = os.getenv('API_KEY')

# YouTube Data API v3 client. database_creator() reads this module-level
# name when it requests channel statistics.
youtube = build('youtube', 'v3', developerKey=api_key)

In [3]:
# Variables
# Every setting is read from the environment; a value that is not set comes
# back as None.
#
# Re-export the credentials path only when it is actually configured:
# assigning None into os.environ raises "TypeError: str expected, not
# NoneType", which hides the real problem (a missing setting). When the path
# is absent, storage.Client() is left to resolve credentials by its own means
# and reports its own error if it cannot.
credentials_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
if credentials_path:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
bucket_name = os.getenv('BUCKET_NAME')
folder_path = os.getenv('FOLDER_PATH')
project_name = os.getenv('PROJECT_NAME')
running_data_folder = os.getenv('RUNNING_DATA_FOLDER')
# Cloud Storage client shared by the functions in this notebook.
client = storage.Client()
# Listing of the objects stored under folder_path in the bucket.
blobs = client.list_blobs(bucket_name, prefix=folder_path)

In [4]:
def get_channel_stats(youtube_key, channel_ids, batch_size=50):
    """Fetch name, id, counters and uploads playlist for YouTube channels.

    Parameters
    ----------
    youtube_key
        YouTube Data API client object. Only
        ``youtube_key.channels().list(...).execute()`` is called on it.
    channel_ids : iterable of str
        Channel ids to look up.
    batch_size : int, default 50
        Maximum number of ids placed in a single request. The ids are sent
        as one comma-separated ``id`` filter per request.
        NOTE(review): 50 is assumed to be the per-request cap for the ``id``
        filter -- confirm against the channels.list reference.

    Returns
    -------
    pandas.DataFrame
        One row per channel present in the API responses, with the columns
        ``channel_name``, ``channel_id``, ``subscribers``, ``total_views``,
        ``total_videos`` and ``playlist_id``. Values are stored exactly as
        they appear in the response (no type conversion). The columns are
        present even when no channel was returned, so the frame can always
        be merged on ``channel_id``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    columns = ['channel_name', 'channel_id', 'subscribers',
               'total_views', 'total_videos', 'playlist_id']

    ids = list(channel_ids)
    all_data = []

    # One request per slice of ids, so long id lists are split instead of
    # being sent as a single oversized filter.
    for start in range(0, len(ids), batch_size):
        request = youtube_key.channels().list(
            part='snippet,contentDetails,statistics',
            id=','.join(ids[start:start + batch_size]))

        response = request.execute()

        # Tolerate a response without an 'items' key (nothing matched).
        for item in response.get('items', []):
            statistics = item['statistics']
            all_data.append(dict(
                channel_name=item['snippet']['title'],
                channel_id=item['id'],
                # Left empty when the response carries no subscriber count
                # (presumably channels that hide it -- TODO confirm).
                subscribers=statistics.get('subscriberCount'),
                total_views=statistics['viewCount'],
                total_videos=statistics['videoCount'],
                playlist_id=item['contentDetails']['relatedPlaylists']['uploads']))

    channel_stats_df = pd.DataFrame(all_data, columns=columns)

    return channel_stats_df

In [5]:
def database_creator(data, days):
    """Build a one-row-per-video table covering the first ``days`` days.

    Only videos that have a row for exactly day ``days`` are kept, and for
    those videos only rows with ``Day <= days`` are used. The view, like and
    comment counts are spread into one column per metric and day, labelled
    with tuples such as ``('view_count', 'Day1')``. Descriptive columns taken
    from each video's first row are attached, followed by the channel
    statistics returned by ``get_channel_stats`` for the module-level
    ``youtube`` client.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frame with one row per video and day. It must contain
        ``Day``, ``video_id``, ``view_count``, ``like_count``,
        ``comment_count`` and every column listed in ``columns_to_keep``
        below. The frame is not modified.
    days : int or str
        Number of days to include; converted with ``int()``.

    Returns
    -------
    pandas.DataFrame
        ``video_id``, the per-day metric columns, the descriptive columns and
        the channel statistics columns, one row per video.
    """
    days = int(days)

    # Filter by requested days: videos observed on day `days`, and only
    # their rows up to that day.
    tracked_ids = data.loc[data['Day'] == days, 'video_id']
    in_window = data['video_id'].isin(tracked_ids) & (data['Day'] <= days)
    # Work on a copy so relabelling 'Day' below never writes into `data`
    # (the original assigned into a slice of it).
    filtered_df = data.loc[in_window].copy()

    # Flip: one column per (metric, day)
    filtered_df['Day'] = 'Day' + filtered_df['Day'].astype(str)
    df_pivoted = filtered_df.pivot_table(
        index='video_id',
        columns='Day',
        values=['view_count', 'like_count', 'comment_count'],
        aggfunc='first',
        fill_value=0)
    # Collapse the two-level header into flat tuple labels before merging.
    # Merging a two-level frame with a single-level one only warned on older
    # pandas and raises MergeError on newer releases; flattening first keeps
    # the same tuple labels without relying on that behaviour.
    df_pivoted.columns = df_pivoted.columns.to_flat_index()
    df_pivoted = df_pivoted.reset_index()

    columns_to_keep = ['video_id', 'channel_id', 'title', 'description', 'tags', 'caption',
        'licensed_content','category', 'duration_formatted',
        'no_of_tags', 'title_length', 'description_length',
        'published_at_formatted', 'extraction_date_formatted']
    # Descriptive columns, taken from the first row seen for each video
    df_unique_ids = (
        filtered_df[columns_to_keep]
        .drop_duplicates(subset='video_id', keep='first')
        .reset_index(drop=True)
    )

    # Merge with the original DataFrame to include other information
    result_df = df_pivoted.merge(df_unique_ids, on='video_id', how='left')

    # get list of channel ids of the videos (missing ids are skipped because
    # they cannot be joined into the request's id filter)
    channel_ids_list = result_df['channel_id'].dropna().unique().tolist()

    # get channel stats dataframe
    channel_df = get_channel_stats(youtube, channel_ids_list)

    # merge final dataframe
    final_df = pd.merge(result_df, channel_df, on='channel_id', how='left')

    return final_df

In [6]:
def get_running_df(blob_path='YouTube_Running_Data/running_youtube_video_data.csv'):
    """Download the running video-data CSV from Cloud Storage as a DataFrame.

    Uses the module-level ``client`` and ``bucket_name``.

    Parameters
    ----------
    blob_path : str, optional
        Object path inside the bucket. Defaults to the running data file
        this notebook has always read.

    Returns
    -------
    pandas.DataFrame
        The CSV content parsed with ``pandas.read_csv`` (UTF-8).
    """
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(blob_path)
    # download_as_bytes() is the supported call; download_as_string() is its
    # deprecated alias and returns the same bytes.
    running_csv = blob.download_as_bytes()
    running_df = pd.read_csv(io.BytesIO(running_csv), encoding='utf-8')

    return running_df

In [7]:
def interface_function(days=None):
    """Load the running data and build the table for a number of days.

    Parameters
    ----------
    days : int or str, optional
        Number of days to include. When omitted, the value is asked for with
        ``input()``, as before. Passing it explicitly lets the notebook run
        without a prompt (e.g. a non-interactive "Run All").

    Returns
    -------
    pandas.DataFrame
        Whatever ``database_creator`` returns for the data from
        ``get_running_df`` and the chosen ``days``.
    """
    if days is None:
        days = input('How many days data would you like? ')

    df = get_running_df()

    new_df = database_creator(df, days)

    return new_df

In [8]:
# Build the per-video table (interface_function asks for the number of days)
# and show it as this cell's output.
database = interface_function()
database

  result_df = df_pivoted.merge(df_unique_ids, on='video_id', how='left')


Unnamed: 0,video_id,"(comment_count, Day1)","(like_count, Day1)","(view_count, Day1)",channel_id,title,description,tags,caption,licensed_content,...,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted,channel_name,subscribers,total_views,total_videos,playlist_id
0,-4xVbmIzGfE,31,1026,14035,UCJublDh2UsiIKsAE1553miw,Very Common FAANG Interview Question! | Index ...,"dynamic programming, leetcode, coding intervie...","['leetcode', 'coding interview question', 'dat...",False,True,...,26,95,112,2024-02-03 02:30:00,2024-02-03 23:00:46,Greg Hogg,81300,8471143,635,UUJublDh2UsiIKsAE1553miw
1,-PAq8nh5txE,37,680,17862,UCNU_lfiiWBdtULKOw6X0Dig,In this Mass Layoff Becoming Recession proof-I...,Becoming Recession proof,[],False,True,...,2,61,24,2024-02-03 02:47:19,2024-02-03 23:00:46,Krish Naik,881000,89632543,1809,UUNU_lfiiWBdtULKOw6X0Dig
2,-f1o7i0ExhM,10,626,6644,UCJublDh2UsiIKsAE1553miw,VERY COMMON FAANG INTERVIEW QUESTION | Leetcod...,"dynamic programming, leetcode, coding intervie...","['leetcode', 'coding interview question', 'dat...",False,True,...,26,67,112,2024-01-23 12:25:48,2024-01-23 23:00:42,Greg Hogg,81300,8471143,635,UUJublDh2UsiIKsAE1553miw
3,-ny5_RSMV6k,37,458,6256,UCNU_lfiiWBdtULKOw6X0Dig,End To End Multi Language Invoice Extractor Pr...,Join me in this exciting video as we develop a...,"['yt:cc=on', 'machine learnign tutorials', 'in...",False,True,...,28,90,2414,2023-12-27 04:41:32,2023-12-27 23:00:42,Krish Naik,881000,89632543,1809,UUNU_lfiiWBdtULKOw6X0Dig
4,04SnAo0pZLM,0,0,0,UC79Gv3mYp6zKiSwYemEik9A,Adding Value in Pharma Through Data & AI Trans...,,[],False,False,...,2,55,0,2024-01-25 12:03:50,2024-01-25 23:00:43,DataCamp,158000,23954192,1576,UU79Gv3mYp6zKiSwYemEik9A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,z4u_fts6yfE,0,2,58,UC79Gv3mYp6zKiSwYemEik9A,#178 Making SMARTER Decisions with Lori Silver...,We don’t think about every decision we make. S...,"['podcast', 'data', 'ai', 'dataframed', 'lori ...",False,False,...,24,94,3271,2024-02-01 09:58:37,2024-02-01 23:00:45,DataCamp,158000,23954192,1576,UU79Gv3mYp6zKiSwYemEik9A
272,zNPrifp7djc,0,0,0,UC79Gv3mYp6zKiSwYemEik9A,Get Started Analyzing Survey Data with SQL & P...,,[],False,False,...,2,51,0,2024-01-16 12:38:53,2024-01-16 23:00:41,DataCamp,158000,23954192,1576,UU79Gv3mYp6zKiSwYemEik9A
273,zT-cc1IblsQ,12,192,3546,UCNU_lfiiWBdtULKOw6X0Dig,Complete MLOPS Platform To Build LLMs Applicat...,PostgresML is a complete MLOps platform in a P...,"['yt::cc=on', 'postgresml', 'postgresml tutori...",False,True,...,25,94,2086,2023-12-18 14:51:44,2023-12-18 23:00:52,Krish Naik,881000,89632543,1809,UUNU_lfiiWBdtULKOw6X0Dig
274,zee_QlhO1kQ,0,1,0,UC79Gv3mYp6zKiSwYemEik9A,Data & AI Trends & Predictions 2024,,[],False,False,...,2,35,0,2024-01-08 12:00:11,2024-01-08 23:00:54,DataCamp,158000,23954192,1576,UU79Gv3mYp6zKiSwYemEik9A
