# Import Libraries

In [158]:
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sns
import pickle
from datetime import timedelta, datetime
import datetime
import os
from dotenv import load_dotenv

# YouTube Connection

In [159]:
load_dotenv()

api_key = os.getenv('API_KEY')

youtube = build('youtube', 'v3', developerKey=api_key)

# Importing Data

In [160]:
df = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-25 23_00_40.csv')
df.head()

Unnamed: 0,video_id,channel_id,published_at,title,description,tags,category_id,duration,caption,licensed_content,default_language,content_rating,view_count,like_count,favourite_count,comment_count,extraction_date
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,2023-12-15T15:00:16Z,How to Build Custom ChatGPTs to Learn FAST in ...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",28,PT12M,True,True,en-US,{},4240,225,0,18,2023-12-25 23:00:40
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,2023-12-15T13:00:08Z,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",27,PT58S,False,True,en,{},28854,1161,0,19,2023-12-25 23:00:40
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,2023-12-15T16:00:16Z,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",27,PT15M11S,True,True,en-US,{},20779,1316,0,97,2023-12-25 23:00:40
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,2023-12-15T18:47:11Z,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",27,PT55M18S,True,False,en-US,{},266,6,0,1,2023-12-25 23:00:40
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,2023-12-15T16:35:30Z,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],24,P0D,False,False,en-US,{},0,0,0,0,2023-12-25 23:00:40


# Day Function

In [161]:
def day_gen(data):

    # converting published at
    data['published_at_formatted'] = data['published_at'].str.replace('Z','')
    data['published_at_formatted'] = data.published_at_formatted.apply(datetime.datetime.fromisoformat)

    # converting extraction date
    data['extraction_date_formatted'] = pd.to_datetime(data['extraction_date'], format='%Y-%m-%d %H:%M:%S')

    # creating Day column
    data['Day'] =  ((data['extraction_date_formatted'] - data['published_at_formatted']).dt.days) + 1

    data.drop(columns=['published_at', 'extraction_date'], inplace=True)

    return data

# Cleaning Function

In [162]:
def clean(data):
    
    # Columns to String
    columns_to_str = ['video_id', 'channel_id', 'title', 'description']

    for column in columns_to_str:
        data[column] = data[column].astype('string')

    # Object to Int
    columns_to_int = ['view_count', 'like_count', 'comment_count']

    for column in columns_to_int:
        data[column] = data[column].astype('int64')

    # YouTube Categories
    def get_categories(youtube, wanted_categories):

        all_data = []
        
        request = youtube.videoCategories().list(part='snippet', id=','.join(wanted_categories))
        
        response = request.execute()
        
        for i in range(len(response['items'])):
            data = dict(category_id = response['items'][i]['id'],
                    category= response['items'][i]['snippet']['title'])
            all_data.append(data)

        return all_data
    
    data['category_id'] = data['category_id'].astype('str')
    category_ids = data['category_id'].unique()
    category_dict = get_categories(youtube, category_ids)
    category_df = pd.DataFrame(category_dict)
    data = pd.merge(data, category_df, on='category_id', how='left')

    # Extracting Duration
    data['duration'] = data['duration'].str.replace('PT', '')
    def format_duration(time_string):

        # store hours, minutes, seconds as integers
        H = 0
        M = 0
        S = 0

        # check if vid time contains hours, minutes and/or seconds
        if 'H' in time_string:
            H += int(time_string.split('H')[0])
        if 'M' in time_string:
            M += int(time_string.split('M')[0].split('H')[-1])
        if 'S' in time_string:
            S += int(time_string.split('S')[0].split('M')[-1].split('H')[-1])
        
        formatted_time = timedelta(hours=H, minutes=M, seconds=S)

        return formatted_time

    data['duration_formatted'] = data['duration'].apply(format_duration)

    # Object to Bool
    columns_to_bool = ['caption', 'licensed_content']

    for column in columns_to_bool:
        data[column] = data[column].astype('bool')

    # Creating No. of Tags Column
    data['no_of_tags'] = data['tags'].apply(lambda x: len(set(x)))

    # Creating Title Length Column
    data['title_length'] = data['title'].apply(len)

    # Creating Description Length Column
    data['description_length'] = data['description'].apply(len)

    # Dropping Columns
    columns_to_drop = ['category_id', 'duration', 'content_rating']
    data.drop(columns=columns_to_drop, inplace=True)

    return data

# Testing Cleaning function on OG df

In [163]:
clean_df = clean(df)

In [164]:
clean_df = day_gen(clean_df)

In [165]:
clean_df.head()

Unnamed: 0,video_id,channel_id,title,description,tags,caption,licensed_content,default_language,view_count,like_count,favourite_count,comment_count,category,duration_formatted,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted,Day
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,How to Build Custom ChatGPTs to Learn FAST in ...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",True,True,en-US,4240,225,0,18,Science & Technology,0 days 00:12:00,36,52,4434,2023-12-15 15:00:16,2023-12-25 23:00:40,11
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,28854,1161,0,19,Education,0 days 00:00:58,26,63,91,2023-12-15 13:00:08,2023-12-25 23:00:40,11
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",True,True,en-US,20779,1316,0,97,Education,0 days 00:15:11,24,49,1974,2023-12-15 16:00:16,2023-12-25 23:00:40,11
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",True,False,en-US,266,6,0,1,Education,0 days 00:55:18,21,32,1069,2023-12-15 18:47:11,2023-12-25 23:00:40,11
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],False,False,en-US,0,0,0,0,Entertainment,0 days 00:00:00,2,64,1144,2023-12-15 16:35:30,2023-12-25 23:00:40,11


# Concat and Apply Days

In [166]:
def merge_clean_function(running_df, new_df):

    # cloud needs storage of names of dfs so it knows which one to add

    # cleaning the data

    clean_new_df = clean(new_df)

    # generate day column

    day_clean_new_df = day_gen(clean_new_df)
    
    # concat with running df

    running_df = pd.concat([running_df, day_clean_new_df])
    
    running_df.reset_index(drop=True, inplace=True)

    return running_df

# Testing with another days data

In [167]:
df_2 = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-26 23_00_41.csv')
df_2.head()

Unnamed: 0,video_id,channel_id,published_at,title,description,tags,category_id,duration,caption,licensed_content,default_language,content_rating,view_count,like_count,favourite_count,comment_count,extraction_date
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,2023-12-15T15:00:16Z,How to Build Custom ChatGPTs to Learn FAST in ...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",28,PT12M,True,True,en-US,{},4367,232,0,18,2023-12-26 23:00:41
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,2023-12-15T13:00:08Z,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",27,PT58S,False,True,en,{},29122,1170,0,20,2023-12-26 23:00:41
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,2023-12-15T16:00:16Z,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",27,PT15M11S,True,True,en-US,{},21151,1338,0,97,2023-12-26 23:00:41
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,2023-12-15T18:47:11Z,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",27,PT55M18S,True,False,en-US,{},270,6,0,1,2023-12-26 23:00:41
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,2023-12-15T16:35:30Z,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],24,P0D,False,False,en-US,{},0,0,0,0,2023-12-26 23:00:41


In [168]:
test_running_df = merge_clean_function(clean_df, df_2)

In [169]:
clean_df.shape

(55, 20)

In [170]:
df_2.shape

(59, 17)

In [171]:
test_running_df.shape

(114, 20)

In [172]:
test_running_df.head()

Unnamed: 0,video_id,channel_id,title,description,tags,caption,licensed_content,default_language,view_count,like_count,favourite_count,comment_count,category,duration_formatted,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted,Day
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,How to Build Custom ChatGPTs to Learn FAST in ...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",True,True,en-US,4240,225,0,18,Science & Technology,0 days 00:12:00,36,52,4434,2023-12-15 15:00:16,2023-12-25 23:00:40,11
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,28854,1161,0,19,Education,0 days 00:00:58,26,63,91,2023-12-15 13:00:08,2023-12-25 23:00:40,11
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",True,True,en-US,20779,1316,0,97,Education,0 days 00:15:11,24,49,1974,2023-12-15 16:00:16,2023-12-25 23:00:40,11
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",True,False,en-US,266,6,0,1,Education,0 days 00:55:18,21,32,1069,2023-12-15 18:47:11,2023-12-25 23:00:40,11
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],False,False,en-US,0,0,0,0,Entertainment,0 days 00:00:00,2,64,1144,2023-12-15 16:35:30,2023-12-25 23:00:40,11


# Adding another Day

In [173]:
df_3 = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-27 23_00_43.csv')
df_3.head()

Unnamed: 0,video_id,channel_id,published_at,title,description,tags,category_id,duration,caption,licensed_content,default_language,content_rating,view_count,like_count,favourite_count,comment_count,extraction_date
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,2023-12-15T15:00:16Z,How to Build Custom ChatGPTs to Learn FAST in ...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",28,PT12M,True,True,en-US,{},4457,233,0,18,2023-12-27 23:00:42
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,2023-12-15T13:00:08Z,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",27,PT58S,False,True,en,{},29266,1179,0,20,2023-12-27 23:00:42
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,2023-12-15T16:00:16Z,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",27,PT15M11S,True,True,en-US,{},21650,1357,0,97,2023-12-27 23:00:42
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,2023-12-15T18:47:11Z,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",27,PT55M18S,True,False,en-US,{},277,6,0,2,2023-12-27 23:00:42
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,2023-12-15T16:35:30Z,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],24,P0D,False,False,en-US,{},0,0,0,0,2023-12-27 23:00:42


In [174]:
test_2_running_df = merge_clean_function(test_running_df, df_3)

In [175]:
test_2_running_df.head()

Unnamed: 0,video_id,channel_id,title,description,tags,caption,licensed_content,default_language,view_count,like_count,favourite_count,comment_count,category,duration_formatted,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted,Day
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,How to Build Custom ChatGPTs to Learn FAST in ...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",True,True,en-US,4240,225,0,18,Science & Technology,0 days 00:12:00,36,52,4434,2023-12-15 15:00:16,2023-12-25 23:00:40,11
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,28854,1161,0,19,Education,0 days 00:00:58,26,63,91,2023-12-15 13:00:08,2023-12-25 23:00:40,11
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",True,True,en-US,20779,1316,0,97,Education,0 days 00:15:11,24,49,1974,2023-12-15 16:00:16,2023-12-25 23:00:40,11
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",True,False,en-US,266,6,0,1,Education,0 days 00:55:18,21,32,1069,2023-12-15 18:47:11,2023-12-25 23:00:40,11
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],False,False,en-US,0,0,0,0,Entertainment,0 days 00:00:00,2,64,1144,2023-12-15 16:35:30,2023-12-25 23:00:40,11


# Developing function for requesting database

In [176]:
def database_creator(data, days):

    # Filter by requested days

    filter_list = list(data[data['Day'] == int(days)]['video_id'])
    filtered_df = data[data['video_id'].isin(filter_list)]
    filtered_df = filtered_df[filtered_df['Day'] <= int(days)]

    # Flip
    


    # Add channel information



    return new_data

### Testing Flip

In [360]:
df_a = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-15 23_01_00 (2).csv')
df_b = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-16 23_00_40 (1).csv')
df_c = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-17 23_00_40 (1).csv')
df_d = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-18 23_00_52 (1).csv')
df_e = pd.read_csv('cloud_function_data_youtube_video_data_2023-12-19 23_00_41.csv')

In [361]:
clean_a = clean(df_a)
final_a = day_gen(clean_a)

In [362]:
test_flip_df = merge_clean_function(final_a, df_b)
test_flip_df = merge_clean_function(test_flip_df, df_c)
test_flip_df = merge_clean_function(test_flip_df, df_d)
test_flip_df = merge_clean_function(test_flip_df, df_e)

In [363]:
filter_list_flip = list(test_flip_df[test_flip_df['Day'] == 3]['video_id'])
filtered_test_flip_df = test_flip_df[test_flip_df['video_id'].isin(filter_list_flip)]
filtered_test_flip_df = filtered_test_flip_df[filtered_test_flip_df['Day'] <= 3]

In [364]:
len(filter_list_flip)

12

In [365]:
filtered_test_flip_df['Day'].value_counts()

1    12
2    12
3    12
Name: Day, dtype: int64

In [198]:
# adjust display options to show all columns
pd.set_option('display.max_columns', None)

In [239]:
filtered_test_flip_df.shape

(36, 20)

In [223]:
filtered_test_flip_df.head()

Unnamed: 0,video_id,channel_id,title,description,tags,caption,licensed_content,default_language,view_count,like_count,favourite_count,comment_count,category,duration_formatted,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted,Day
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,I Built Python & Data Analyst Custom ChatGPT f...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",True,True,en-US,1285,94,0,10,Science & Technology,0 days 00:12:00,37,62,4194,2023-12-15 15:00:16,2023-12-15 23:01:00,Day_1
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,4971,338,0,8,Education,0 days 00:00:58,26,63,91,2023-12-15 13:00:08,2023-12-15 23:01:00,Day_1
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",True,True,en-US,4912,456,0,27,Education,0 days 00:15:11,24,49,1932,2023-12-15 16:00:16,2023-12-15 23:01:00,Day_1
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",True,False,en-US,52,1,0,0,Education,0 days 00:55:18,21,32,1069,2023-12-15 18:47:11,2023-12-15 23:01:00,Day_1
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],False,False,en-US,0,0,0,0,Entertainment,0 days 00:00:00,2,64,1144,2023-12-15 16:35:30,2023-12-15 23:01:00,Day_1


In [350]:
filtered_test_flip_df.columns

Index(['video_id', 'channel_id', 'title', 'description', 'tags', 'caption',
       'licensed_content', 'default_language', 'view_count', 'like_count',
       'favourite_count', 'comment_count', 'category', 'duration_formatted',
       'no_of_tags', 'title_length', 'description_length',
       'published_at_formatted', 'extraction_date_formatted', 'Day'],
      dtype='object')

In [351]:
filtered_test_flip_df.head()

Unnamed: 0,video_id,channel_id,title,description,tags,caption,licensed_content,default_language,view_count,like_count,favourite_count,comment_count,category,duration_formatted,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted,Day
0,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,I Built Python & Data Analyst Custom ChatGPT f...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",True,True,en-US,1285,94,0,10,Science & Technology,0 days 00:12:00,37,62,4194,2023-12-15 15:00:16,2023-12-15 23:01:00,1
1,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,4971,338,0,8,Education,0 days 00:00:58,26,63,91,2023-12-15 13:00:08,2023-12-15 23:01:00,1
2,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",True,True,en-US,4912,456,0,27,Education,0 days 00:15:11,24,49,1932,2023-12-15 16:00:16,2023-12-15 23:01:00,1
3,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",True,False,en-US,52,1,0,0,Education,0 days 00:55:18,21,32,1069,2023-12-15 18:47:11,2023-12-15 23:01:00,1
4,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],False,False,en-US,0,0,0,0,Entertainment,0 days 00:00:00,2,64,1144,2023-12-15 16:35:30,2023-12-15 23:01:00,1


In [366]:
# Create a new column for each day's views
filtered_test_flip_df['Day'] = 'Day_' + filtered_test_flip_df['Day'].astype(str)
df_pivoted = filtered_test_flip_df.pivot_table(index='video_id', columns='Day', values=['view_count', 'like_count', 'comment_count'], aggfunc='first', fill_value=0).reset_index()

columns_to_keep = ['video_id', 'channel_id', 'title', 'description', 'tags', 'caption',
       'licensed_content', 'default_language','category', 'duration_formatted',
       'no_of_tags', 'title_length', 'description_length',
       'published_at_formatted', 'extraction_date_formatted']
df_unique_ids = filtered_test_flip_df[columns_to_keep].copy()

df_unique_ids.drop_duplicates(subset='video_id', keep='first', inplace=True)

df_unique_ids.reset_index(drop=True, inplace=True)

# Merge with the original DataFrame to include other information
result_df = pd.concat([df_pivoted, df_unique_ids], axis=1)

result_df

Unnamed: 0,"(video_id, )","(comment_count, Day_1)","(comment_count, Day_2)","(comment_count, Day_3)","(like_count, Day_1)","(like_count, Day_2)","(like_count, Day_3)","(view_count, Day_1)","(view_count, Day_2)","(view_count, Day_3)",video_id,channel_id,title,description,tags,caption,licensed_content,default_language,category,duration_formatted,no_of_tags,title_length,description_length,published_at_formatted,extraction_date_formatted
0,0wIDVTUWU3c,9,15,14,252,474,539,3699,8200,9429,LciDiBeBCyY,UCteRPiisgIoHtMgqHegpWAQ,I Built Python & Data Analyst Custom ChatGPT f...,Get your Excel data visualization template (fr...,"['data science', 'data scientist', 'self-taugh...",True,True,en-US,Science & Technology,0 days 00:12:00,37,62,4194,2023-12-15 15:00:16,2023-12-15 23:01:00
1,3QvcSvWb9es,17,28,34,414,546,625,7215,10896,12559,56i1uBshzmA,UCJublDh2UsiIKsAE1553miw,My Favorite Stack Question! | Daily Temperatur...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,Education,0 days 00:00:58,26,63,91,2023-12-15 13:00:08,2023-12-15 23:01:00
2,56i1uBshzmA,8,11,14,338,671,863,4971,14058,19248,OPyoXx0yA0I,UCVhQ2NnY5Rskt6UjCUkJ_DA,Requests vs Httpx vs Aiohttp | Which One to Pick?,"Exploring API communication in your app, consi...","['requests vs httpx', 'httpx', 'python request...",True,True,en-US,Education,0 days 00:15:11,24,49,1932,2023-12-15 16:00:16,2023-12-15 23:01:00
3,9Oh-hDT_VaY,25,32,35,491,724,829,5891,13803,18580,WjhKxXCwFZA,UCzL_0nIe8B4-7ShhVPfJkgw,Data Analytics and Generative AI,Generative AI is a rapidly evolving field with...,"['data analytics', 'generative ai', 'large lan...",True,False,en-US,Education,0 days 00:55:18,21,32,1069,2023-12-15 18:47:11,2023-12-15 23:01:00
4,H8nm5a99_DA,0,0,1,5,7,7,242,301,335,NI1Psgs1tyI,UCzL_0nIe8B4-7ShhVPfJkgw,Enterprise LLM Applications - Not Just a Techn...,Generative Al and Large Language Models have t...,[],False,False,en-US,Entertainment,0 days 00:00:00,2,64,1144,2023-12-15 16:35:30,2023-12-15 23:01:00
5,LciDiBeBCyY,10,15,15,94,162,176,1285,2688,3102,H8nm5a99_DA,UCObs0kLIrDjX2LLSybqNaEA,"From India to US: Tips, Insights and Experienc...",Welcome to the sixth episode of the Study Abro...,"['Great Learning', 'Data Analytics', 'Data Sci...",False,False,,Education,0 days 00:37:32,30,76,1433,2023-12-15 13:30:06,2023-12-15 23:01:00
6,NI1Psgs1tyI,0,0,0,0,0,0,0,0,0,0wIDVTUWU3c,UCJublDh2UsiIKsAE1553miw,Facebook Asked This Coding Question 50 TIMES!!...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,Education,0 days 00:00:58,26,68,91,2023-12-16 13:00:02,2023-12-16 23:00:40
7,OPyoXx0yA0I,27,47,53,456,862,983,4912,11558,13798,3QvcSvWb9es,UCJublDh2UsiIKsAE1553miw,I Couldn't Get This Coding Question - Can You?...,"leetcode, coding interview question, data stru...","['leetcode', 'coding interview question', 'dat...",False,True,en,Education,0 days 00:01:00,26,100,91,2023-12-16 01:00:01,2023-12-16 23:00:40
8,WjhKxXCwFZA,0,0,0,1,4,5,52,115,134,tjo9NJBmExM,UCzL_0nIe8B4-7ShhVPfJkgw,Vector Embeddings for Semantic Search,"In today's information-rich world, the ability...",[],False,False,en-US,Entertainment,0 days 00:00:00,2,37,1053,2023-12-15 23:51:29,2023-12-16 23:00:40
9,kX9iZTQNDb4,6,8,8,270,468,535,4191,9141,10296,9Oh-hDT_VaY,UCNU_lfiiWBdtULKOw6X0Dig,Power Of Open Source Contribution-50+ End To E...,Start Contributing in Open Source Projects The...,"['yt:cc=on', 'end to end ml projects', 'end to...",False,True,,Education,0 days 00:06:06,25,93,2373,2023-12-17 12:32:34,2023-12-17 23:00:40


In [278]:
df_pivoted

Unnamed: 0_level_0,video_id,comment_count,comment_count,comment_count,like_count,like_count,like_count,view_count,view_count,view_count
Day,Unnamed: 1_level_1,Day_1,Day_2,Day_3,Day_1,Day_2,Day_3,Day_1,Day_2,Day_3
0,0wIDVTUWU3c,9,15,14,252,474,539,3699,8200,9429
1,3QvcSvWb9es,17,28,34,414,546,625,7215,10896,12559
2,56i1uBshzmA,8,11,14,338,671,863,4971,14058,19248
3,9Oh-hDT_VaY,25,32,35,491,724,829,5891,13803,18580
4,H8nm5a99_DA,0,0,1,5,7,7,242,301,335
5,LciDiBeBCyY,10,15,15,94,162,176,1285,2688,3102
6,NI1Psgs1tyI,0,0,0,0,0,0,0,0,0
7,OPyoXx0yA0I,27,47,53,456,862,983,4912,11558,13798
8,WjhKxXCwFZA,0,0,0,1,4,5,52,115,134
9,kX9iZTQNDb4,6,8,8,270,468,535,4191,9141,10296
