# Pull All Late Night TV Show YouTube Stats

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sys
from googleapiclient.discovery import build
import os

sys.path.append("../007-youtube-data-pull/")

from yt_pullers import pull_all_video_info, get_channel_id

# Read the YouTube Data API key from a local file kept outside the notebook.
with open("/home/robmulla/Repos/data-science-youtube-stats/apikey/youtube.key") as f:
    # Strip surrounding whitespace: a trailing newline in the key file would
    # otherwise be passed along as part of the developer key.
    youTubeApiKey = f.read().strip()
# Alternatively, assign your own API key string to `youTubeApiKey` here.
# Client object handed to the yt_pullers helpers below.
youtube = build("youtube", "v3", developerKey=youTubeApiKey)

%load_ext lab_black

In [2]:
def add_publish_cols(df):
    """Add publish-time breakdown columns and a view count in millions.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``publishTime`` column (timestamp strings or
        datetimes) and a ``viewCount`` column that supports division.

    Returns
    -------
    pandas.DataFrame
        ``df`` with ``publishTime`` parsed to tz-aware UTC datetimes and
        these columns added: ``publish_hour``, ``publish_minute``,
        ``publish_clock`` (fractional hour), ``publishTimeEastern``,
        ``publish_hour_eastern``, ``publish_minute_eastern``,
        ``publish_clock_eastern`` and ``viewCountM``.
    """
    # Parse first: the ``.dt`` accessor used below only exists on datetime
    # columns, so string timestamps must be converted before anything else.
    # ``utc=True`` guarantees tz-aware values, which ``tz_convert`` requires.
    df["publishTime"] = pd.to_datetime(df["publishTime"], utc=True)

    df["publish_hour"] = df["publishTime"].dt.hour
    df["publish_minute"] = df["publishTime"].dt.minute
    # Time of day as a single fractional-hour number (e.g. 15:30 -> 15.5).
    df["publish_clock"] = df["publish_hour"] + df["publish_minute"] / 60

    # Same breakdown expressed in US Eastern time.
    df["publishTimeEastern"] = df["publishTime"].dt.tz_convert("US/Eastern")
    df["publish_hour_eastern"] = df["publishTimeEastern"].dt.hour
    df["publish_minute_eastern"] = df["publishTimeEastern"].dt.minute
    df["publish_clock_eastern"] = (
        df["publish_hour_eastern"] + df["publish_minute_eastern"] / 60
    )

    df["viewCountM"] = df["viewCount"] / 1_000_000  # Million Views
    return df

# Loop Through Channels and Years and Pull Video Data

In [3]:
# Search terms used to look up each late-night show's channel.
channels = [
    "Stephen Colbert",
    "Jimmy Kimmel",
    "Jimmy Fallon",
    "Seth Meyers",
    "Daily Show",
]

for channel in channels:
    # The channel title does not depend on the year, so resolve it once per
    # channel instead of spending an API call on every (channel, year) pair.
    _, channelTitle = get_channel_id(channel, youtube, return_name=True)
    channelTitle = "_".join(channelTitle.split(" "))
    for year in [2020, 2021, 2022]:
        # One path is used for both the cache check and the write, so a file
        # produced by this loop is recognised on the next run.
        csv_path = f"{channelTitle}-{year}.csv"
        if os.path.exists(csv_path):
            print(f"It exists! {csv_path}")
            continue
        df = pull_all_video_info(
            channel,
            youtube,
            return_df=True,
            publishedAfter=f"{year}-01-01T00:00:00Z",
            # End at the last second of the year so that videos published on
            # December 31st are included.
            publishedBefore=f"{year}-12-31T23:59:59Z",
            max_results=2_000,
        )
        # Skip empty pulls rather than failing or caching an empty CSV
        # (assumes return_df=True yields a DataFrame -- confirm in yt_pullers).
        if df is None or df.empty:
            print(f"No videos found for {channel} in {year}; skipping")
            continue
        df = add_publish_cols(df)
        df.to_csv(csv_path, index=False)

It exists! The_Late_Show_with_Stephen_Colbert-2020.csv
It exists! The_Late_Show_with_Stephen_Colbert-2021.csv
It exists! The_Late_Show_with_Stephen_Colbert-2022.csv
It exists! Jimmy_Kimmel_Live-2020.csv
It exists! Jimmy_Kimmel_Live-2021.csv
It exists! Jimmy_Kimmel_Live-2022.csv


 20%|██████████████▊                                                           | 8/40 [00:02<00:11,  2.78it/s]


It exists! The_Tonight_Show_Starring_Jimmy_Fallon-2021.csv
It exists! The_Tonight_Show_Starring_Jimmy_Fallon-2022.csv


 20%|██████████████▊                                                           | 8/40 [00:02<00:10,  3.01it/s]


It exists! Late_Night_with_Seth_Meyers-2021.csv
It exists! Late_Night_with_Seth_Meyers-2022.csv


 22%|████████████████▋                                                         | 9/40 [00:03<00:10,  2.87it/s]


It exists! The_Daily_Show_with_Trevor_Noah-2021.csv
It exists! The_Daily_Show_with_Trevor_Noah-2022.csv
