# Youtube API. Hornet 750 release impact analysis.

In this project I want to analyze the impact that the release of the new hornet 750 is having in the motorciclng community in Spain.

In [7]:
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sb

In [8]:
api_key = "AIzaSyCaMjH-qcvfi8YmXTRPwkNxRb_nE72yzmg"

youtube = build('youtube', 'v3', developerKey=api_key)

## Function to search videos and channels talking about the new hornet 750.

In [9]:
def get_videos_channels(youtube):
    # initialize an empty list to store the videos
    videos = []
    # set the value of next page token as None
    next_page_token = None
    
    # loop until there are no more videos to get
    while True:
        # make an API request to search for videos
        request = youtube.search().list(
            part='snippet', 
            q='hornet 750', 
            publishedAfter="2022-10-01T00:00:00Z", 
            regionCode="ES", 
            relevanceLanguage="ES", 
            maxResults=50, 
            type="video", 
            pageToken=next_page_token # the page token for the next page of results
        )
    
        response = request.execute()
        
        # extend the videos list with the items in the response
        videos.extend(response['items'])
        
        # check if there is a next page token in the response
        if 'nextPageToken' in response:
            # if yes, set the next page token to the value in the response
            next_page_token = response['nextPageToken']
        else:
            break
    
    # create a dictionary to store the video data
    data = dict(
        publish_date=[], 
        video_id=[], 
        channel_id=[] 
    )
    
    # append the video's data to the data dictionary
    for video in videos:
        data["video_id"].append(video["id"]["videoId"])
        data["publish_date"].append(video["snippet"]["publishedAt"])
        data["channel_id"].append(video["snippet"]["channelId"])
               
    # create a Pandas DataFrame from the data dictionary and return it
    df = pd.DataFrame(data)
    return df

In [10]:
videosid_channelsid_table = get_videos_channels(youtube)

In [12]:
display(videosid_channelsid_table)

Unnamed: 0,publish_date,video_id,channel_id
0,2023-03-02T21:01:29Z,XmmEWk5Ns4o,UCmkkEZtKJr5CA4v4218yUHQ
1,2022-11-26T09:00:27Z,i_9zHrlFCwM,UCbH2dPOgtG419kbB5n2ADTw
2,2022-12-07T19:00:09Z,GDs7scfe2pA,UC-JjgxTWdLKFZqjbm7j7OCw
3,2022-12-01T14:34:59Z,u4C_PqYBKmY,UCF8RmRYnnRDweRz9vTSNyWA
4,2022-12-12T19:07:19Z,ROFC2tYHKSw,UCf6YZwgEGBgi9OhksMs6o8A
...,...,...,...
561,2023-03-05T08:56:27Z,wARPoTvrZpw,UCX9MX5e5kk6aVgy0tz-Jkug
562,2023-03-02T17:30:03Z,NbPaBA09R1I,UCkegEsZItEPQNItECCZA_pw
563,2023-03-05T15:30:06Z,1aKwH8zinN8,UCEGaKGsDtbcU3U_KGcjh0HA
564,2023-03-07T15:41:01Z,bQA53f_ueyk,UCS-cgYslpMpH5FkxJ2e0Vpg


## Function to get channel statistics

In [13]:
def get_channel_stats(youtube, channel_id):
    # Define the maximum number of results per API request
    max_results_per_request = 50

    # Initialize the data dictionary to store the results
    data = dict(channel_id=[],
                channel_name=[],
                channel_country=[],
                channel_viewCount=[],
                channel_subscriberCount=[],
                channel_videoCount=[])

    # Make multiple API requests using pagination
    for i in range(0, len(channel_id), max_results_per_request):
        # Get a chunk of the channel_id list
        id_chunk = channel_id[i:i + max_results_per_request]

        # Make an API request with the chunk of channel_id
        request = youtube.channels().list(part='snippet,contentDetails,statistics',
                                           id=','.join(id_chunk),
                                           maxResults=max_results_per_request)
        response = request.execute()

        # Append the results to the data dictionary
        for item in response['items']:
            data['channel_id'].append(item['id'])
            data['channel_name'].append(item['snippet']['title'])
            data['channel_country'].append(item['snippet'].get('country'))
            data['channel_viewCount'].append(item['statistics']['viewCount'])
            data['channel_subscriberCount'].append(item['statistics']['subscriberCount'])
            data['channel_videoCount'].append(item['statistics']['videoCount'])

    # Convert the data dictionary to a pandas dataframe and return it
    return pd.DataFrame(data)

In [124]:
channel_stats_table = get_channel_stats(youtube,videosid_channelsid_table ["channel_id"].tolist())

In [125]:
display(channel_stats_table)

Unnamed: 0,channel_id,channel_name,channel_country,channel_viewCount,channel_subscriberCount,channel_videoCount
0,UCbH2dPOgtG419kbB5n2ADTw,Todocircuito.com,ES,69573860,205000,1025
1,UCdGqEC2k0KhQCV2j9Kh2b8g,2 Wheels & Wings | MOTO POV,PT,790452,3350,71
2,UCmkkEZtKJr5CA4v4218yUHQ,Majes en Moto,ES,184384664,1050000,405
3,UCV6BMi51B6ABwwT4B9LRHvg,Motofichas,ES,9066753,32900,306
4,UC4Bnco22QUtHEk7dkKBrhiw,MOTORISMO,CO,12112187,79300,176
...,...,...,...,...,...,...
509,UCMSA4KvFVDy45aQ3OnrGADA,thierry vigneau Boiserie,FR,267282161,1360000,508
510,UCS-cgYslpMpH5FkxJ2e0Vpg,Newsader,LT,272365962,671000,1773
511,UCkegEsZItEPQNItECCZA_pw,Yammie Noob,US,305980657,1180000,1553
512,UCHcW0B4ul2INdp1Md6eUFbg,Tamayovisión (Canal Secundario),ES,39663929,213000,512


## Function to get video statistics

In [129]:
def get_video_stats(youtube, video_id):
    # Define the maximum number of results per API request
    max_results_per_request = 50

    # Initialize the data dictionary to store the results
    data = dict(video_id=[],
                video_title=[],
                video_publishedAt=[],
                video_channelId=[],
                video_viewCount=[],
                video_likeCount=[],
                video_favoriteCount=[],
                video_commentCount=[],
                video_duration=[]
               )

    # Make multiple API requests using pagination
    for i in range(0, len(video_id), max_results_per_request):
        # Get a chunk of the channel_id list
        id_chunk = video_id[i:i + max_results_per_request]

        # Make an API request with the chunk of video_id
        request = youtube.videos().list(part='snippet,contentDetails,statistics',
                                           id=','.join(id_chunk),
                                           maxResults=max_results_per_request)
        response = request.execute()

        # Append the results to the data dictionary
        for item in response['items']:
            data['video_id'].append(item['id'])
            data['video_title'].append(item['snippet']['title'])
            data['video_publishedAt'].append(item['snippet']['publishedAt'])
            data['video_channelId'].append(item['snippet']['channelId'])
            data['video_viewCount'].append(item['statistics']['viewCount'])
            data['video_likeCount'].append(item['statistics'].get('likeCount'))
            data['video_favoriteCount'].append(item['statistics']['favoriteCount'])
            data['video_commentCount'].append(item['statistics'].get('commentCount'))
            data['video_duration'].append(item['contentDetails']['duration'])

    # Convert the data dictionary to a pandas dataframe and return it
    return pd.DataFrame(data)

In [131]:
video_stats_table = get_video_stats(youtube,videosid_channelsid_table ["video_id"].tolist())

In [132]:
video_stats_table

Unnamed: 0,video_id,video_title,video_publishedAt,video_channelId,video_viewCount,video_likeCount,video_favoriteCount,video_commentCount,video_duration
0,XmmEWk5Ns4o,PRUEBO la NUEVA HORNET 750 [HONESTO y CON PREJ...,2023-03-02T21:01:29Z,UCmkkEZtKJr5CA4v4218yUHQ,195391,16606,0,1096,PT18M36S
1,i_9zHrlFCwM,HONDA CB750 Hornet | Prueba,2022-11-26T09:00:27Z,UCbH2dPOgtG419kbB5n2ADTw,50089,1851,0,345,PT14M18S
2,GDs7scfe2pA,Primer contacto Honda CB 750 HORNET | Motosx1000,2022-12-07T19:00:09Z,UC-JjgxTWdLKFZqjbm7j7OCw,29153,804,0,96,PT11M18S
3,u4C_PqYBKmY,Honda CB 750 Hornet 2023 | Prueba / Test / Rev...,2022-12-01T14:34:59Z,UCF8RmRYnnRDweRz9vTSNyWA,31610,1214,0,97,PT9M19S
4,ROFC2tYHKSw,Honda CB750 Hornet | Prueba y Opinión,2022-12-12T19:07:19Z,UCf6YZwgEGBgi9OhksMs6o8A,29746,646,0,137,PT34M51S
...,...,...,...,...,...,...,...,...,...
558,4c76KjSbcBU,KTM 790 Adventure - 2023,2023-03-03T20:38:18Z,UC6sRB1ceiQCERsaKMleJCVw,485,1,0,0,PT2M32S
559,NbPaBA09R1I,Is a 600cc Motorcycle BEGINNER FRIENDLY if you...,2023-03-02T17:30:03Z,UCkegEsZItEPQNItECCZA_pw,22412,922,0,165,PT8M21S
560,1aKwH8zinN8,Top 10 Roncos de motores 4 Cilindros/ melhores...,2023-03-05T15:30:06Z,UCEGaKGsDtbcU3U_KGcjh0HA,3636,582,0,96,PT12M6S
561,bQA53f_ueyk,"Reaper с ракетой на 550 км: ЖНЕЦ, которого жду...",2023-03-07T15:41:01Z,UCS-cgYslpMpH5FkxJ2e0Vpg,69052,5861,0,190,PT19M27S


## Initial checks and data cleaning

Now that we have obteined the data from the youtube API we can start cleaning and transforming it for future analysis. Since  both "video_stats_table" and "channel_stats_table" share the "channel_id" we are going to merge them.

In [138]:
#first we need to rename the chanel id colum in video_stats_table.
renamed_video_stats_table.rename(columns = {'video_channelId':'channel_id'}, inplace = True)

#now we can merge both tables on channel_id:
video_stats_table.merge()

Unnamed: 0,video_id,video_title,video_publishedAt,channel_id,video_viewCount,video_likeCount,video_favoriteCount,video_commentCount,video_duration
0,XmmEWk5Ns4o,PRUEBO la NUEVA HORNET 750 [HONESTO y CON PREJ...,2023-03-02T21:01:29Z,UCmkkEZtKJr5CA4v4218yUHQ,195391,16606,0,1096,PT18M36S
1,i_9zHrlFCwM,HONDA CB750 Hornet | Prueba,2022-11-26T09:00:27Z,UCbH2dPOgtG419kbB5n2ADTw,50089,1851,0,345,PT14M18S
2,GDs7scfe2pA,Primer contacto Honda CB 750 HORNET | Motosx1000,2022-12-07T19:00:09Z,UC-JjgxTWdLKFZqjbm7j7OCw,29153,804,0,96,PT11M18S
3,u4C_PqYBKmY,Honda CB 750 Hornet 2023 | Prueba / Test / Rev...,2022-12-01T14:34:59Z,UCF8RmRYnnRDweRz9vTSNyWA,31610,1214,0,97,PT9M19S
4,ROFC2tYHKSw,Honda CB750 Hornet | Prueba y Opinión,2022-12-12T19:07:19Z,UCf6YZwgEGBgi9OhksMs6o8A,29746,646,0,137,PT34M51S
...,...,...,...,...,...,...,...,...,...
558,4c76KjSbcBU,KTM 790 Adventure - 2023,2023-03-03T20:38:18Z,UC6sRB1ceiQCERsaKMleJCVw,485,1,0,0,PT2M32S
559,NbPaBA09R1I,Is a 600cc Motorcycle BEGINNER FRIENDLY if you...,2023-03-02T17:30:03Z,UCkegEsZItEPQNItECCZA_pw,22412,922,0,165,PT8M21S
560,1aKwH8zinN8,Top 10 Roncos de motores 4 Cilindros/ melhores...,2023-03-05T15:30:06Z,UCEGaKGsDtbcU3U_KGcjh0HA,3636,582,0,96,PT12M6S
561,bQA53f_ueyk,"Reaper с ракетой на 550 км: ЖНЕЦ, которого жду...",2023-03-07T15:41:01Z,UCS-cgYslpMpH5FkxJ2e0Vpg,69052,5861,0,190,PT19M27S
