In [None]:
!pip install --upgrade google-api-python-client

In [None]:
!pip install --upgrade google-auth-oauthlib google-auth-httplib2

In [1]:
from googleapiclient.discovery import build
import pandas as pd
from IPython.display import JSON
import urllib.request
import re

Los proyectos que habilitan la API de datos de YouTube tienen una asignaci√≥n de cuota predeterminada de 10,000 unidades por d√≠a, una cantidad suficiente para la gran mayor√≠a de los usuarios de la API. La cuota predeterminada, que est√° sujeta a cambios, nos ayuda a optimizar las asignaciones de cuotas y a escalar nuestra infraestructura de una manera que sea m√°s significativa para nuestros usuarios de API. Puedes ver el uso de tu cuota en la p√°gina Cuotas en la Consola de API.

Nota: Si alcanzas el l√≠mite de cuota, puedes solicitar un aumento del cuota a trav√©s del formulario de solicitud de extensi√≥n de cuota para los servicios de la API de YouTube.

Calcula el uso de la cuota

Google calcula el uso de tu cuota mediante la asignaci√≥n de un costo a cada solicitud. Los diferentes tipos de operaciones tienen diferentes costos de cuotas. Por ejemplo:

Una operaci√≥n de lectura que recupera una lista de recursos (canales, videos o listas de reproducci√≥n) suele costar 1 unidad.
Una operaci√≥n de escritura que crea, actualiza o borra un recurso suele tener un costo de 50 unidades.
Una solicitud de b√∫squeda cuesta 100 unidades.
La carga de un video cuesta 1600 unidades.

## Definici√≥n API

In [2]:
api_key = "XXX"
api_service_name = "youtube"
api_version = "v3"

youtube = build(
    api_service_name, api_version, developerKey=api_key)

## Obtencion comentarios en videos del Canal

Buscar en el codigo fuente de la pagina para obtener el ID <br>
https://www.youtube.com/channel/

### Obtenci√≥n de Id del canal

In [3]:
def extract_channel_ids(urls):
    all_channels = []
    for url in urls:
        with urllib.request.urlopen(url) as response:
            html_content = response.read().decode('utf-8')

        pattern = r'https://www.youtube.com/channel/([^"]+)'
        matches = re.findall(pattern, html_content)

        all_channels.append(matches[0])

    return all_channels

In [4]:
urls = [
    'https://www.youtube.com/@Samsung',
    #More channels
]
channel_ids = extract_channel_ids(urls)

In [5]:
def get_channel_stats(youtube, channel_ids):
    all_data = []

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
                }
        all_data.append(data)

    return (pd.DataFrame(all_data))

In [6]:
channel_stats = get_channel_stats(youtube, channel_ids)
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Samsung,6400000,1531062109,1794,UUWwgaK7x0_FR1goeSRazfsQ


### Obtencion videos del canal

In [10]:
def get_video_ids(youtube, playlist_id, limit=None, output_file="../Data/youtube/samsung_yt_video_ids.txt"):
    video_ids = []
    next_page_token = True
    total_results = 0

    while next_page_token is not None and (limit is None or total_results < limit):
        request = youtube.playlistItems().list(
            part="snippet,contentDetails",
            playlistId=playlist_id,
            maxResults=50  # M√°ximo permitido por la API
        )
        response = request.execute()

        for item in response['items']:
            video_id = item['contentDetails']['videoId']
            video_ids.append(video_id)
            total_results += 1

            # Escribir el ID del video en el archivo de salida
            with open(output_file, "a") as file:
                file.write(video_id + "\n")

            if limit is not None and total_results == limit:
                break

        next_page_token = response.get('nextPageToken')

    return video_ids


In [11]:
video_ids = []
for index, row in channel_stats.iterrows():
    playlist_id = row['playlistId']
    video_ids.extend(get_video_ids(youtube, playlist_id, limit=1794))

video_ids

['IzRSSykkc8U',
 'FoSSammFUqg',
 'QUENvNWVwp4',
 'SWh6VwXtZz4',
 'qHkDBfv74G4',
 'nDjl_R-noLw',
 'flg9px2X_C8',
 'WPYoUbUlf3M',
 'AeodzI4CdlE',
 'GBMDlV0W-dc',
 '2-Qf3-CTzX4',
 'RcdL9c7gerk',
 '4P65EJKIUwM',
 'KRnrk3EPdVE',
 'fjlaXzVYg44',
 'RQOpTsT_Svs',
 'vAEEg-OUrDg',
 'sYwHHoLlIIU',
 'lCL5ISyBkP4',
 'C_nOaWb2uRU',
 'jkpGvdmNSfU',
 'QY0ZRnhOKis',
 '6MQgq7zLB5Y',
 '0_7v3HBM-ds',
 '08JME7cL-6c',
 '242XX-vChOg',
 'dpQgdLOoE5c',
 'w_liHU_V2Rw',
 '8EDuzxX_IWo',
 'Jv6lwUSoEo0',
 'xlOS4kQPxf0',
 'anMLprFevKo',
 'svoc2dfPDpc',
 'UWtNvCjobX4',
 'hBHTOA_WBVo',
 'DS8l_GODmkU',
 '9XAebLYhVRY',
 'lkkbScpePXI',
 'LP1udQBcggM',
 'OSJwBAi9DpM',
 'hbWA5tt6Z8c',
 'Bsulx9U4xhI',
 '7VrJlocfTGU',
 'wXhV4W-2ipg',
 'pfk5b56McaE',
 'qwjjZ0fHBV0',
 'U12uTjXIGeI',
 'IVlLmMsyS8o',
 'Gml1zoCGev8',
 'XK9UGN3JEHU',
 'IzRSSykkc8U',
 'FoSSammFUqg',
 'QUENvNWVwp4',
 'SWh6VwXtZz4',
 'qHkDBfv74G4',
 'nDjl_R-noLw',
 'flg9px2X_C8',
 'WPYoUbUlf3M',
 'AeodzI4CdlE',
 'GBMDlV0W-dc',
 '2-Qf3-CTzX4',
 'RcdL9c7gerk',
 '4P65EJ

### Obtencion estad√≠sticas de v√≠deos

In [12]:
def get_video_details(youtube, video_ids):
        all_video_info = []

        for i in range(0, len(video_ids), 50):
                request = youtube.videos().list(
                        part="snippet,contentDetails,statistics",
                        id=','.join(video_ids[i:i+50])
                )
                response = request.execute()

                for video in response['items']:
                        stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                                        'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                                        'contentDetails': ['duration', 'definition', 'caption']
                                        }
                        video_info = {}
                        video_info['video_id'] = video['id']

                        for k in stats_to_keep.keys():
                                for v in stats_to_keep[k]:
                                        try:
                                                video_info[v] = video[k][v]
                                        except:
                                                video_info[v] = None
                        
                        all_video_info.append(video_info)

        return pd.DataFrame(all_video_info)

In [15]:
#with open('../Data/youtube/samsung_yt_video_ids.txt', 'r') as file:
    #video_ids = file.read().splitlines()

video_df = get_video_details(youtube, video_ids)
video_df.to_csv('../Data/youtube/samsung_yt_video_stats.csv', sep=';', index=False)
video_df.head(5)

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,IzRSSykkc8U,Samsung,"Samsung Unpacked: Join the expressive side, fo...",Learn more: http://smsng.co/JTFS_Expressive_yt...,"[Samsung, join the flip side, latest samsung g...",2023-07-12T13:00:18Z,8501,835,,21,PT14S,hd,False
1,FoSSammFUqg,Samsung,Samsung Galaxy Unpacked: Visit us at Galaxy Ex...,Who's open to trying new things? Come visit us...,"[galaxy unpacked, galaxy unpacked 2023, ilp, n...",2023-07-12T12:00:22Z,3934,379,,7,PT18S,hd,False
2,QUENvNWVwp4,Samsung,Samsung Galaxy: Join the flip side,What others see as ordinary; we see as an oppo...,"[Samsung‚Äã, join the flip side, latest¬†samsung¬†...",2023-07-12T11:00:20Z,29488,1519,,88,PT20S,hd,False
3,SWh6VwXtZz4,Samsung,Display Solutions: Case Study - SoFi Stadium |...,Samsung‚Äôs display solutions create technologic...,[samsung],2023-07-11T08:01:25Z,7434,444,,33,PT3M17S,hd,True
4,qHkDBfv74G4,Samsung,"With experts impressed by our TVs, it‚Äôs now yo...",Samsung has done it again! We have been ranked...,"[2023 Neo QLED 4K, 2023 TV, 4K TV, Best Screen...",2023-07-11T02:02:54Z,18251,870,,66,PT2M35S,hd,False


### Comentarios

In [16]:
def get_comments_in_videos(youtube, video_ids, limit):
    all_comments = []
    
    for video_id in video_ids:
        try:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=limit
            )
            response = request.execute()
        
            comments_in_video = [
                {
                    'comment': comment['snippet']['topLevelComment']['snippet']['textOriginal'],
                    'date': comment['snippet']['topLevelComment']['snippet']['publishedAt']
                }
                for comment in response['items']
            ]
            
            for comment_info in comments_in_video:
                comment_info['video_id'] = video_id
                all_comments.append(comment_info)
        
        except Exception as e:
            print(f"Could not get comments for video {video_id}: {str(e)}")
    
    return pd.DataFrame(all_comments)

In [17]:
#with open('../Data/youtube/samsung_yt_video_ids.txt', 'r') as file:
    #video_ids = file.read().splitlines()

comments_df = get_comments_in_videos(youtube, video_ids , limit=None)
comments_df.to_csv('../Data/youtube/samsung_yt_video_comments.csv', sep=';', index=False)
comments_df

Unnamed: 0,comment,date,video_id
0,We can‚Äôt wait to help you celebrate every face...,2023-07-13T00:05:49Z,IzRSSykkc8U
1,-<ÌÇπÍ∞ì ÏÇºÏÑ± ÏòÅÏõêÌïòÎ¶¨>-,2023-07-13T11:00:27Z,IzRSSykkc8U
2,Samsung üòä‚ù§,2023-07-13T02:25:39Z,IzRSSykkc8U
3,"samsung has the best phones i ever seen, that ...",2023-07-13T01:01:28Z,IzRSSykkc8U
4,Anybody know what time on July 26th?,2023-07-13T00:47:17Z,IzRSSykkc8U
...,...,...,...
31268,Grande AMLO.,2023-06-21T23:39:21Z,wXhV4W-2ipg
31269,Awesome!,2023-06-21T20:03:56Z,wXhV4W-2ipg
31270,Ïù¥Í±∏ ÏÇºÏÑ±Ïù¥ Ìï©ÎãàÎã§. Ï†ïÎßê~ÎåÄÎã®Ìïú Ïï†Íµ≠ Í∏∞ÏóÖÏûÖÎãàÎã§ üéäüéäüëçüëçüëçüéäüéä,2023-06-21T14:29:48Z,wXhV4W-2ipg
31271,Korea has hosted Expo in Yeosu 2012. For balan...,2023-06-21T13:08:42Z,wXhV4W-2ipg
