In [None]:
!pip install matplotlib-venn
!pip install isodate
import os

import isodate
import pandas as pd
import requests


# YouTube Data API v3 credentials and endpoints.
# The key is read from the YOUTUBE_API_KEY environment variable so a real
# secret never has to be saved inside the notebook; the original placeholder
# string is kept as the fallback value.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "API key")

SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
VIDEO_DETAILS_URL = "https://www.googleapis.com/youtube/v3/videos"
channel_id = "UC2tsySbe9TNrI-xh2lximHA"  # ID of the target channel

video_ids = []
next_page_token = None


# Page through the channel's videos (50 per page, ordered by date) and collect
# their IDs until the API stops returning a nextPageToken.
while True:
    search_params = {
        'part': 'snippet',
        'channelId': channel_id,
        'maxResults': 50,
        'order': 'date',
        'type': 'video',
        # None on the first request; requests leaves None-valued params out of
        # the query string, so no pageToken is sent for the first page.
        'pageToken': next_page_token,
        'key': API_KEY,
    }
    # Explicit timeout (seconds): without one a stalled connection would block
    # the notebook indefinitely.
    response = requests.get(SEARCH_URL, params=search_params, timeout=30)
    search_data = response.json()

    # Error payloads (bad key, exhausted quota, ...) carry no 'items' key.
    if 'items' not in search_data:
        print("Erreur dans la récupération des vidéos :", search_data)
        break

    # Skip any result without a videoId instead of raising KeyError and losing
    # the pages that were already collected.
    video_ids.extend(
        item['id']['videoId']
        for item in search_data['items']
        if item.get('id', {}).get('videoId')
    )

    next_page_token = search_data.get('nextPageToken')
    if not next_page_token:
        break

print(f"Nombre de vidéos récupérées : {len(video_ids)}")

video_details = []

def format_duration(iso_duration):
    """Format an ISO 8601 duration as a readable "MM:SS" string.

    Minutes are not wrapped at 60, so a duration of 1 h 2 min 3 s is
    rendered as "62:03".

    Args:
        iso_duration: ISO 8601 duration string, e.g. "PT4M13S".

    Returns:
        The zero-padded "MM:SS" string, or "N/A" when the value cannot be
        parsed as a duration.
    """
    try:
        duration = isodate.parse_duration(iso_duration)
        total_seconds = int(duration.total_seconds())
    except (TypeError, ValueError):
        # Only parse failures map to "N/A": per the isodate docs a malformed
        # string raises ISO8601Error (a ValueError subclass) and a non-string
        # raises TypeError. KeyboardInterrupt, SystemExit and programming
        # errors such as NameError now propagate instead of being hidden.
        return "N/A"
    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes:02}:{seconds:02}"

# Fetch the metadata of every collected video, 50 IDs per request, and build
# one flat record per video.
for i in range(0, len(video_ids), 50):
    video_id_chunk = ",".join(video_ids[i:i+50])
    details_params = {
        # paidProductPlacementDetails is requested because it carries the
        # sponsorship flag used for the 'Product Placement' column below.
        'part': 'snippet,statistics,contentDetails,paidProductPlacementDetails',
        'id': video_id_chunk,
        'key': API_KEY,
    }
    # Explicit timeout (seconds) so a stalled connection cannot hang the cell.
    details_response = requests.get(VIDEO_DETAILS_URL, params=details_params, timeout=30)
    details_data = details_response.json()

    # An error payload has no 'items'; report it and move on to the next chunk.
    if 'items' not in details_data:
        print("Erreur dans la récupération des détails :", details_data)
        continue

    for video in details_data['items']:
        snippet = video['snippet']
        statistics = video.get('statistics', {})
        content_details = video.get('contentDetails', {})
        placement_details = video.get('paidProductPlacementDetails', {})

        video_details.append({
            'Video ID': video['id'],
            'Title': snippet['title'],
            'Description': snippet['description'],
            'Published Date': snippet['publishedAt'],
            'Tags': ", ".join(snippet.get('tags', [])),
            # Counters can be missing from the response; they are stored as
            # the string 'N/A' in that case.
            'Views': statistics.get('viewCount', 'N/A'),
            'Likes': statistics.get('likeCount', 'N/A'),
            'Comments': statistics.get('commentCount', 'N/A'),
            'Duration': format_duration(content_details.get('duration', 'N/A')),
            # Previously read contentDetails.hasCustomThumbnail, which is a
            # thumbnail flag (visible only to the uploader) and therefore
            # always False here; hasPaidProductPlacement is the actual
            # paid-placement indicator.
            'Product Placement': bool(placement_details.get('hasPaidProductPlacement', False)),
        })

print(f"Nombre de vidéos avec détails récupérées : {len(video_details)}")

# Export only when at least one record was collected.
if video_details:
    df = pd.DataFrame(video_details)
    print("Aperçu des données :", df.head())
    df.to_csv("a4_videos_with_placements.csv", index=False)
    print("Les données ont été exportées dans 'a4_videos_with_placements.csv'.")
else:
    print("Aucune donnée à exporter.")


Nombre de vidéos récupérées : 496
Nombre de vidéos avec détails récupérées : 496
Aperçu des données :       Video ID                                              Title  \
0  RAiNZqEdegQ        Экстремальные Прятки от ЭЛИТНОГО СПЕЦНАЗА !   
1  UW9ZCdZsFDc                    When mom says she’s almost home   
2  cMCV8-9qpwM              Don’t choose the wrong window 🪟🏃🏻‍♂️❌   
3  oA_e_s_wuwc  How Many Layers of Duct Tape Does It Take to S...   
4  -WAM1JAPIw4                     Выживаю в КИТАЕ на 1$ ! День 3   

                                         Description        Published Date  \
0  Встречай новогодние Вайбики в Бургер Кинг! Пок...  2024-12-13T12:00:24Z   
1                    When mom says she’s almost home  2024-12-11T14:18:48Z   
2              Don’t choose the wrong window 🪟🏃🏻‍♂️❌  2024-12-04T13:36:31Z   
3  How Many Layers of Duct Tape Does It Take to S...  2024-12-02T12:30:48Z   
4  Если вы хотите такие же крутые шмотки как у Ма...  2024-11-29T12:30:00Z   

              

In [None]:
# Show column names, non-null counts and dtypes of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 496 entries, 0 to 495
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Video ID           496 non-null    object
 1   Title              496 non-null    object
 2   Description        496 non-null    object
 3   Published Date     496 non-null    object
 4   Tags               496 non-null    object
 5   Views              496 non-null    object
 6   Likes              496 non-null    object
 7   Comments           496 non-null    object
 8   Duration           496 non-null    object
 9   Product Placement  496 non-null    bool  
dtypes: bool(1), object(9)
memory usage: 35.5+ KB


In [None]:
# Change the dtypes of "Published Date", "Views" and "Likes".
# Work on a copy so the raw `df` stays available for re-runs.
df_clean = df.copy()
# pd.to_numeric with errors="coerce" turns the 'N/A' placeholders written by
# the collection step into NaN instead of raising, as astype(int) would.
# When every value parses, the columns still come out as int64.
df_clean[["Views", "Likes"]] = df_clean[["Views", "Likes"]].apply(
    pd.to_numeric, errors="coerce"
)
df_clean["Published Date"] = pd.to_datetime(df_clean["Published Date"])
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 496 entries, 0 to 495
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   Video ID           496 non-null    object             
 1   Title              496 non-null    object             
 2   Description        496 non-null    object             
 3   Published Date     496 non-null    datetime64[ns, UTC]
 4   Tags               496 non-null    object             
 5   Views              496 non-null    int64              
 6   Likes              496 non-null    int64              
 7   Comments           496 non-null    object             
 8   Duration           496 non-null    object             
 9   Product Placement  496 non-null    bool               
dtypes: bool(1), datetime64[ns, UTC](1), int64(2), object(6)
memory usage: 35.5+ KB


In [None]:
# The BigQuery transfer of this frame failed; the workaround is to drop the
# "Description" column (its text contains emojis).
# errors="ignore" keeps the cell re-runnable: on a second execution the column
# is already gone and a plain drop would raise KeyError.
df_clean = df_clean.drop(columns=["Description"], errors="ignore")
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 496 entries, 0 to 495
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   Video ID           496 non-null    object             
 1   Title              496 non-null    object             
 2   Published Date     496 non-null    datetime64[ns, UTC]
 3   Tags               496 non-null    object             
 4   Views              496 non-null    int64              
 5   Likes              496 non-null    int64              
 6   Comments           496 non-null    object             
 7   Duration           496 non-null    object             
 8   Product Placement  496 non-null    bool               
dtypes: bool(1), datetime64[ns, UTC](1), int64(2), object(5)
memory usage: 31.6+ KB


In [None]:
# Write the cleaned frame to "a4_videos.csv" without the index column.
df_clean.to_csv("a4_videos.csv", index= False)