Hier startet die YouTube Data API v3

In [1]:
import requests
import datetime
import pandas as pd

# YouTube Data API v3 key.
# SECURITY NOTE(review): a real key is committed below in plain text. It should
# be revoked and removed from the source. The YOUTUBE_API_KEY environment
# variable takes precedence, so the key can be supplied without editing the
# code; the literal is kept only as a backward-compatible fallback.
import os

API_KEY_V3 = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyCBw3tVk-TnYLCZW2vmojQlYFBArzudU-A"

def get_videos_in_date_range(channel_id, start_date, end_date):
    """
    Fetch the videos a channel published within the given date range.

    Pages through the YouTube Data API v3 ``search`` endpoint (50 results per
    request, ordered by date) until the response carries no ``nextPageToken``.

    :param channel_id: Channel ID (e.g. "UC12345abcdEFG67890").
    :param start_date: Start date in 'YYYY-MM-DD' format (sent as 00:00:00Z).
    :param end_date: End date in 'YYYY-MM-DD' format (sent as 23:59:59Z).
    :return: DataFrame with the columns ``video_id``, ``title`` and
        ``published_at``; it has these columns even when no video matches.
    :raises requests.HTTPError: if the API answers with an error status
        (previously such responses were silently treated as "no videos").
    :raises requests.Timeout: if a request takes longer than 30 seconds.
    """
    base_url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        "part": "snippet",
        "channelId": channel_id,
        "publishedAfter": f"{start_date}T00:00:00Z",
        "publishedBefore": f"{end_date}T23:59:59Z",
        "maxResults": 50,
        "order": "date",
        "type": "video",
        "key": API_KEY_V3,
    }
    videos = []

    while True:
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(base_url, params=params, timeout=30)
        # Surface API errors instead of returning a misleadingly empty
        # (or truncated) result.
        response.raise_for_status()
        data = response.json()

        for item in data.get("items", []):
            snippet = item["snippet"]
            videos.append({
                "video_id": item["id"]["videoId"],
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
            })

        next_page_token = data.get("nextPageToken")
        if not next_page_token:
            break
        # Only send a page token once the API has actually issued one.
        params["pageToken"] = next_page_token

    return pd.DataFrame(videos, columns=["video_id", "title", "published_at"])

In [None]:
#Variante 2
import requests
import pandas as pd

# YouTube Data API v3 key.
# SECURITY NOTE(review): a real key is committed below in plain text. It should
# be revoked and removed from the source. The YOUTUBE_API_KEY environment
# variable takes precedence, so the key can be supplied without editing the
# code; the literal is kept only as a backward-compatible fallback.
import os

API_KEY_V3 = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyCBw3tVk-TnYLCZW2vmojQlYFBArzudU-A"

def _search_video_ids(channel_id, start_date, end_date):
    """Return the IDs of all videos the channel published in the date range."""
    search_url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        "part": "snippet",
        "channelId": channel_id,
        "publishedAfter": f"{start_date}T00:00:00Z",
        "publishedBefore": f"{end_date}T23:59:59Z",
        "maxResults": 50,
        "order": "date",
        "type": "video",
        "key": API_KEY_V3,
    }
    video_ids = []

    while True:
        response = requests.get(search_url, params=params, timeout=30)
        # Fail loudly on API errors rather than returning a partial ID list.
        response.raise_for_status()
        data = response.json()
        video_ids.extend(item["id"]["videoId"] for item in data.get("items", []))

        next_page_token = data.get("nextPageToken")
        if not next_page_token:
            return video_ids
        params["pageToken"] = next_page_token


def get_videos_with_details(channel_id, start_date, end_date):
    """
    Fetch a channel's videos in a date range together with their details.

    First collects the video IDs via the ``search`` endpoint, then requests
    snippet, content details and statistics from the ``videos`` endpoint in
    batches of 50 IDs.

    :param channel_id: Channel ID (e.g. "UC12345abcdEFG67890").
    :param start_date: Start date in 'YYYY-MM-DD' format.
    :param end_date: End date in 'YYYY-MM-DD' format.
    :return: DataFrame with the columns ``video_id``, ``title``,
        ``published_at``, ``duration``, ``definition``, ``views``, ``likes``,
        ``dislikes`` and ``comments``. The four counters are integers and
        default to 0 when the API omits them.
    :raises requests.HTTPError: if the API answers with an error status.
    :raises requests.Timeout: if a request takes longer than 30 seconds.
    """
    details_url = "https://www.googleapis.com/youtube/v3/videos"
    batch_size = 50  # the original code treats 50 IDs per request as the API limit
    columns = [
        "video_id", "title", "published_at", "duration", "definition",
        "views", "likes", "dislikes", "comments",
    ]

    video_ids = _search_video_ids(channel_id, start_date, end_date)

    video_details = []
    for start in range(0, len(video_ids), batch_size):
        params = {
            "part": "snippet,contentDetails,statistics",
            "id": ",".join(video_ids[start:start + batch_size]),
            "key": API_KEY_V3,
        }
        response = requests.get(details_url, params=params, timeout=30)
        response.raise_for_status()

        for item in response.json().get("items", []):
            snippet = item["snippet"]
            content_details = item["contentDetails"]
            statistics = item.get("statistics", {})
            video_details.append({
                "video_id": item["id"],
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                "duration": content_details["duration"],  # ISO 8601 format
                "definition": content_details["definition"],
                # Convert to int so present values (which the API delivers as
                # strings) and the missing-value default 0 share one type.
                "views": int(statistics.get("viewCount", 0)),
                "likes": int(statistics.get("likeCount", 0)),
                # NOTE(review): dislikeCount is presumably not returned for
                # plain API-key access, so this is likely always 0 - confirm.
                "dislikes": int(statistics.get("dislikeCount", 0)),
                "comments": int(statistics.get("commentCount", 0)),
            })

    return pd.DataFrame(video_details, columns=columns)

# Main logic
if __name__ == "__main__":
    # Channel to analyse (replace with your own channel ID) and query window
    channel_id, start_date, end_date = (
        "UCSeil5V81-mEGB1-VNR7YEA",
        "2024-01-01",
        "2024-12-31",
    )

    # Fetch the videos together with all their details and show them
    videos_df = get_videos_with_details(
        channel_id=channel_id,
        start_date=start_date,
        end_date=end_date,
    )
    print("Gefundene Videos mit Details:", videos_df, sep="\n")

    # Persist the result as a CSV file
    output_file = "youtube_video_details_neu.csv"
    videos_df.to_csv(output_file, index=False)
    print(f"Daten gespeichert in {output_file}")


Hier beginnt die Arbeit mit der YouTube Analytics API. Falls google-auth-oauthlib nicht installiert ist, muss es vorher installiert werden.

In [None]:
!pip install google-auth-oauthlib google-api-python-client

In [2]:
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# YouTube Analytics API authentication
def authenticate_youtube_analytics_api(credentials_file=None):
    """
    Authenticate access to the YouTube Analytics API.

    Runs the installed-app OAuth flow with a local server for the read-only
    analytics scope and builds a ``youtubeAnalytics`` v2 client from the
    resulting credentials.

    :param credentials_file: Path to the OAuth client-secrets JSON file.
        Defaults to the original hard-coded location, so existing calls
        without arguments keep working.
    :return: Authenticated Analytics API client.
    """
    scopes = ['https://www.googleapis.com/auth/yt-analytics.readonly']
    if credentials_file is None:
        # NOTE(review): this is a raw string AND uses doubled backslashes, so
        # the path really contains "\\" separators. Left byte-identical to
        # avoid changing which file is opened - confirm and clean up.
        credentials_file = r"C:\\Users\\laukat\\OneDrive - Mediengruppe RTL\\HDM Data Analyti\\oauth 2.0\\client_secret_796311161257-bnk32mvms5t9fsfma7agbgrlt5fo54gi.apps.googleusercontent.com.json"
    flow = InstalledAppFlow.from_client_secrets_file(credentials_file, scopes)
    credentials = flow.run_local_server(port=0)
    return build('youtubeAnalytics', 'v2', credentials=credentials)

def get_video_metrics(analytics, video_ids, start_date, end_date):
    """
    Fetch the basic metrics for a list of videos.

    Issues one Analytics report query per video ID. A failure for one video
    is printed and does not stop the remaining queries.

    :param analytics: Authenticated Analytics API client.
    :param video_ids: List of video IDs.
    :param start_date: Start date in 'YYYY-MM-DD' format.
    :param end_date: End date in 'YYYY-MM-DD' format.
    :return: DataFrame with one row of basic metrics per reported video.
    """
    # `impressions` and `impressionClickThroughRate` are intentionally not
    # part of this metric list.
    metric_names = ",".join([
        "views",
        "likes",
        "dislikes",
        "comments",
        "shares",
        "estimatedMinutesWatched",
        "averageViewDuration",
    ])
    collected = []

    for video_id in video_ids:
        try:
            report = analytics.reports().query(
                ids="channel==MINE",
                filters=f"video=={video_id}",
                startDate=start_date,
                endDate=end_date,
                metrics=metric_names,
                dimensions="video",
                sort="-views",
            ).execute()

            # Responses without a "rows" key contribute nothing.
            if "rows" not in report:
                continue

            names = [column["name"] for column in report["columnHeaders"]]
            collected.extend(dict(zip(names, values)) for values in report["rows"])

        except Exception as e:
            print(f"Fehler beim Abrufen der Metriken für Video-ID {video_id}: {e}")

    return pd.DataFrame(collected)


Jetzt werden die Impressions und CTR abgerufen

In [3]:
def get_video_impressions_and_ctr(analytics, video_ids, start_date, end_date):
    """
    Fetch impressions and click-through rate for a list of videos.

    Issues one Analytics report query per video ID. A failure for one video
    is printed and does not stop the remaining queries.

    :param analytics: Authenticated Analytics API client.
    :param video_ids: List of video IDs.
    :param start_date: Start date in 'YYYY-MM-DD' format.
    :param end_date: End date in 'YYYY-MM-DD' format.
    :return: DataFrame with impressions and CTR per reported video.
    """
    requested_metrics = "impressions,impressionClickThroughRate"
    records = []

    for video_id in video_ids:
        try:
            reports_api = analytics.reports()
            request = reports_api.query(
                ids="channel==MINE",
                filters=f"video=={video_id}",
                startDate=start_date,
                endDate=end_date,
                metrics=requested_metrics,
                dimensions="video",
            )
            payload = request.execute()

            # Only responses that carry a "rows" key are turned into records.
            if "rows" in payload:
                header_names = [entry["name"] for entry in payload["columnHeaders"]]
                for values in payload["rows"]:
                    record = dict(zip(header_names, values))
                    records.append(record)

        except Exception as e:
            print(f"Fehler beim Abrufen der Impressions und CTR für Video-ID {video_id}: {e}")

    return pd.DataFrame(records)


Hier ist der Hauptteil

In [None]:
# Main logic: authenticate, fetch both metric sets, combine them and save a CSV
if __name__ == "__main__":
    analytics = authenticate_youtube_analytics_api()
    video_ids = ["VIDEO_ID_1", "VIDEO_ID_2"]  # Replace with actual IDs
    start_date = "2023-01-01"
    end_date = "2023-12-31"

    # Fetch the basic metrics
    basic_metrics_df = get_video_metrics(analytics, video_ids, start_date, end_date)
    print("Grundlegende Metriken:")
    print(basic_metrics_df)

    # Fetch impressions and CTR
    impressions_df = get_video_impressions_and_ctr(analytics, video_ids, start_date, end_date)
    print("Impressions und CTR:")
    print(impressions_df)

    # Combine the data. When a query returned no rows (e.g. every request
    # failed), its frame has no "video" column and merging on it would raise
    # KeyError, so fall back to the basic metrics alone in that case.
    if "video" in basic_metrics_df.columns and "video" in impressions_df.columns:
        final_df = pd.merge(basic_metrics_df, impressions_df, on="video", how="left")
    else:
        final_df = basic_metrics_df
    print("Kombinierte Daten:")
    print(final_df)

    # Save to a CSV file
    final_df.to_csv("youtube_combined_data.csv", index=False)
    print("Daten erfolgreich gespeichert in 'youtube_combined_data.csv'")

