# Troubleshooting PRECONDITION_FAILED error

Youtube's internal API has changed and now seems to require both the `context.client.visitorData` fields and the `context.request.attestationResponseData` object:

```json
{
    "context": {
        "client": {
            "visitorData": "Cgt4YndfUjRMNDd...",
            "userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0,gzip(gfe)",
            "clientName": "WEB",
            "clientVersion": "2.20251222.04.00"
        },
        "request": {
            "attestationResponseData": {
                "challenge": "a=6&a2=10&b=4pGJcytzFKRXtSh_mtRrrAPR6NA&c=1766585477&d=1&t=21600&c1a=1&c6a=1&c6b=1&hh=jme44dPEfzssT6Y0hd5koMdguh-8S7QszeKOHCvxKzw",
                "webResponse": "$Nqk5qfFRAAZhip..."
            }
        }
    },
    "params": "CgtkUXc0dzlXZ1hjURIOQ2dBU0FtVnVHZ0ElM0QYASozZW5nYWdlbWVudC1wYW5lbC1zZWFyY2hhYmxlLXRyYW5zY3JpcHQtc2VhcmNoLXBhbmVsMAE4AUAB",
    "externalVideoId": "dQw4w9WgXcQ"
}
```



In [1]:
from yt_transcript_fetcher.api import YouTubeTranscriptFetcher

def test_fetch_with_context(user_context, session=None):
    video_id = "dQw4w9WgXcQ"
    fetcher = YouTubeTranscriptFetcher(session=session)
    fetcher._context = user_context
    
    print("Attempting to list languages with injected context...")
    try:
        languages = fetcher.list_languages(video_id)
        print("Success!")
    except Exception as e:
        print(f"Failed: {e}")

user_context = {
    "client": {
        "visitorData": "Cgt4YndfUjRMNDdBQSiF6a_KBjIKCgJHQhIEGgAgXmLgAgrdAjEyLllURT1xTUUxeWZOeWxVQVZHN0FXUV9tNzNNYzNCMHZiMXBsdmhkcFFFOURBTGpPRjltaWVFcjF6dG1MVUZ3aDl3UExWMnpZWEJ6MTJrdktTMWkyYzVXSGJWc0EyTThkZWlMYjB1bDhaTmRDdUpBbG8wd1FUbm1nc0Nyd0wtRFc3OWZWLVNROHZSbXFyZXdVTVY5VEw3S2pBaTBaVTNFRmpqbXBSb1VJX0d3cTNscFVjd1hlYzVzVzZLLWpuY0V0WE04b3JCa3dBbDRiR21uSVFwaXhSVHIwQnZEaGdVWWtjQnlPM0dlNmJWdEVVTThya3J4SF9iNnRmVUcweEY1bDRlRzZkblU3SWo4QnFDMlNYc05lMjJsQzVzZ0JzRDd4UGswdy1ya3JhUG8yV3dNZG5wdjY2Y2h0NVhSR2xERjNleHBDSkFrQWRKV0laamN6d20tdVVCTEpXTVE%3D",
        "userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0,gzip(gfe)",
        "clientName": "WEB",
        "clientVersion": "2.20251222.04.00"
    }
}

test_fetch_with_context(user_context)

Attempting to list languages with injected context...
Failed: API precondition failed for video dQw4w9WgXcQ. This may indicate that transcripts are not available.
Response: {'error': {'code': 400, 'message': 'Precondition check failed.', 'errors': [{'message': 'Precondition check failed.', 'domain': 'global', 'reason': 'failedPrecondition'}], 'status': 'FAILED_PRECONDITION'}}


# Idea: Can we get the `visitorData` easily?

There's a POST request to `https://www.youtube.com/youtubei/v1/account/get_setting_values?prettyPrint=false` that might allow us to set the `visitorData` param for subsequent requests. 

In [2]:
import requests

data = {
  "context": {
    "client": {
      "hl": "en-GB",
      "gl": "GB",
      "remoteHost": "31.111.85.200",
      "deviceMake": "",
      "deviceModel": "",
      "visitorData": "Cgt3TjFTNzhYYklMWSiPy8nKBjIKCgJHQhIEGgAgPmLgAgrdAjEyLllURT1vV2k4VnBka1NtdUJ4ZWx4cl9lU2lka1FPaXBsamZfSy1oSzUzVjIxUVNiMnZmY2FFSE85SHdhQ3JySExQaEVsQUF4eWxmV2VPeVlPMnozeWswNlVyZE1ONWExYzBibHYyUGtzdTBnQ2tTMWkyUURkVXhOTXZjeU5QNUVaUjEyZklOOVQxNmxOcEI2TjJEOVpVQk5YWmdsLV8yTVFDR3dMVzF1akttLVkza2tOM2Nhc2hFczRfQTU2c05QUHBWTE1Uc3M4Ny1fSGljcEhfQlRjX2hINXhiTlZ1OWMwSU81ZUN5QUlhbjBMc2lwdVN1V0tUdmdDdmpjUWZiSEdfbkxUaHNLV09JQTEzdFlTaTFoUzVzVVFKTlBnNU1mWXBDcFk3YURMZ1Q0eDBMTnZQLUR6SEdJTlZFUzE1UEJoOHhJN1F0NVRwYmswLWgtajB1QzRpWmNVUXc%3D",
      "userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0,gzip(gfe)",
      "clientName": "WEB",
      "clientVersion": "2.20251222.04.00",
      "osName": "X11",
      "osVersion": "",
      "originalUrl": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
      "screenPixelDensity": 2,
      "platform": "DESKTOP",
      "clientFormFactor": "UNKNOWN_FORM_FACTOR",
      "configInfo": {
        "appInstallData": "CI_LycoGEIOe0BwQvbauBRCu1s8cELTB0BwQjOnPHBDwtNAcEJzXzxwQ2q7QHBDawdAcENjGzxwQt8nPHBDFxs8cEKKFuCIQ3rzOHBDgzbEFEPHMzxwQmrnQHBCcuNAcEMvRsQUQgo_PHBC9mbAFEJS20BwQzOvPHBD8ss4cELvZzhwQ8rPQHBDSvdAcEIv3zxwQibDOHBC8v9AcELjkzhwQs5DPHBDT4a8FENiW0BwQw5HQHBD01c4cEJT-sAUQyrvQHBCJ6K4FEMDbzxwQprbQHBDmh9AcEObgzxwQvYqwBRC8pNAcEJbbzxwQiIewBRCV988cEJmNsQUQudnOHBCPudAcEOHBgBMQlPLPHBDN0bEFEJOD0BwQh6zOHBCi-88cENr3zhwQyfevBRC4wNAcELfq_hIQt4bPHBDBj9AcEIHNzhwQ8p3QHBDxnLAFEOLUrgUQ9quwBRDYrtAcEKefqRcQ5aTQHBC8s4ATEPjE0BwQzN-uBRCrnc8cEPHE0BwQ2rTQHBDE9M8cEMa9gBMQppqwBRDI988cEMe20BwQm8LQHBCNsNAcEIeD0BwQndCwBRCsrNAcELGwgBMQ_cKAExDQoNAcEKqK0BwqbENBTVNUUlZPLVpxLURNZVVFdllBaVFhcUFzd0Z2OEhtQ19DeEVvZE1NcUNzQkFQTHZnWDZPZm1DQnFBR29pNmFJZkZQemdfdFhQVXY5Zy1GRk9JajdwMEY0aGJHS29NdTZST2VTNElvQmgwSDAA"
      },
      "screenDensityFloat": 1.6666666666666667,
      "userInterfaceTheme": "USER_INTERFACE_THEME_DARK",
      "timeZone": "Europe/London",
      "browserName": "Firefox",
      "browserVersion": "146.0",
      "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "deviceExperimentId": "ChxOelU0T1RJek9UazRPVEEzTXpZek5UZzFNQT09EI_LycoGGI_LycoG",
      "rolloutToken": "CMfYs_S386W6_AEQkMqysdjikQMYkMqysdjikQM%3D",
      "screenWidthPoints": 1727,
      "screenHeightPoints": 638,
      "utcOffsetMinutes": 0,
      "mainAppWebInfo": {
        "graftUrl": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "webDisplayMode": "WEB_DISPLAY_MODE_BROWSER",
        "isWebNativeShareAvailable": False
      }
    },
    "user": {
      "lockedSafetyMode": False
    },
    "request": {
      "useSsl": True,
      "internalExperimentFlags": [],
      "consistencyTokenJars": []
    },
    "adSignalsInfo": {
      "params": [
        {
          "key": "dt",
          "value": "1767007632627"
        },
        {
          "key": "flash",
          "value": "0"
        },
        {
          "key": "frm",
          "value": "0"
        },
        {
          "key": "u_tz",
          "value": "0"
        },
        {
          "key": "u_his",
          "value": "1"
        },
        {
          "key": "u_h",
          "value": "1080"
        },
        {
          "key": "u_w",
          "value": "1728"
        },
        {
          "key": "u_ah",
          "value": "1080"
        },
        {
          "key": "u_aw",
          "value": "1728"
        },
        {
          "key": "u_cd",
          "value": "24"
        },
        {
          "key": "bc",
          "value": "31"
        },
        {
          "key": "bih",
          "value": "638"
        },
        {
          "key": "biw",
          "value": "1727"
        },
        {
          "key": "brdim",
          "value": "0,0,0,0,1728,0,1745,1047,1727,638"
        },
        {
          "key": "vis",
          "value": "1"
        },
        {
          "key": "wgl",
          "value": "true"
        },
        {
          "key": "ca_type",
          "value": "image"
        }
      ]
    }
  },
  "settingItemIds": [
    "484"
  ]
}

response = requests.post("https://www.youtube.com/youtubei/v1/account/get_setting_values?prettyPrint=false", json=data)
# did we get a 200 OK response?
print(f"Response status code: {response.status_code}")
# did we get visitorData back?
if response.status_code == 200 and ("visitorData" in response.text or "visitor_data" in response.text):
    print("visitorData found in response!")
else:
    print("visitorData NOT found in response.")

print(response.text)

Response status code: 200
visitorData found in response!
{"responseContext":{"visitorData":"Cgt3TjFTNzhYYklMWSi5w8rKBjIKCgJHQhIEGgAgPmLgAgrdAjEyLllURT1vV2k4VnBka1NtdUJ4ZWx4cl9lU2lka1FPaXBsamZfSy1oSzUzVjIxUVNiMnZmY2FFSE85SHdhQ3JySExQaEVsQUF4eWxmV2VPeVlPMnozeWswNlVyZE1ONWExYzBibHYyUGtzdTBnQ2tTMWkyUURkVXhOTXZjeU5QNUVaUjEyZklOOVQxNmxOcEI2TjJEOVpVQk5YWmdsLV8yTVFDR3dMVzF1akttLVkza2tOM2Nhc2hFczRfQTU2c05QUHBWTE1Uc3M4Ny1fSGljcEhfQlRjX2hINXhiTlZ1OWMwSU81ZUN5QUlhbjBMc2lwdVN1V0tUdmdDdmpjUWZiSEdfbkxUaHNLV09JQTEzdFlTaTFoUzVzVVFKTlBnNU1mWXBDcFk3YURMZ1Q0eDBMTnZQLUR6SEdJTlZFUzE1UEJoOHhJN1F0NVRwYmswLWgtajB1QzRpWmNVUXeCAd8Ck7PGju6l_YS3UWKaM4JgoGIlHSylFSW-E7y6br7ynx-elDWOBipLelkURK2Oqu1cTz98WKVu5999jrORMU4NIv7Nwuv_-tj_lPaKPIYjJn0DXlrESfqPUIBkdd-dWu0zED2u84-FEcReYo3kJmeKRCXHC79gmZCmQ9Ch4fJCucAUBz4jNFVRFYqtiT23YdZQnNHdqsB03W1JRVCEmdhBY2QmW_8oZkgrblezHRJjN7KJHbebzEvXERNid-6PPFfkKIjUnC_fAK0ukeLw67TDPmNt1niL021yf-fWeiZB7WOTFOnK98KUdBHZagD_DAk6W3Gy0zmZ4IDo-oZ5k-MHSvv-rq1AL0s5OO9HS6lGCNO-gkzXVRSX4hch6s764iN1cLSw

In [3]:
import copy
import time

url = "https://www.youtube.com/youtubei/v1/account/get_setting_values?prettyPrint=false"
base_payload = copy.deepcopy(data)  # 'data' exists in notebook

def send(payload):
    try:
        resp = requests.post(url, json=payload, timeout=10)
        return resp
    except Exception as e:
        print(f"Request error: {e}")
        return None

def is_successful(resp):
    if resp is None:
        return False
    return resp.status_code == 200 and ("visitorData" in resp.text or "visitor_data" in resp.text)

# 1) Test removing each top-level context key
top_keys = list(base_payload.get("context", {}).keys())
top_results = {}
print("Top-level context key removal tests:")
for k in top_keys:
    p = copy.deepcopy(base_payload)
    p["context"].pop(k, None)
    resp = send(p)
    ok = is_successful(resp)
    top_results[k] = {
        "success": ok,
        "status_code": getattr(resp, "status_code", None),
        "visitorData_present": (resp is not None and ("visitorData" in resp.text or "visitor_data" in resp.text)),
    }
    print(f" - removed '{k}': success={ok}, status={top_results[k]['status_code']}, visitorData_present={top_results[k]['visitorData_present']}")
    time.sleep(0.25)

# 2) Bisecting search over context.client keys to find necessary keys
client = base_payload["context"].get("client", {})
client_keys = list(client.keys())

necessary = set()
unnecessary = set()

def bisect_keys(keys_subset):
    if not keys_subset:
        return
    # attempt removing this subset
    p = copy.deepcopy(base_payload)
    test_client = p["context"].get("client", {})
    for k in keys_subset:
        test_client.pop(k, None)
    p["context"]["client"] = test_client
    resp = send(p)
    ok = is_successful(resp)
    # if success then none of these keys are necessary
    if ok:
        unnecessary.update(keys_subset)
        print(f"  Removed subset {keys_subset} -> OK (subset not necessary)")
        return
    # if failure and subset of size 1 => that key is necessary
    if len(keys_subset) == 1:
        necessary.add(keys_subset[0])
        print(f"  Removed key '{keys_subset[0]}' -> FAILED (necessary)")
        return
    # else split and recurse
    mid = len(keys_subset) // 2
    bisect_keys(keys_subset[:mid])
    bisect_keys(keys_subset[mid:])

print("\nBisecting 'context.client' keys:")
bisect_keys(client_keys)

print("\nSummary:")
print("Top-level removals:")
for k, v in top_results.items():
    print(f" - {k}: success={v['success']}, status={v['status_code']}, visitorData_present={v['visitorData_present']}")
print(f"\nClient keys considered: {client_keys}")
print(f"Necessary client keys (inferred): {sorted(necessary)}")
print(f"Unnecessary client keys (inferred): {sorted(unnecessary)}")

Top-level context key removal tests:
 - removed 'client': success=False, status=400, visitorData_present=False
 - removed 'user': success=True, status=200, visitorData_present=True
 - removed 'request': success=True, status=200, visitorData_present=True
 - removed 'adSignalsInfo': success=True, status=200, visitorData_present=True

Bisecting 'context.client' keys:
  Removed subset ['hl', 'gl', 'remoteHost', 'deviceMake', 'deviceModel', 'visitorData', 'userAgent'] -> OK (subset not necessary)
  Removed key 'clientName' -> FAILED (necessary)
  Removed key 'clientVersion' -> FAILED (necessary)
  Removed subset ['osName'] -> OK (subset not necessary)
  Removed subset ['osVersion', 'originalUrl', 'screenPixelDensity', 'platform'] -> OK (subset not necessary)
  Removed subset ['clientFormFactor', 'configInfo', 'screenDensityFloat', 'userInterfaceTheme', 'timeZone', 'browserName', 'browserVersion', 'acceptHeader', 'deviceExperimentId', 'rolloutToken', 'screenWidthPoints', 'screenHeightPoints'

In [4]:
# Make a final test payload with only necessary keys
final_payload = copy.deepcopy(base_payload)
final_client = {}
for k in necessary:
    final_client[k] = base_payload["context"]["client"][k]
final_payload["context"]["client"] = final_client
final_resp = send(final_payload)
final_ok = is_successful(final_resp)
print(f"\nFinal test with only necessary keys: success={final_ok}, status={getattr(final_resp, 'status_code', None)}, visitorData_present={(final_resp is not None and ('visitorData' in final_resp.text or 'visitor_data' in final_resp.text))}")
print(final_resp.text)


Final test with only necessary keys: success=True, status=200, visitorData_present=True
{"responseContext":{"visitorData":"CgtrZjZxcDFneGoyRSi8w8rKBjIKCgJHQhIEGgAgDw%3D%3D","serviceTrackingParams":[{"service":"CSI","params":[{"key":"c","value":"WEB"},{"key":"cver","value":"2.20251222.04.00"},{"key":"yt_li","value":"0"},{"key":"GetSettingValues_rid","value":"0x8abe6af3cb43b4fe"}]},{"service":"GFEEDBACK","params":[{"key":"logged_in","value":"0"},{"key":"visitor_data","value":"CgtrZjZxcDFneGoyRSi8w8rKBjIKCgJHQhIEGgAgDw%3D%3D"}]},{"service":"GUIDED_HELP","params":[{"key":"logged_in","value":"0"}]},{"service":"ECATCHER","params":[{"key":"client.version","value":"2.20251222"},{"key":"client.name","value":"WEB"}]}],"mainAppWebResponseContext":{"loggedOut":true},"webResponseContextExtensionData":{"hasDecorated":true}},"settingValues":[{"key":"484","value":{"stringValue":""}}]}


In [5]:
import requests

def extract_visitor_data(response):
    """Recursively extract visitorData from response JSON.
    
    This is lazy."""
    try:
        data = response.json()
    except Exception as e:
        print(f"JSON decode error: {e}")
        return None

    def recursive_search(obj):
        if isinstance(obj, dict):
            for k, v in obj.items():
                if k in ("visitorData", "visitor_data"):
                    return v
                result = recursive_search(v)
                if result is not None:
                    return result
        elif isinstance(obj, list):
            for item in obj:
                result = recursive_search(item)
                if result is not None:
                    return result
        return None

    return recursive_search(data)

# function to make request and fetch visitorData for a client context
def fetch_visitor_data(client_context):
    payload = {
        "context": {
            "client": client_context
        },
        "settingItemIds": [
            "484"
        ]
    }
    try:
        url = "https://www.youtube.com/youtubei/v1/account/get_setting_values?prettyPrint=false"
        resp = requests.post(url, json=payload, timeout=10)
        if resp.status_code == 200 and ("visitorData" in resp.text or "visitor_data" in resp.text):
            return extract_visitor_data(resp)
        else:
            return None
    except Exception as e:
        print(f"Request error: {e}")
        return None
# Example usage:
client_context = {
        "clientName": "WEB",
        "clientVersion": "2.20251222.04.00",
    }
visitor_data_response = fetch_visitor_data(client_context)
if visitor_data_response:
    print("Successfully fetched visitorData:")
    print(visitor_data_response)
else:
    print("Failed to fetch visitorData with given client context.")

Successfully fetched visitorData:
CgtQbUg3N1UyeEU1USi8w8rKBjIKCgJHQhIEGgAgSg%3D%3D


## Idea: Switch to a different `clientName` that may not require the attestation

The `yt-dlp` package contains a bunch of different setups for clientName, with different parameters being required for each.

In [6]:
# Test all predefined clients
clients = YouTubeTranscriptFetcher.CLIENTS
for client_name, client_info in clients.items():
    print(f"\nTesting client: {client_name}")
    context = {
        "client": {
            "clientName": client_info["clientName"],
            "clientVersion": client_info["clientVersion"],
        }
    }
    visitor_data = fetch_visitor_data(context)
    if visitor_data:
        print(f"  Successfully fetched visitorData for {client_name}: {visitor_data}")
        context["client"]["visitorData"] = visitor_data
        test_fetch_with_context(context)
    else:
        print(f"  Failed to fetch visitorData for {client_name}")



Testing client: web
  Failed to fetch visitorData for web

Testing client: android
  Failed to fetch visitorData for android

Testing client: ios
  Failed to fetch visitorData for ios

Testing client: tv
  Failed to fetch visitorData for tv

Testing client: mweb
  Failed to fetch visitorData for mweb


# Idea: We may be missing headers

`yt-dlp` sets more headers than we do, like `X-Youtube-Client-Name`. Try the above but set more headers?

In [7]:
# taken from yt-dlp (and removed PO policies)
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20250925.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
        'SUPPORTS_COOKIES': True,
        'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
    },
    # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
    'web_safari': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20250925.01.00',
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
        'SUPPORTS_COOKIES': True,
        'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
    },
    'web_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20250923.21.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
        'SUPPORTS_COOKIES': True,
    },
    'web_music': {
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20250922.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
        'SUPPORTS_COOKIES': True,
        'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
    },
    # This client now requires sign-in for every video
    'web_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20250922.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
        'REQUIRE_AUTH': True,
        'SUPPORTS_COOKIES': True,
    },
    'android': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '20.10.38',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    # Doesn't require a PoToken for some reason
    'android_sdkless': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '20.10.38',
                'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    # YouTube Kids videos aren't returned on this client for some reason
    'android_vr': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_VR',
                'clientVersion': '1.65.10',
                'deviceMake': 'Oculus',
                'deviceModel': 'Quest 3',
                'androidSdkVersion': 32,
                'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.65.10 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
                'osName': 'Android',
                'osVersion': '12L',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '20.10.4',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.3.2.22D82',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20250925.01.00',
                # mweb previously did not require PO Token with this UA
                'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
        'SUPPORTS_COOKIES': True,
        'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
    },
    'tv': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '7.20250923.13.00',
                'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
        'SUPPORTS_COOKIES': True,
        # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506
        'AUTHENTICATED_USER_AGENT': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)',
    },
    'tv_downgraded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '5.20251105',
                'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
        'SUPPORTS_COOKIES': True,
    },
    'tv_simply': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY',
                'clientVersion': '1.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 75,
    },
    # This client now requires sign-in for every video
    # It was previously an age-gate workaround for videos that were `playable_in_embed`
    # It may still be useful if signed into an EU account that is not age-verified
    'tv_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
        'REQUIRE_AUTH': True,
        'SUPPORTS_COOKIES': True,
    },
}

In [8]:
from urllib.parse import unquote

# Try to set more headers in a custom session
def set_up_session_headers(sess, client_info):
    sess.headers.update({
        "X-Youtube-Client-Name": str(client_info["INNERTUBE_CONTEXT_CLIENT_NAME"]),
    })
    sess.headers.update({
        "X-Youtube-Client-Version": client_info["INNERTUBE_CONTEXT"]["client"]["clientVersion"],
    })
    sess.headers.update({
        "User-Agent": client_info["INNERTUBE_CONTEXT"]["client"].get("userAgent", "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0,gzip(gfe)"),
    })
    sess.headers.update({
        "Accept": "*/*",
        "Content-Type": "application/json",
        "Origin": "https://www.youtube.com",
        "Referer": "https://www.youtube.com/",
    })
    sess.headers.update({
        "X-Goog-EOM-Visitor-Id": unquote(client_info["INNERTUBE_CONTEXT"]["client"].get("visitorData", "")),
    })
    sess.headers.update({
        "X-Youtube-Bootstrap-Logged-In": "false",
    })

def set_up_session_cookies(sess: requests.Session, client_info):
    if client_info.get("SUPPORTS_COOKIES", False):
        # make a request to youtube to set cookies
        resp = sess.get("https://www.youtube.com")
        resp.raise_for_status()
        # https://www.youtube.com/upgrade_visitor_cookie?eom=1
        resp = sess.post("https://www.youtube.com/upgrade_visitor_cookie?eom=1")
        resp.raise_for_status()
        sess.cookies.set("SOCS", "CAI", domain=".youtube.com", path="/", secure=True)
        sess.cookies.set("PREF", "en", domain=".youtube.com", path="/")

In [9]:
import copy
import requests
import pprint

clients = copy.deepcopy(INNERTUBE_CLIENTS)
for client_name, client_info in clients.items():
    session = requests.Session()
    print(f"\nTesting client: {client_name}")
    context = client_info["INNERTUBE_CONTEXT"]
    visitor_data = fetch_visitor_data(context["client"])
    if visitor_data:
        print(f"  Successfully fetched visitorData for {client_name}: {visitor_data}")
        context["client"]["visitorData"] = unquote(visitor_data)
        set_up_session_headers(session, client_info)
        set_up_session_cookies(session, client_info)
        print("  Session headers:")
        pprint.pprint(dict(session.headers), indent=2)
        print("  Session cookies:")
        pprint.pprint(session.cookies.get_dict(domain=".youtube.com"))
        test_fetch_with_context(context, session=session)
    else:
        print(f"  Failed to fetch visitorData for {client_name}")



Testing client: web
  Successfully fetched visitorData for web: CgtZRkNfYXgtTzNnUSi8w8rKBjIKCgJHQhIEGgAgLg%3D%3D


  Session headers:
{ 'Accept': '*/*',
  'Accept-Encoding': 'gzip, deflate, zstd',
  'Connection': 'keep-alive',
  'Content-Type': 'application/json',
  'Origin': 'https://www.youtube.com',
  'Referer': 'https://www.youtube.com/',
  'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 '
                'Firefox/146.0,gzip(gfe)',
  'X-Goog-EOM-Visitor-Id': 'CgtZRkNfYXgtTzNnUSi8w8rKBjIKCgJHQhIEGgAgLg==',
  'X-Youtube-Bootstrap-Logged-In': 'false',
  'X-Youtube-Client-Name': '1',
  'X-Youtube-Client-Version': '2.20250925.01.00'}
  Session cookies:
{'PREF': 'en',
 'SOCS': 'CAI',
 'VISITOR_PRIVACY_METADATA': 'CgJHQhIEGgAgPQ%3D%3D',
 'YSC': 'uOALgq6CFHw',
 '__Secure-YEC': 'CgtPdVBYQkI0aVd0dyi8w8rKBjIKCgJHQhIEGgAgPQ%3D%3D',
 '__Secure-YENID': '12.YTE=DIIf2bhvp1BhPFtHLX76Mj16vV6rctu99citJNguoJHKdek7_oOPLtRKFev3wt8yf59En5O5UjGtflVm1Bl5ExfGyffDg4lICa9PQDaWbZDdS667dy5nWhdRGw9pQOzycYZTotDhbyHHoaZi1FcZ-idZPjNm829_sM_hIdDSrOIR1X2Yc1ciLftMAA1y90lTULBtWs0Vy-dMXwVRDOBJjMn4ZgyPluzyHJkr

In [10]:
# make a copy of tv_simply for additional testing
import copy
from yt_transcript_fetcher.api import YouTubeTranscriptFetcher

client_info = copy.deepcopy(INNERTUBE_CLIENTS["tv_simply"])
print("\nAdditional testing for client: tv_simply")
context = client_info["INNERTUBE_CONTEXT"]
visitor_data = fetch_visitor_data(context["client"])
if visitor_data:
    print(f"  Successfully fetched visitorData for tv_simply: {visitor_data}")
    context["client"]["visitorData"] = unquote(visitor_data)
    # set up session headers and cookies
    set_up_session_headers(session, client_info)
    set_up_session_cookies(session, client_info)
    fetcher = YouTubeTranscriptFetcher(session=session)
    fetcher._context = context
    fetcher.get_transcript("dQw4w9WgXcQ")
else:
    print(f"  Failed to fetch visitorData for tv_simply.")
    print("  No further testing possible without visitorData.")


Additional testing for client: tv_simply
  Successfully fetched visitorData for tv_simply: CgtHTEVrY1lkTkFVUSjDw8rKBjIKCgJHQhIEGgAgPA%3D%3D


NoLanguageError: No language available for the video.

# Alternative: Using the `/player` API to get caption tracks

The `/get_transcript` API requires attestation (PO Token) for WEB clients. An alternative is:
1. Call the `/player` API to get video info including `captionTracks`
2. Each caption track has a `baseUrl` pointing to timedtext XML
3. Fetch and parse the timedtext directly

This is what yt-dlp and the workarounds in YouTube.js issue #1102 use. Let's test this approach.

In [11]:
import requests
import copy
from urllib.parse import unquote

def get_player_response(video_id, client_info, session=None):
    """Get player response which contains captionTracks with baseUrl for timedtext."""
    sess = session or requests.Session()
    
    context = copy.deepcopy(client_info.get("INNERTUBE_CONTEXT", {"client": client_info}))
    
    # Try to get visitorData for this client
    visitor_data = fetch_visitor_data(context.get("client", {}))
    if visitor_data:
        context["client"]["visitorData"] = unquote(visitor_data)
    
    payload = {
        "context": context,
        "videoId": video_id,
        "playbackContext": {
            "contentPlaybackContext": {
                "html5Preference": "HTML5_PREF_WANTS",
            }
        },
        "contentCheckOk": True,
        "racyCheckOk": True
    }
    
    url = "https://www.youtube.com/youtubei/v1/player?prettyPrint=false"
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": context.get("client", {}).get("userAgent", "Mozilla/5.0"),
    }
    
    # Add client name header if available
    if "INNERTUBE_CONTEXT_CLIENT_NAME" in client_info:
        headers["X-Youtube-Client-Name"] = str(client_info["INNERTUBE_CONTEXT_CLIENT_NAME"])
    if "clientVersion" in context.get("client", {}):
        headers["X-Youtube-Client-Version"] = context["client"]["clientVersion"]
    
    try:
        response = sess.post(url, json=payload, headers=headers, timeout=15)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error getting player response: {e}")
        return None

def get_caption_tracks(player_response):
    """Extract caption tracks from player response."""
    try:
        captions = player_response.get("captions", {})
        pctr = captions.get("playerCaptionsTracklistRenderer", {})
        return pctr.get("captionTracks", [])
    except Exception as e:
        print(f"Error extracting caption tracks: {e}")
        return []

# Test with multiple clients
video_id = "dQw4w9WgXcQ"

test_clients = {
    "android": INNERTUBE_CLIENTS["android"],
    "android_sdkless": INNERTUBE_CLIENTS["android_sdkless"],
    "ios": INNERTUBE_CLIENTS["ios"],
    "tv_simply": INNERTUBE_CLIENTS["tv_simply"],
    "web": INNERTUBE_CLIENTS["web"],
}

for client_name, client_info in test_clients.items():
    print(f"\n{'='*60}")
    print(f"Testing client: {client_name}")
    print(f"{'='*60}")
    
    pr = get_player_response(video_id, client_info)
    
    if pr:
        # Check playability status
        playability = pr.get("playabilityStatus", {})
        status = playability.get("status")
        reason = playability.get("reason", "")
        print(f"  Playability: {status}")
        if reason:
            print(f"  Reason: {reason[:100]}...")
        
        # Check for caption tracks
        caption_tracks = get_caption_tracks(pr)
        print(f"  Caption tracks found: {len(caption_tracks)}")
        
        if caption_tracks:
            for track in caption_tracks[:3]:  # Show first 3
                print(f"    - {track.get('name', {}).get('simpleText', 'N/A')} ({track.get('languageCode', 'N/A')})")
                base_url = track.get("baseUrl", "")
                if base_url:
                    print(f"      baseUrl: {base_url[:80]}...")
    else:
        print("  Failed to get player response")


Testing client: android
  Playability: OK
  Caption tracks found: 6
    - N/A (en)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSafqEBvSFxs0Puq298QQ&c...
    - N/A (en)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSafqEBvSFxs0Puq298QQ&c...
    - N/A (de-DE)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSafqEBvSFxs0Puq298QQ&c...

Testing client: android_sdkless
  Playability: OK
  Caption tracks found: 6
    - N/A (en)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSaYOCIuHo6dsPu4KuSA&ca...
    - N/A (en)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSaYOCIuHo6dsPu4KuSA&ca...
    - N/A (de-DE)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSaYOCIuHo6dsPu4KuSA&ca...

Testing client: ios
  Playability: OK
  Caption tracks found: 6
    - N/A (en)
      baseUrl: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=0aFSafL1Otifp-

In [12]:
import re
from html import unescape

def parse_timedtext_xml(xml_content):
    """Parse timedtext XML and extract transcript segments."""
    segments = []
    
    # Try <p> tag format (Android style): <p t="ms" d="ms">text</p>
    p_pattern = r'<p\s+t="(\d+)"\s+d="(\d+)"[^>]*>([\s\S]*?)</p>'
    p_matches = re.findall(p_pattern, xml_content)
    
    if p_matches:
        for start_ms, duration_ms, text in p_matches:
            clean_text = re.sub(r'<[^>]+>', '', text).strip()
            clean_text = unescape(clean_text)
            if clean_text:
                segments.append({
                    "start": int(start_ms) / 1000,
                    "duration": int(duration_ms) / 1000,
                    "text": clean_text
                })
        return segments
    
    # Try <text> tag format: <text start="sec" dur="sec">text</text>
    text_pattern = r'<text\s+start="([\d.]+)"\s+dur="([\d.]+)"[^>]*>([\s\S]*?)</text>'
    text_matches = re.findall(text_pattern, xml_content)
    
    if text_matches:
        for start, dur, text in text_matches:
            clean_text = re.sub(r'<[^>]+>', '', text).strip()
            clean_text = unescape(clean_text)
            if clean_text:
                segments.append({
                    "start": float(start),
                    "duration": float(dur),
                    "text": clean_text
                })
        return segments
    
    return segments

def fetch_timedtext(base_url, session=None):
    """Fetch timedtext XML from the given URL."""
    sess = session or requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    try:
        response = sess.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.text
    except Exception as e:
        print(f"Error fetching timedtext: {e}")
        return None

# Test fetching and parsing timedtext from android_sdkless client
print("Testing timedtext fetch with android_sdkless client...")
print("=" * 60)

client_info = INNERTUBE_CLIENTS["android_sdkless"]
pr = get_player_response("dQw4w9WgXcQ", client_info)

if pr:
    caption_tracks = get_caption_tracks(pr)
    
    # Find English caption track (prefer non-auto-generated)
    english_track = None
    for track in caption_tracks:
        lang = track.get("languageCode", "")
        kind = track.get("kind", "")
        if lang == "en" and kind != "asr":  # Prefer manual captions
            english_track = track
            break
        elif lang == "en" and english_track is None:  # Fallback to ASR
            english_track = track
    
    if english_track:
        base_url = english_track.get("baseUrl")
        print(f"Found English track (kind: {english_track.get('kind', 'manual')})")
        print(f"Base URL: {base_url[:100]}...")
        print()
        
        # Fetch the timedtext
        xml_content = fetch_timedtext(base_url)
        
        if xml_content:
            print(f"Fetched {len(xml_content)} bytes of timedtext XML")
            print(f"First 500 chars: {xml_content[:500]}")
            print()
            
            # Parse the timedtext
            segments = parse_timedtext_xml(xml_content)
            print(f"Parsed {len(segments)} transcript segments")
            
            if segments:
                print("\nFirst 5 segments:")
                for seg in segments[:5]:
                    print(f"  [{seg['start']:.2f}s] {seg['text'][:60]}...")
        else:
            print("Failed to fetch timedtext")
    else:
        print("No English caption track found")
else:
    print("Failed to get player response")

Testing timedtext fetch with android_sdkless client...
Found English track (kind: manual)
Base URL: https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ&ei=1qFSaaXCEP6KvdIP5qWxoA0&caps=asr&opi=11249672...

Fetched 3877 bytes of timedtext XML
First 500 chars: <?xml version="1.0" encoding="utf-8" ?><timedtext format="3">
<body>
<p t="1360" d="1680">[♪♪♪]</p>
<p t="18640" d="3240">♪ We&#39;re no strangers to love ♪</p>
<p t="22640" d="4320">♪ You know the rules
and so do I ♪</p>
<p t="27040" d="4000">♪ A full commitment&#39;s
what I&#39;m thinking of ♪</p>
<p t="31120" d="3960">♪ You wouldn&#39;t get this
from any other guy ♪</p>
<p t="35160" d="4360">♪ I just wanna tell you
how I&#39;m feeling ♪</p>
<p t="40520" d="2400">♪ Gotta make you understand ♪<

Parsed 61 transcript segments

First 5 segments:
  [1.36s] [♪♪♪]...
  [18.64s] ♪ We're no strangers to love ♪...
  [22.64s] ♪ You know the rules
and so do I ♪...
  [27.04s] ♪ A full commitment's
what I'm thinking of ♪...
  [31.12s] ♪ You wo

## Testing android_sdkless with /get_transcript API

According to yt-dlp source code, `android_sdkless` client "Doesn't require a PoToken for some reason".

Let's test if this client can use the `/get_transcript` endpoint (engagement panel approach) successfully.

In [13]:
# Force reload the module
import importlib
import yt_transcript_fetcher.protobuf
importlib.reload(yt_transcript_fetcher.protobuf)

import requests
import base64
from yt_transcript_fetcher.protobuf import generate_params, encode_visitor_data

def test_get_transcript_with_client(video_id, client_name, client_info, lang="en"):
    """Test if /get_transcript API works with a given client."""
    
    # Build innertube context
    innertube_context = {"client": client_info.get("INNERTUBE_CONTEXT", {}).get("client", {}).copy()}
    
    # Add visitorData
    visitor_data = encode_visitor_data()
    innertube_context["client"]["visitorData"] = visitor_data
    
    # Request body
    body = {
        "context": innertube_context,
        "params": generate_params(video_id, lang)
    }
    
    # API endpoint
    api_url = "https://www.youtube.com/youtubei/v1/get_transcript"
    api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
    
    # Headers
    user_agent = innertube_context.get("client", {}).get("userAgent", 
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": user_agent,
        "X-Goog-Api-Key": api_key,
        "X-Goog-Visitor-Id": visitor_data,
    }
    
    try:
        resp = requests.post(
            api_url,
            params={"key": api_key},
            json=body,
            headers=headers,
            timeout=10
        )
        
        print(f"\n{client_name}:")
        print(f"  Status: {resp.status_code}")
        
        if resp.status_code == 200:
            data = resp.json()
            # Check for transcript actions
            actions = data.get("actions", [])
            if actions:
                update_action = actions[0].get("updateEngagementPanelAction", {})
                content = update_action.get("content", {})
                transcript_renderer = content.get("transcriptRenderer", {})
                body_content = transcript_renderer.get("content", {}).get("transcriptSearchPanelRenderer", {})
                segments = body_content.get("body", {}).get("transcriptSegmentListRenderer", {}).get("initialSegments", [])
                print(f"  Transcript segments: {len(segments)}")
                return len(segments) > 0
            else:
                print(f"  Response keys: {list(data.keys())}")
                return False
        else:
            error = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else resp.text
            if isinstance(error, dict):
                err_msg = error.get("error", {}).get("message", str(error))[:100]
                err_status = error.get("error", {}).get("status", "")
                print(f"  Error: {err_status} - {err_msg}")
            else:
                print(f"  Error: {str(error)[:100]}...")
            return False
            
    except Exception as e:
        print(f"  Exception: {e}")
        return False

# Test clients that might work without PO Token
VIDEO_ID = "dQw4w9WgXcQ"

test_clients = {
    "android_sdkless": {
        "INNERTUBE_CONTEXT": {
            "client": {
                "clientName": "ANDROID",
                "clientVersion": "20.10.38",
                "userAgent": "com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip",
                "osName": "Android",
                "osVersion": "11",
            },
        },
    },
    "android": {
        "INNERTUBE_CONTEXT": {
            "client": {
                "clientName": "ANDROID",
                "clientVersion": "19.09.37",
                "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
                "osName": "Android",
                "osVersion": "11",
            },
        },
    },
    "ios": {
        "INNERTUBE_CONTEXT": {
            "client": {
                "clientName": "IOS",
                "clientVersion": "19.09.3",
                "deviceMake": "Apple",
                "deviceModel": "iPhone",
                "userAgent": "com.google.ios.youtube/19.09.3 (iPhone; U; CPU iPhone OS 17_4 like Mac OS X)",
                "osName": "iPhone",
                "osVersion": "17.4",
            },
        },
    },
    "web": {
        "INNERTUBE_CONTEXT": {
            "client": {
                "clientName": "WEB",
                "clientVersion": "2.20240101",
            },
        },
    },
}

print("Testing /get_transcript API with different clients...")
print("=" * 60)

for name, info in test_clients.items():
    result = test_get_transcript_with_client(VIDEO_ID, name, info)
    if result:
        print(f"  ✓ {name} WORKS!")
    else:
        print(f"  ✗ {name} failed")

Testing /get_transcript API with different clients...

android_sdkless:
  Status: 400
  Error: FAILED_PRECONDITION - Precondition check failed.
  ✗ android_sdkless failed

android:
  Status: 200
  Transcript segments: 0
  ✗ android failed

ios:
  Status: 200
  Transcript segments: 0
  ✗ ios failed

web:
  Status: 400
  Error: FAILED_PRECONDITION - Precondition check failed.
  ✗ web failed


In [14]:
# Let's look at what the android client actually returns
import json

def detailed_get_transcript(video_id, client_name, client_info, lang="en"):
    """Get detailed response from /get_transcript API."""
    
    innertube_context = {"client": client_info.get("INNERTUBE_CONTEXT", {}).get("client", {}).copy()}
    visitor_data = encode_visitor_data()
    innertube_context["client"]["visitorData"] = visitor_data
    
    body = {
        "context": innertube_context,
        "params": generate_params(video_id, lang)
    }
    
    api_url = "https://www.youtube.com/youtubei/v1/get_transcript"
    api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
    
    user_agent = innertube_context.get("client", {}).get("userAgent", 
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": user_agent,
        "X-Goog-Api-Key": api_key,
        "X-Goog-Visitor-Id": visitor_data,
    }
    
    resp = requests.post(api_url, params={"key": api_key}, json=body, headers=headers, timeout=10)
    return resp.status_code, resp.json()

# Test with android client
status, data = detailed_get_transcript("dQw4w9WgXcQ", "android", test_clients["android"])
print(f"Android client response (status {status}):")
print(json.dumps(data, indent=2)[:2000])

Android client response (status 200):
{
  "responseContext": {
    "visitorData": "CgtTaFF5WWNNa0FNZyjhw8rKBjIKCgJHQhIEGgAgYjoMCAEg7ar4_ZO8qKlpWI-a4O22jOHGSA%3D%3D",
    "serviceTrackingParams": [
      {
        "service": "CSI",
        "params": [
          {
            "key": "c",
            "value": "ANDROID"
          },
          {
            "key": "cver",
            "value": "19.09.37"
          },
          {
            "key": "yt_li",
            "value": "0"
          },
          {
            "key": "GetVideoTranscript_rid",
            "value": "0xa7ffa23c346d3214"
          }
        ]
      },
      {
        "service": "GFEEDBACK",
        "params": [
          {
            "key": "logged_in",
            "value": "0"
          },
          {
            "key": "visitor_data",
            "value": "CgtTaFF5WWNNa0FNZyjhw8rKBjIKCgJHQhIEGgAgYjoMCAEg7ar4_ZO8qKlp"
          }
        ]
      },
      {
        "service": "GUIDED_HELP",
        "params": [
          {

In [15]:
# Check what keys are in the android response
print("Top-level keys in android response:", list(data.keys()))
print()

# Check for actions
if "actions" in data:
    print("Actions found:", len(data["actions"]))
    if data["actions"]:
        print("First action keys:", list(data["actions"][0].keys()))
else:
    print("No 'actions' key - transcript not returned")
    
# Let's also try using your library's actual API class to see if it works
print("\n" + "=" * 60)
print("Testing with the library's actual API class:")
from yt_transcript_fetcher.api import YouTubeTranscriptFetcher

fetcher = YouTubeTranscriptFetcher()
try:
    languages = fetcher.list_languages("dQw4w9WgXcQ")
    print(f"Languages found: {len(languages)}")
    for lang in languages[:5]:
        print(f"  {lang}")
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")

Top-level keys in android response: ['responseContext', 'actions', 'trackingParams', 'frameworkUpdates']

Actions found: 1
First action keys: ['clickTrackingParams', 'elementsCommand']

Testing with the library's actual API class:
Error: NoLanguageError: No language available for the video.


## Summary of Findings

### The `/get_transcript` API (Engagement Panel Approach)
- **WEB, android_sdkless (newer Android)**: Return `FAILED_PRECONDITION` (400) - now requires PO Token (attestation)
- **Android (older), iOS**: Return 200 but with an `elementsCommand` action (not `updateEngagementPanelAction`) - different response format that doesn't include transcripts via engagement panels
- **TVHTML5_SIMPLY**: Returns 200 but empty (no engagement panel support)

### The `/player` API + Timedtext Approach
- **ALL tested clients work**: Successfully returns caption tracks with baseUrl
- **Timedtext fetch works**: Can fetch and parse XML transcript segments
- **Downside**: Rate limiting concerns (requires proxy rotation for production)

### Options:
1. **Switch to `/player` API + Timedtext** - Works but has rate limiting
2. **Implement PO Token generation** - Requires JavaScript runtime (bgutils-js)
3. **Use a PO Token provider service** - External dependency
4. **Accept current limitation** - Only works with proper attestation

In [16]:
# Let's explore the 'elementsCommand' from the Android client to see if it contains transcript data
print("Exploring Android client response structure...")
print("=" * 60)

status, data = detailed_get_transcript("dQw4w9WgXcQ", "android", test_clients["android"])

if "actions" in data and data["actions"]:
    action = data["actions"][0]
    print(f"Action keys: {list(action.keys())}")
    
    if "elementsCommand" in action:
        elements_cmd = action["elementsCommand"]
        print(f"\nelementsCommand keys: {list(elements_cmd.keys())}")
        
        # See the structure
        import json
        print("\nelementsCommand content (truncated):")
        print(json.dumps(elements_cmd, indent=2)[:3000])

Exploring Android client response structure...
Action keys: ['clickTrackingParams', 'elementsCommand']

elementsCommand keys: ['transformEntityCommand']

elementsCommand content (truncated):
{
  "transformEntityCommand": {
    "identifier": "dQw4w9WgXcQ.transcript.full.state.key",
    "transform": {
      "types": [
        {
          "typeId": 2,
          "fieldType": "EKO_FIELD_TYPE_MESSAGE"
        },
        {
          "typeId": 12,
          "fieldType": "EKO_FIELD_TYPE_INT32"
        }
      ],
      "variables": [
        {
          "variableId": 1,
          "variableType": "EKO_VARIABLE_TYPE_INPUT"
        },
        {
          "variableId": 2,
          "typeId": 2,
          "variableType": "EKO_VARIABLE_TYPE_OUTPUT",
          "value": {
            "messageValue": {
              "fields": [
                {
                  "tag": 1,
                  "value": {
                    "chooseValue": {
                      "whenThenValues": [
                        {

In [17]:
# Explore the elementsCommand path for transcripts!
print("Exploring Android client elementsCommand for transcripts...")
print("=" * 60)

status, data = detailed_get_transcript("dQw4w9WgXcQ", "android", test_clients["android"])

if "actions" in data and data["actions"]:
    action = data["actions"][0]
    
    try:
        # Navigate to the transcript segments
        elements_cmd = action.get("elementsCommand", {})
        transform_cmd = elements_cmd.get("transformEntityCommand", {})
        arguments = transform_cmd.get("arguments", {})
        transform_args = arguments.get("transformTranscriptSegmentListArguments", {})
        overwrite = transform_args.get("overwrite", {})
        initial_segments = overwrite.get("initialSegments", [])
        
        print(f"Found {len(initial_segments)} transcript segments!")
        
        if initial_segments:
            print("\nFirst 5 segments:")
            for i, seg in enumerate(initial_segments[:5]):
                print(f"\nSegment {i+1}:")
                print(json.dumps(seg, indent=2)[:500])
    except Exception as e:
        print(f"Error navigating structure: {e}")
        
        # Let's try to find the path
        print("\nTrying to find the path...")
        elements_cmd = action.get("elementsCommand", {})
        transform_cmd = elements_cmd.get("transformEntityCommand", {})
        print(f"transformEntityCommand keys: {list(transform_cmd.keys())}")

Exploring Android client elementsCommand for transcripts...
Found 52 transcript segments!

First 5 segments:

Segment 1:
{
  "transcriptSegmentRenderer": {
    "startMs": "320",
    "endMs": "14580",
    "snippet": {
      "elementsAttributedString": {
        "content": "[Music]"
      }
    },
    "startTimeText": {
      "elementsAttributedString": {
        "content": "0:00"
      }
    },
    "trackingParams": "CDQQ0_YHGDYiEwihmc77keORAxX3dvYIHYMiHT4=",
    "accessibility": {
      "accessibilityData": {
        "label": "0 seconds [Music]"
      }
    },
    "entityKey": "EhVkUXc0dzlXZ1hjUV8zMjBfMTQ1ODAgngIo

Segment 2:
{
  "transcriptSegmentRenderer": {
    "startMs": "18800",
    "endMs": "21800",
    "snippet": {
      "elementsAttributedString": {
        "content": "We're no strangers to"
      }
    },
    "startTimeText": {
      "elementsAttributedString": {
        "content": "0:18"
      }
    },
    "trackingParams": "CDMQ0_YHGDciEwihmc77keORAxX3dvYIHYMiHT4=",
    "acc

In [18]:
# Parse the Android client elementsCommand response format
def parse_elements_command_transcript(response_data):
    """Parse transcript from Android client's elementsCommand response."""
    segments = []
    
    try:
        actions = response_data.get("actions", [])
        if not actions:
            return segments
        
        action = actions[0]
        elements_cmd = action.get("elementsCommand", {})
        transform_cmd = elements_cmd.get("transformEntityCommand", {})
        arguments = transform_cmd.get("arguments", {})
        transform_args = arguments.get("transformTranscriptSegmentListArguments", {})
        overwrite = transform_args.get("overwrite", {})
        initial_segments = overwrite.get("initialSegments", [])
        
        for seg in initial_segments:
            renderer = seg.get("transcriptSegmentRenderer", {})
            
            start_ms = int(renderer.get("startMs", 0))
            end_ms = int(renderer.get("endMs", 0))
            text = renderer.get("snippet", {}).get("elementsAttributedString", {}).get("content", "")
            
            if text:
                segments.append({
                    "start": start_ms / 1000,
                    "duration": (end_ms - start_ms) / 1000,
                    "text": text
                })
        
        return segments
    except Exception as e:
        print(f"Error parsing elementsCommand transcript: {e}")
        return segments

# Test the parser
status, data = detailed_get_transcript("dQw4w9WgXcQ", "android", test_clients["android"])
segments = parse_elements_command_transcript(data)

print(f"Parsed {len(segments)} transcript segments from Android client!")
print("\nFirst 10 segments:")
for seg in segments[:10]:
    print(f"  [{seg['start']:.2f}s - {seg['start'] + seg['duration']:.2f}s] {seg['text']}")

print("\n" + "=" * 60)
print("SUCCESS! The older Android client (19.09.37) works without attestation!")
print("=" * 60)

Parsed 52 transcript segments from Android client!

First 10 segments:
  [0.32s - 14.58s] [Music]
  [18.80s - 21.80s] We're no strangers to
  [21.80s - 25.96s] love. You know the rules and so do
  [25.96s - 29.12s] I. I feel commitments from what I'm
  [29.12s - 30.28s] thinking
  [30.28s - 34.36s] of. You wouldn't get this from any other
  [34.36s - 39.56s] guy. I just want to tell you how I'm
  [39.56s - 43.12s] feeling. Got to make you understand.
  [43.12s - 45.84s] Never going to give you up. I'm going to
  [45.84s - 49.20s] let you down. I'm going to run around

SUCCESS! The older Android client (19.09.37) works without attestation!


In [19]:
# Test iOS client too
print("Testing iOS client with older version...")
print("=" * 60)

ios_old = {
    "INNERTUBE_CONTEXT": {
        "client": {
            "clientName": "IOS",
            "clientVersion": "19.09.3",  # Older version
            "deviceMake": "Apple",
            "deviceModel": "iPhone",
            "userAgent": "com.google.ios.youtube/19.09.3 (iPhone; U; CPU iPhone OS 17_4 like Mac OS X)",
            "osName": "iPhone",
            "osVersion": "17.4",
        },
    },
}

status, data = detailed_get_transcript("dQw4w9WgXcQ", "ios", ios_old)
print(f"iOS client response status: {status}")

if status == 200:
    segments = parse_elements_command_transcript(data)
    print(f"Parsed {len(segments)} transcript segments from iOS client!")
    if segments:
        print("\nFirst 5 segments:")
        for seg in segments[:5]:
            print(f"  [{seg['start']:.2f}s] {seg['text']}")
else:
    print(f"iOS client failed with status {status}")

Testing iOS client with older version...
iOS client response status: 200
Parsed 52 transcript segments from iOS client!

First 5 segments:
  [0.32s] [Music]
  [18.80s] We're no strangers to
  [21.80s] love. You know the rules and so do
  [25.96s] I. I feel commitments from what I'm
  [29.12s] thinking


## 🎉 SOLUTION FOUND!

### Working Clients (No Attestation Required)
The **older mobile client versions** (Android 19.09.37, iOS 19.09.3) return transcripts via the `/get_transcript` API without requiring attestation (PO Token).

### Key Differences
| Client Version | Response Format | Works? |
|---------------|----------------|--------|
| Android 20.10.38 (android_sdkless) | `FAILED_PRECONDITION` | ❌ |
| Android 19.09.37 | `elementsCommand.transformEntityCommand.arguments.transformTranscriptSegmentListArguments` | ✅ |
| iOS 19.09.3 | Same as Android 19.x | ✅ |
| WEB | `FAILED_PRECONDITION` | ❌ |

### Fix Required
1. **Downgrade** the Android client version from `20.10.38` to `19.09.37`
2. **Parse** the new response format: `elementsCommand` → `transformEntityCommand` → `arguments` → `transformTranscriptSegmentListArguments` → `overwrite` → `initialSegments`
3. Each segment has `transcriptSegmentRenderer` with:
   - `startMs` / `endMs` - timestamps in milliseconds
   - `snippet.elementsAttributedString.content` - the text

In [22]:
# Let's explore the Android response for language menu data
print("Exploring Android response for language menu...")
print("=" * 60)

status, data = detailed_get_transcript("dQw4w9WgXcQ", "android", test_clients["android"])

# Look for language-related keys in the response
def find_keys_recursive(obj, target_keys, path="", results=None):
    if results is None:
        results = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            current_path = f"{path}.{key}" if path else key
            if any(k.lower() in key.lower() for k in target_keys):
                results.append((current_path, type(value).__name__))
            find_keys_recursive(value, target_keys, current_path, results)
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            find_keys_recursive(item, target_keys, f"{path}[{i}]", results)
    return results

# Search for language-related keys
language_keys = find_keys_recursive(data, ["language", "lang", "menu", "caption"])
print("Found language-related keys:")
for path, type_name in language_keys[:20]:
    print(f"  {path} ({type_name})")

# Check if there's a frameworkUpdates section
if "frameworkUpdates" in data:
    print("\n\nframeworkUpdates keys:")
    fu = data["frameworkUpdates"]
    pprint.pprint(fu, indent=2)

Exploring Android response for language menu...
Found language-related keys:


frameworkUpdates keys:
{ 'entityBatchUpdate': { 'mutations': [ { 'entityKey': 'EgtkUXc0dzlXZ1hjUSCgAigB',
                                          'payload': { 'transcriptSegmentsDataEntity': { 'key': 'EgtkUXc0dzlXZ1hjUSCgAigB',
                                                                                         'segmentsData': [ { 'endMs': '14580',
                                                                                                             'scrollCommand': { 'clickTrackingParams': 'CAAQw7wCIhMIoPr0lZLjkQMVbFX2CB3RcwqtygEEKIXQVg==',
                                                                                                                                'elementsCommand': { 'collectionTypeScrollToItemCommand': { 'animationConfig': { 'enableAnimation': False},
                                                                                                                             

In [23]:
# Test with "xx" language (invalid) to see if we get a language list
# This is how the library's list_languages works
print("Testing Android with 'xx' language to see language list...")
print("=" * 60)

from yt_transcript_fetcher.protobuf import generate_params, encode_visitor_data

android_old = {
    "INNERTUBE_CONTEXT": {
        "client": {
            "clientName": "ANDROID",
            "clientVersion": "19.09.37",
            "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
            "osName": "Android",
            "osVersion": "11",
        },
    },
}

innertube_context = {"client": android_old["INNERTUBE_CONTEXT"]["client"].copy()}
visitor_data = encode_visitor_data()
innertube_context["client"]["visitorData"] = visitor_data

body = {
    "context": innertube_context,
    "params": generate_params("dQw4w9WgXcQ", "xx"),  # "xx" is invalid
    "externalVideoId": "dQw4w9WgXcQ",
}

api_url = "https://www.youtube.com/youtubei/v1/get_transcript"
api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"

headers = {
    "Content-Type": "application/json",
    "User-Agent": android_old["INNERTUBE_CONTEXT"]["client"]["userAgent"],
    "X-Goog-Api-Key": api_key,
    "X-Goog-Visitor-Id": visitor_data,
}

resp = requests.post(api_url, params={"key": api_key}, json=body, headers=headers, timeout=10)
print(f"Status: {resp.status_code}")

if resp.status_code == 200:
    data = resp.json()
    print(f"Top-level keys: {list(data.keys())}")
    
    # Look for any language-related info in actions
    if "actions" in data and data["actions"]:
        print(f"Actions count: {len(data['actions'])}")
        action = data["actions"][0]
        print(f"Action keys: {list(action.keys())}")
        
        # Search for language menu in the elementsCommand structure
        if "elementsCommand" in action:
            ec = action["elementsCommand"]
            print(f"elementsCommand keys: {list(ec.keys())}")
else:
    print(f"Error: {resp.text[:500]}")

Testing Android with 'xx' language to see language list...
Status: 200
Top-level keys: ['responseContext', 'actions', 'trackingParams', 'frameworkUpdates']
Actions count: 1
Action keys: ['clickTrackingParams', 'elementsCommand']
elementsCommand keys: ['transformEntityCommand']


In [24]:
# Let's explore the full Android response structure to find language data
print("Looking for language selection data in Android response...")
print("=" * 60)

# Look at the full action structure
action = data["actions"][0]
transform_cmd = action.get("elementsCommand", {}).get("transformEntityCommand", {})
print(f"transformEntityCommand keys: {list(transform_cmd.keys())}")

# Check if there are additional actions
if len(data["actions"]) > 1:
    print(f"\nThere are {len(data['actions'])} actions")
    for i, act in enumerate(data["actions"]):
        print(f"  Action {i}: {list(act.keys())}")

# The language info might be in frameworkUpdates or responseContext
print("\nresponseContext keys:", list(data.get("responseContext", {}).keys()))

# Check the arguments structure
arguments = transform_cmd.get("arguments", {})
print(f"\ntransformEntityCommand.arguments keys: {list(arguments.keys())}")

# Let's check if there's a language selection in transformTranscriptSegmentListArguments
transform_args = arguments.get("transformTranscriptSegmentListArguments", {})
print(f"transformTranscriptSegmentListArguments keys: {list(transform_args.keys())}")

# Check the overwrite structure for language menu
overwrite = transform_args.get("overwrite", {})
print(f"overwrite keys: {list(overwrite.keys())}")

Looking for language selection data in Android response...
transformEntityCommand keys: ['identifier', 'transform', 'arguments']

responseContext keys: ['visitorData', 'serviceTrackingParams']

transformEntityCommand.arguments keys: ['transformTranscriptSegmentListArguments']
transformTranscriptSegmentListArguments keys: ['overwrite']
overwrite keys: []


## Hybrid Approach: `/player` API for Languages + `/get_transcript` for Transcripts

Since the Android `/get_transcript` response doesn't include a language menu, we need to use the `/player` API to get available caption tracks (languages), then use `/get_transcript` with the Android client to fetch the actual transcript segments.

In [26]:
# Step 1: Get caption tracks from /player API
import requests
import copy
from urllib.parse import unquote

def get_caption_tracks_from_player(video_id, client_info=None):
    """
    Get available caption tracks (languages) from the /player API.
    
    Returns list of caption track dicts with:
    - languageCode: e.g., "en", "es", "fr"
    - name: Display name like "English" or "English (auto-generated)"
    - kind: "asr" for auto-generated, None/empty for manual
    - baseUrl: URL to fetch timedtext (we won't use this)
    """
    if client_info is None:
        # Use Android client for consistency
        client_info = {
            "INNERTUBE_CONTEXT": {
                "client": {
                    "clientName": "ANDROID",
                    "clientVersion": "19.09.37",
                    "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
                    "osName": "Android",
                    "osVersion": "11",
                },
            },
            "INNERTUBE_CONTEXT_CLIENT_NAME": 3,
        }
    
    context = copy.deepcopy(client_info.get("INNERTUBE_CONTEXT", {"client": client_info}))
    
    # Get visitor data
    visitor_data = fetch_visitor_data(context.get("client", {}))
    if visitor_data:
        context["client"]["visitorData"] = unquote(visitor_data)
    
    payload = {
        "context": context,
        "videoId": video_id,
        "playbackContext": {
            "contentPlaybackContext": {
                "html5Preference": "HTML5_PREF_WANTS",
            }
        },
        "contentCheckOk": True,
        "racyCheckOk": True
    }
    
    url = "https://www.youtube.com/youtubei/v1/player?prettyPrint=false"
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": context.get("client", {}).get("userAgent", "Mozilla/5.0"),
    }
    
    if "INNERTUBE_CONTEXT_CLIENT_NAME" in client_info:
        headers["X-Youtube-Client-Name"] = str(client_info["INNERTUBE_CONTEXT_CLIENT_NAME"])
    if "clientVersion" in context.get("client", {}):
        headers["X-Youtube-Client-Version"] = context["client"]["clientVersion"]
    
    response = requests.post(url, json=payload, headers=headers, timeout=15)
    response.raise_for_status()
    
    player_response = response.json()
    
    # Extract caption tracks
    captions = player_response.get("captions", {})
    pctr = captions.get("playerCaptionsTracklistRenderer", {})
    caption_tracks = pctr.get("captionTracks", [])
    
    return caption_tracks

# Test it
video_id = "dQw4w9WgXcQ"
tracks = get_caption_tracks_from_player(video_id)

print(f"Found {len(tracks)} caption tracks for video {video_id}:")
print("=" * 60)
for track in tracks:
    lang_code = track.get("languageCode", "")
    name = track.get("name", {}).get("simpleText", "Unknown")
    kind = track.get("kind", "")
    is_auto = kind == "asr"
    print(f"  [{lang_code}] {name} {'(auto-generated)' if is_auto else ''}")

Found 6 caption tracks for video dQw4w9WgXcQ:
  [en] Unknown 
  [en] Unknown (auto-generated)
  [de-DE] Unknown 
  [ja] Unknown 
  [pt-BR] Unknown 
  [es-419] Unknown 


In [27]:
# Let's look at the raw track data to understand the name structure
print("Raw caption track structure:")
for i, track in enumerate(tracks):
    print(f"\nTrack {i+1}:")
    print(f"  languageCode: {track.get('languageCode')}")
    print(f"  name: {track.get('name')}")
    print(f"  kind: {track.get('kind', '(none)')}")
    print(f"  vssId: {track.get('vssId', '(none)')}")

Raw caption track structure:

Track 1:
  languageCode: en
  name: {'runs': [{'text': 'English'}]}
  kind: (none)
  vssId: .en

Track 2:
  languageCode: en
  name: {'runs': [{'text': 'English (auto-generated)'}]}
  kind: asr
  vssId: a.en

Track 3:
  languageCode: de-DE
  name: {'runs': [{'text': 'German (Germany)'}]}
  kind: (none)
  vssId: .de-DE

Track 4:
  languageCode: ja
  name: {'runs': [{'text': 'Japanese'}]}
  kind: (none)
  vssId: .ja

Track 5:
  languageCode: pt-BR
  name: {'runs': [{'text': 'Portuguese (Brazil)'}]}
  kind: (none)
  vssId: .pt-BR

Track 6:
  languageCode: es-419
  name: {'runs': [{'text': 'Spanish (Latin America)'}]}
  kind: (none)
  vssId: .es-419


In [28]:
# Step 2: Create Language objects from caption tracks
from yt_transcript_fetcher.protobuf import generate_params

def extract_language_name(name_obj):
    """Extract language name from caption track name object."""
    if isinstance(name_obj, dict):
        # Check for runs format
        runs = name_obj.get("runs", [])
        if runs:
            return runs[0].get("text", "Unknown")
        # Check for simpleText format
        return name_obj.get("simpleText", "Unknown")
    return str(name_obj)

def caption_tracks_to_languages(tracks, video_id):
    """
    Convert caption tracks from /player API to Language-like objects.
    
    Each Language object needs:
    - language_code: e.g., "en"
    - language_name: e.g., "English"
    - is_auto_generated: True for ASR captions
    - params: continuation token for /get_transcript
    """
    languages = []
    
    for track in tracks:
        lang_code = track.get("languageCode", "")
        name = extract_language_name(track.get("name", {}))
        is_auto = track.get("kind") == "asr"
        
        # Generate params for /get_transcript API
        params = generate_params(video_id, lang_code)
        
        languages.append({
            "language_code": lang_code,
            "language_name": name,
            "is_auto_generated": is_auto,
            "params": params,
        })
    
    return languages

# Test conversion
video_id = "dQw4w9WgXcQ"
languages = caption_tracks_to_languages(tracks, video_id)

print(f"Converted {len(languages)} languages:")
print("=" * 60)
for lang in languages:
    auto_tag = " (auto)" if lang["is_auto_generated"] else ""
    print(f"  [{lang['language_code']}] {lang['language_name']}{auto_tag}")
    print(f"      params: {lang['params'][:50]}...")

Converted 6 languages:
  [en] English
      params: CgtkUXc0dzlXZ1hjURISQ2dOaGMzSVNBbVZ1R2dBJTNEGAEqM2...
  [en] English (auto-generated) (auto)
      params: CgtkUXc0dzlXZ1hjURISQ2dOaGMzSVNBbVZ1R2dBJTNEGAEqM2...
  [de-DE] German (Germany)
      params: CgtkUXc0dzlXZ1hjURIWQ2dOaGMzSVNCV1JsTFVSRkdnQSUzRB...
  [ja] Japanese
      params: CgtkUXc0dzlXZ1hjURISQ2dOaGMzSVNBbXBoR2dBJTNEGAEqM2...
  [pt-BR] Portuguese (Brazil)
      params: CgtkUXc0dzlXZ1hjURIWQ2dOaGMzSVNCWEIwTFVKU0dnQSUzRB...
  [es-419] Spanish (Latin America)
      params: CgtkUXc0dzlXZ1hjURIUQ2dOaGMzSVNCbVZ6TFRReE9Sb0EYAS...


In [29]:
# Step 3: Fetch transcript using Android client with /get_transcript API
from yt_transcript_fetcher.protobuf import encode_visitor_data

def fetch_transcript_android(video_id, language_code="en"):
    """
    Fetch transcript using Android client (19.09.37) which doesn't require attestation.
    Returns list of segment dicts with start, duration, text.
    """
    android_client = {
        "clientName": "ANDROID",
        "clientVersion": "19.09.37",
        "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
        "osName": "Android",
        "osVersion": "11",
    }
    
    visitor_data = encode_visitor_data()
    context = {
        "client": {
            **android_client,
            "visitorData": visitor_data,
        }
    }
    
    body = {
        "context": context,
        "params": generate_params(video_id, language_code),
    }
    
    api_url = "https://www.youtube.com/youtubei/v1/get_transcript"
    api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": android_client["userAgent"],
        "X-Goog-Api-Key": api_key,
        "X-Goog-Visitor-Id": visitor_data,
    }
    
    response = requests.post(api_url, params={"key": api_key}, json=body, headers=headers, timeout=10)
    response.raise_for_status()
    
    data = response.json()
    
    # Parse the elementsCommand response format
    segments = []
    try:
        actions = data.get("actions", [])
        if not actions:
            return segments
        
        action = actions[0]
        elements_cmd = action.get("elementsCommand", {})
        transform_cmd = elements_cmd.get("transformEntityCommand", {})
        arguments = transform_cmd.get("arguments", {})
        transform_args = arguments.get("transformTranscriptSegmentListArguments", {})
        overwrite = transform_args.get("overwrite", {})
        initial_segments = overwrite.get("initialSegments", [])
        
        for seg in initial_segments:
            renderer = seg.get("transcriptSegmentRenderer", {})
            
            start_ms = int(renderer.get("startMs", 0))
            end_ms = int(renderer.get("endMs", 0))
            text = renderer.get("snippet", {}).get("elementsAttributedString", {}).get("content", "")
            
            if text:
                segments.append({
                    "start": start_ms / 1000,
                    "duration": (end_ms - start_ms) / 1000,
                    "text": text
                })
        
        return segments
    except Exception as e:
        print(f"Error parsing transcript: {e}")
        return segments

# Test fetching English transcript
print("Fetching English transcript...")
print("=" * 60)
segments = fetch_transcript_android("dQw4w9WgXcQ", "en")

print(f"Got {len(segments)} segments!")
print("\nFirst 10 segments:")
for seg in segments[:10]:
    print(f"  [{seg['start']:6.2f}s] {seg['text']}")

Fetching English transcript...
Got 52 segments!

First 10 segments:
  [  0.32s] [Music]
  [ 18.80s] We're no strangers to
  [ 21.80s] love. You know the rules and so do
  [ 25.96s] I. I feel commitments from what I'm
  [ 29.12s] thinking
  [ 30.28s] of. You wouldn't get this from any other
  [ 34.36s] guy. I just want to tell you how I'm
  [ 39.56s] feeling. Got to make you understand.
  [ 43.12s] Never going to give you up. I'm going to
  [ 45.84s] let you down. I'm going to run around


In [30]:
# Step 4: Test fetching a different language (Japanese)
print("Fetching Japanese transcript...")
print("=" * 60)
segments_ja = fetch_transcript_android("dQw4w9WgXcQ", "ja")

print(f"Got {len(segments_ja)} segments!")
print("\nFirst 10 segments:")
for seg in segments_ja[:10]:
    print(f"  [{seg['start']:6.2f}s] {seg['text']}")

Fetching Japanese transcript...
Got 0 segments!

First 10 segments:


In [31]:
# Debug: Let's see what the Japanese response looks like
import json

def get_raw_transcript_response(video_id, language_code):
    """Get raw response from /get_transcript for debugging."""
    android_client = {
        "clientName": "ANDROID",
        "clientVersion": "19.09.37",
        "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
        "osName": "Android",
        "osVersion": "11",
    }
    
    visitor_data = encode_visitor_data()
    context = {
        "client": {
            **android_client,
            "visitorData": visitor_data,
        }
    }
    
    body = {
        "context": context,
        "params": generate_params(video_id, language_code),
    }
    
    api_url = "https://www.youtube.com/youtubei/v1/get_transcript"
    api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": android_client["userAgent"],
        "X-Goog-Api-Key": api_key,
        "X-Goog-Visitor-Id": visitor_data,
    }
    
    response = requests.post(api_url, params={"key": api_key}, json=body, headers=headers, timeout=10)
    return response.status_code, response.json()

# Get Japanese response
status, data = get_raw_transcript_response("dQw4w9WgXcQ", "ja")
print(f"Status: {status}")
print(f"Top-level keys: {list(data.keys())}")

if "actions" in data:
    actions = data["actions"]
    print(f"Number of actions: {len(actions)}")
    if actions:
        print(f"Action keys: {list(actions[0].keys())}")
        
        # Check if it's elementsCommand or something else
        if "elementsCommand" in actions[0]:
            ec = actions[0]["elementsCommand"]
            print(f"elementsCommand keys: {list(ec.keys())}")
            
            tc = ec.get("transformEntityCommand", {})
            args = tc.get("arguments", {})
            tsla = args.get("transformTranscriptSegmentListArguments", {})
            overwrite = tsla.get("overwrite", {})
            segments = overwrite.get("initialSegments", [])
            print(f"Segments found: {len(segments)}")
            
            if not segments:
                # Maybe it's in a different path?
                print("\nFull elementsCommand structure:")
                print(json.dumps(ec, indent=2)[:2000])

Status: 200
Top-level keys: ['responseContext', 'actions', 'trackingParams', 'frameworkUpdates']
Number of actions: 1
Action keys: ['clickTrackingParams', 'elementsCommand']
elementsCommand keys: ['transformEntityCommand']
Segments found: 0

Full elementsCommand structure:
{
  "transformEntityCommand": {
    "identifier": "dQw4w9WgXcQ.transcript.full.state.key",
    "transform": {
      "types": [
        {
          "typeId": 2,
          "fieldType": "EKO_FIELD_TYPE_MESSAGE"
        },
        {
          "typeId": 12,
          "fieldType": "EKO_FIELD_TYPE_INT32"
        }
      ],
      "variables": [
        {
          "variableId": 1,
          "variableType": "EKO_VARIABLE_TYPE_INPUT"
        },
        {
          "variableId": 2,
          "typeId": 2,
          "variableType": "EKO_VARIABLE_TYPE_OUTPUT",
          "value": {
            "messageValue": {
              "fields": [
                {
                  "tag": 1,
                  "value": {
                    "

In [33]:
# The generate_params function has an auto_generated parameter!
# Japanese captions are NOT auto-generated (no 'asr' kind)
# Let's test with auto_generated=False

def fetch_transcript_android_v2(video_id, language_code="en", auto_generated=True):
    """
    Fetch transcript using Android client (19.09.37) which doesn't require attestation.
    Now with auto_generated parameter!
    """
    android_client = {
        "clientName": "ANDROID",
        "clientVersion": "19.09.37",
        "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
        "osName": "Android",
        "osVersion": "11",
    }
    
    visitor_data = encode_visitor_data()
    context = {
        "client": {
            **android_client,
            "visitorData": visitor_data,
        }
    }
    
    # Use auto_generated parameter!
    params = generate_params(video_id, language_code, auto_generated=auto_generated)
    
    body = {
        "context": context,
        "params": params,
    }
    
    api_url = "https://www.youtube.com/youtubei/v1/get_transcript"
    api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
    
    headers = {
        "Content-Type": "application/json",
        "User-Agent": android_client["userAgent"],
        "X-Goog-Api-Key": api_key,
        "X-Goog-Visitor-Id": visitor_data,
    }
    
    response = requests.post(api_url, params={"key": api_key}, json=body, headers=headers, timeout=10)
    response.raise_for_status()
    
    data = response.json()
    
    # Parse the elementsCommand response format
    segments = []
    try:
        actions = data.get("actions", [])
        if not actions:
            return segments
        
        action = actions[0]
        elements_cmd = action.get("elementsCommand", {})
        transform_cmd = elements_cmd.get("transformEntityCommand", {})
        arguments = transform_cmd.get("arguments", {})
        transform_args = arguments.get("transformTranscriptSegmentListArguments", {})
        overwrite = transform_args.get("overwrite", {})
        initial_segments = overwrite.get("initialSegments", [])
        
        for seg in initial_segments:
            renderer = seg.get("transcriptSegmentRenderer", {})
            
            start_ms = int(renderer.get("startMs", 0))
            end_ms = int(renderer.get("endMs", 0))
            text = renderer.get("snippet", {}).get("elementsAttributedString", {}).get("content", "")
            
            if text:
                segments.append({
                    "start": start_ms / 1000,
                    "duration": (end_ms - start_ms) / 1000,
                    "text": text
                })
        
        return segments
    except Exception as e:
        print(f"Error parsing transcript: {e}")
        return segments

# Test: Japanese with auto_generated=False (it's a manual caption)
print("Testing Japanese with auto_generated=False...")
print("=" * 60)
segments_ja = fetch_transcript_android_v2("dQw4w9WgXcQ", "ja", auto_generated=False)
print(f"Got {len(segments_ja)} segments!")

if segments_ja:
    print("\nFirst 10 segments:")
    for seg in segments_ja[:10]:
        print(f"  [{seg['start']:6.2f}s] {seg['text']}")
else:
    print("Still no segments - let's check what kinds are available...")
    for track in tracks:
        lang = track.get("languageCode", "")
        kind = track.get("kind", "(manual)")
        print(f"  {lang}: kind={kind}")

Testing Japanese with auto_generated=False...
Got 60 segments!

First 10 segments:
  [ 18.64s] 僕らは恋愛初心者じゃない
  [ 22.64s] ルールは互いに分かってる
  [ 27.04s] 君に全部
捧げても構わない
  [ 31.12s] 他の男は
こんなに尽くせないよ
  [ 35.16s] 僕の気持ちを
君に伝えたいんだ
  [ 40.52s] 君に分かってほしい
  [ 43.00s] 君を決して諦めない
  [ 45.20s] 決してがっかりさせない
  [ 47.32s] 言い訳したり
逃げたりしない
  [ 51.48s] 泣かせたりしない


## ✅ Complete Hybrid Solution

The solution requires:
1. **`/player` API** - Get caption tracks (languages) with their `kind` (asr vs manual)
2. **`/get_transcript` API with Android 19.09.37** - Fetch transcript segments
3. **Use `auto_generated` param correctly** - Based on `kind == "asr"` from caption tracks

The key insight is that `generate_params(video_id, lang, auto_generated=True)` adds "asr" to Field 1 of the nested protobuf. This must match the caption type!

In [34]:
# Complete Hybrid Implementation
# ==============================
# This is the complete solution that:
# 1. Uses /player API to get available languages
# 2. Uses /get_transcript with Android client to fetch transcripts

class HybridTranscriptFetcher:
    """Hybrid transcript fetcher using /player for languages and /get_transcript for content."""
    
    ANDROID_CLIENT = {
        "clientName": "ANDROID",
        "clientVersion": "19.09.37",  # Older version - no attestation required!
        "userAgent": "com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip",
        "osName": "Android",
        "osVersion": "11",
    }
    
    API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
    
    def __init__(self):
        self.session = requests.Session()
    
    def list_languages(self, video_id):
        """Get available caption languages using /player API."""
        visitor_data = encode_visitor_data()
        
        context = {
            "client": {
                **self.ANDROID_CLIENT,
                "visitorData": visitor_data,
            }
        }
        
        payload = {
            "context": context,
            "videoId": video_id,
            "playbackContext": {"contentPlaybackContext": {"html5Preference": "HTML5_PREF_WANTS"}},
            "contentCheckOk": True,
            "racyCheckOk": True
        }
        
        headers = {
            "Content-Type": "application/json",
            "User-Agent": self.ANDROID_CLIENT["userAgent"],
            "X-Youtube-Client-Name": "3",
            "X-Youtube-Client-Version": self.ANDROID_CLIENT["clientVersion"],
        }
        
        response = self.session.post(
            "https://www.youtube.com/youtubei/v1/player",
            params={"key": self.API_KEY},
            json=payload,
            headers=headers,
            timeout=15
        )
        response.raise_for_status()
        
        data = response.json()
        captions = data.get("captions", {})
        pctr = captions.get("playerCaptionsTracklistRenderer", {})
        tracks = pctr.get("captionTracks", [])
        
        languages = []
        for track in tracks:
            lang_code = track.get("languageCode", "")
            name_obj = track.get("name", {})
            name = name_obj.get("runs", [{}])[0].get("text", "") if "runs" in name_obj else name_obj.get("simpleText", "")
            is_auto = track.get("kind") == "asr"
            
            languages.append({
                "language_code": lang_code,
                "language_name": name,
                "is_auto_generated": is_auto,
            })
        
        return languages
    
    def get_transcript(self, video_id, language_code="en", auto_generated=True):
        """Fetch transcript using /get_transcript with Android client."""
        visitor_data = encode_visitor_data()
        
        context = {
            "client": {
                **self.ANDROID_CLIENT,
                "visitorData": visitor_data,
            }
        }
        
        body = {
            "context": context,
            "params": generate_params(video_id, language_code, auto_generated=auto_generated),
        }
        
        headers = {
            "Content-Type": "application/json",
            "User-Agent": self.ANDROID_CLIENT["userAgent"],
            "X-Goog-Api-Key": self.API_KEY,
            "X-Goog-Visitor-Id": visitor_data,
        }
        
        response = self.session.post(
            "https://www.youtube.com/youtubei/v1/get_transcript",
            params={"key": self.API_KEY},
            json=body,
            headers=headers,
            timeout=10
        )
        response.raise_for_status()
        
        data = response.json()
        return self._parse_android_response(data)
    
    def _parse_android_response(self, data):
        """Parse the elementsCommand response format from Android client."""
        segments = []
        
        actions = data.get("actions", [])
        if not actions:
            return segments
        
        action = actions[0]
        elements_cmd = action.get("elementsCommand", {})
        transform_cmd = elements_cmd.get("transformEntityCommand", {})
        arguments = transform_cmd.get("arguments", {})
        transform_args = arguments.get("transformTranscriptSegmentListArguments", {})
        overwrite = transform_args.get("overwrite", {})
        initial_segments = overwrite.get("initialSegments", [])
        
        for seg in initial_segments:
            renderer = seg.get("transcriptSegmentRenderer", {})
            start_ms = int(renderer.get("startMs", 0))
            end_ms = int(renderer.get("endMs", 0))
            text = renderer.get("snippet", {}).get("elementsAttributedString", {}).get("content", "")
            
            if text:
                segments.append({
                    "start": start_ms / 1000,
                    "duration": (end_ms - start_ms) / 1000,
                    "text": text
                })
        
        return segments


# Test the complete solution
print("Testing Complete Hybrid Solution")
print("=" * 60)

fetcher = HybridTranscriptFetcher()

# 1. List available languages
print("\n1. Listing available languages...")
languages = fetcher.list_languages("dQw4w9WgXcQ")
print(f"   Found {len(languages)} languages:")
for lang in languages:
    auto = " (auto)" if lang["is_auto_generated"] else ""
    print(f"   - [{lang['language_code']}] {lang['language_name']}{auto}")

# 2. Fetch English auto-generated transcript
print("\n2. Fetching English (auto-generated) transcript...")
en_segments = fetcher.get_transcript("dQw4w9WgXcQ", "en", auto_generated=True)
print(f"   Got {len(en_segments)} segments")
print(f"   First: [{en_segments[0]['start']:.1f}s] {en_segments[0]['text']}")

# 3. Fetch Japanese manual transcript
print("\n3. Fetching Japanese (manual) transcript...")
ja_segments = fetcher.get_transcript("dQw4w9WgXcQ", "ja", auto_generated=False)
print(f"   Got {len(ja_segments)} segments")
print(f"   First: [{ja_segments[0]['start']:.1f}s] {ja_segments[0]['text']}")

print("\n" + "=" * 60)
print("✅ Hybrid solution works! No attestation required.")

Testing Complete Hybrid Solution

1. Listing available languages...
   Found 6 languages:
   - [en] English
   - [en] English (auto-generated) (auto)
   - [de-DE] German (Germany)
   - [ja] Japanese
   - [pt-BR] Portuguese (Brazil)
   - [es-419] Spanish (Latin America)

2. Fetching English (auto-generated) transcript...
   Got 52 segments
   First: [0.3s] [Music]

3. Fetching Japanese (manual) transcript...
   Got 60 segments
   First: [18.6s] 僕らは恋愛初心者じゃない

✅ Hybrid solution works! No attestation required.
