In [71]:
import json
import os
from dataclasses import dataclass, field
from datetime import datetime
from pprint import pprint
from typing import Optional, Dict, Any, List
from urllib.parse import urlencode

import requests
from dotenv import load_dotenv

# Output folders for dumped API responses. makedirs creates missing parent
# directories, so one call on the nested path also creates "responses".
os.makedirs("responses/raw", exist_ok=True)

In [None]:
# Load variables from a .env file (python-dotenv) so that the
# os.getenv("GOOGLE_API_KEY") lookup in BasicConfig can find the key.
load_dotenv()

@dataclass
class BasicConfig:
    """Connection settings for the YouTube Data API v3.

    Attributes:
        api_key: API key, read from the ``GOOGLE_API_KEY`` environment
            variable each time an instance is created. ``None`` when the
            variable is not set.
        base_url: Root URL that endpoint paths are appended to.
    """
    # os.getenv returns None for a missing variable, hence Optional[str].
    api_key: Optional[str] = field(default_factory=lambda: os.getenv("GOOGLE_API_KEY"))
    base_url: str = "https://www.googleapis.com/youtube/v3"

    

In [73]:
# Exploring responses

# Shared config instance; get_url reads its api_key and base_url.
urldefaults = BasicConfig()
def get_url(endpoint: str, params: Optional[dict] = None) -> str:
    """Build the full request URL for an API endpoint.

    Args:
        endpoint: Path appended verbatim to ``urldefaults.base_url``
            (for example ``"/videos"``).
        params: Query parameters. The mapping is copied, so the caller's
            dict is never modified.

    Returns:
        ``base_url + endpoint + "?" + <url-encoded query>``. The query always
        carries ``key`` set to ``urldefaults.api_key``, overriding any ``key``
        supplied in ``params``.
    """
    # Work on a copy: writing the API key into the caller's dict would leak
    # the secret into notebook state and silently alter the caller's data.
    query = dict(params) if params else {}
    query['key'] = urldefaults.api_key
    return f"{urldefaults.base_url}{endpoint}?{urlencode(query)}"

def get_response(endpoint: str, params: Optional[dict] = None, dump: bool = False, max_pages: int = 1, base_file_name: Optional[str] = None, timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Fetch up to ``max_pages`` pages from an endpoint, following ``nextPageToken``.

    Args:
        endpoint: Endpoint path passed to ``get_url`` (for example ``"/videos"``).
        params: Query parameters for the request. Copied internally; the
            caller's dict is never modified.
        dump: When true, write each page's JSON payload to its own file.
        max_pages: Maximum number of pages to request. A value <= 0 performs
            no request and returns an empty list.
        base_file_name: Optional path used as the dump file prefix; its
            extension is stripped. Defaults to
            ``responses/raw/<endpoint with "/" replaced by "_">_response``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The decoded JSON payload of every fetched page, in request order.

    Raises:
        requests.HTTPError: If a request returns a status code other than 200.
    """
    results: List[Dict[str, Any]] = []

    # Private copy of the query so neither the page token added below nor
    # anything get_url writes into the dict reaches the caller's mapping.
    query = dict(params) if params else {}

    if base_file_name:
        base_name_for_dump = os.path.splitext(base_file_name)[0]
    else:
        base_name_for_dump = f"responses/raw/{endpoint.replace('/', '_')}_response"

    if dump:
        # Create the target directory here so dumping does not depend on
        # another cell having been run first.
        dump_dir = os.path.dirname(base_name_for_dump)
        if dump_dir:
            os.makedirs(dump_dir, exist_ok=True)

    page_count = 0
    while page_count < max_pages:
        url = get_url(endpoint, query)
        # Without a timeout a stalled connection would block the kernel forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Error: {response.status_code} - {response.text}",
                response=response,
            )
        data = response.json()
        results.append(data)

        if dump:
            print(f"Dumping page {page_count + 1}...")
            page_suffix = f"_page_{page_count+1}"
            timestamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
            current_file_name = f"{base_name_for_dump}{page_suffix}_{timestamp}.json"

            with open(current_file_name, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)
            print(f"Dumped to: {current_file_name}")

        next_page_token = data.get('nextPageToken')
        if not next_page_token:
            break
        # Ask for the following page on the next iteration.
        query['pageToken'] = next_page_token
        page_count += 1

    return results


In [74]:
# Query for the "mostPopular" video chart, snippet part only, regionCode "IN".
parameters: Dict[str, str] = dict(
    part="snippet",
    chart="mostPopular",
    regionCode="IN",
)

# Build the URL once for inspection, then fetch (and dump) up to two pages.
url = get_url(endpoint="/videos", params=parameters)
response = get_response(
    endpoint="/videos",
    params=parameters,
    dump=True,
    max_pages=2,
)

Dumping page 1...
Dumped to: responses/raw/_videos_response_page_1_2025_07_31_08_02_39.json
Dumping page 2...
Dumped to: responses/raw/_videos_response_page_2_2025_07_31_08_02_39.json


In [75]:
def extract_field_types(data: List[Dict[str, Any]]) -> List[Any]:
    """Summarise the structure of each item in ``data`` as field -> type name.

    For a dict, the result is a dict with the same keys whose values are:
    a nested summary for dict values; a one-element list describing the
    FIRST element for non-empty lists (later elements are not inspected);
    ``None`` for ``None`` values; otherwise the value's type name (an empty
    list is reported as ``'list'``). A non-dict item is summarised by its
    type name alone.

    Returns:
        One summary per item of ``data``, in the same order.
    """
    def describe(node) -> Any:
        # Anything that is not a dict is a leaf: report its type name.
        if not isinstance(node, dict):
            return type(node).__name__

        schema = {}
        for key, value in node.items():
            if isinstance(value, dict):
                schema[key] = describe(value)
            elif isinstance(value, list) and value:
                # The first element stands in for the whole list; describe()
                # yields a nested summary for dicts and a type name otherwise.
                schema[key] = [describe(value[0])]
            elif value is None:
                schema[key] = None
            else:
                schema[key] = type(value).__name__
        return schema

    return list(map(describe, data))


In [76]:
# Infer a field -> type-name summary for every fetched page.
fields = extract_field_types(response)

In [77]:
# Pretty-print the inferred structure (one entry per item in `response`).
pprint(fields)

[{'etag': 'str',
  'items': [{'etag': 'str',
             'id': 'str',
             'kind': 'str',
             'snippet': {'categoryId': 'str',
                         'channelId': 'str',
                         'channelTitle': 'str',
                         'defaultAudioLanguage': 'str',
                         'defaultLanguage': 'str',
                         'description': 'str',
                         'liveBroadcastContent': 'str',
                         'localized': {'description': 'str', 'title': 'str'},
                         'publishedAt': 'str',
                         'tags': ['str'],
                         'thumbnails': {'default': {'height': 'int',
                                                    'url': 'str',
                                                    'width': 'int'},
                                        'high': {'height': 'int',
                                                 'url': 'str',
                                                 'wi