In [1]:

import requests
import json
import pandas as pd
import sys


In [2]:

def get_json_data(url, timeout=10, max_retries=3, show_stats=True):
    """Downloads a JSON file from a URL with robust error handling and data summary.

    Args:
        url: The URL of the JSON file.
        timeout: The maximum number of seconds to wait for a server response.
        max_retries: The maximum number of retries to attempt download in case of errors.
        show_stats: Boolean to indicate whether to show descriptive statistics.

    Returns:
        A dictionary containing the JSON data, or None if an error occurs after retries.
    """
    data = None

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()

            # Parse the response content as JSON
            data = response.json()

            # Download successful, break out of retry loop
            break

        except (requests.exceptions.RequestException, TimeoutError) as e:
            print(f"Error downloading JSON from {url} (attempt {attempt}/{max_retries}): {e}")
            if attempt == max_retries:
                print(f"Failed to download JSON after {max_retries} attempts.")
                return None

        except json.JSONDecodeError as e:
            print(f"Error parsing JSON data: {e}")
            return None

    # Print the downloaded data dictionary
    print("\nDownloaded Data:")
    print(json.dumps(data, indent=4, ensure_ascii=False))

    # Calculate and print the size of the JSON object
    json_size = sys.getsizeof(data)
    print(f"\nSize of the JSON object: {json_size} bytes")

    # Data Description using pandas (assuming data is a dictionary)
    if isinstance(data, dict):
        print("\nData Description:")

        try:
            # Convert data to a pandas DataFrame (if possible)
            df = pd.DataFrame([data])  # Wrap the dictionary in a list to create a single-row DataFrame

            # Print total rows
            print(f"\nTotal rows: {len(df)}")

            # Explore some basic information about the data
            print("\nBasic Info:")
            print(df.info())

            # Print column names and data types
            print("\nColumn Names and Data Types:")
            print(df.dtypes)

            # Print column headers
            print("\nColumn Headers:")
            print(df.columns.tolist())

            # Check for missing values
            print("\nMissing Values:")
            print(df.isnull().sum())

            if show_stats:
                # Descriptive statistics for numerical columns
                print("\nDescriptive Statistics:")
                print(df.describe(include='all'))

        except (ValueError, KeyError) as e:
            print(f"Error converting data to pandas DataFrame: {e}")

    return data


In [3]:
url = "https://servicios.ine.es/wstempus/js/ES/SERIE/CENSO1568062"
data = get_json_data(url, timeout=10, max_retries=3)



Downloaded Data:
{
    "Id": 1568062,
    "COD": "CENSO1568062",
    "FK_Operacion": 463,
    "Nombre": "Población total. Valor",
    "Decimales": 0,
    "FK_Periodicidad": 12,
    "FK_Publicacion": 31,
    "FK_Clasificacion": null,
    "FK_Escala": 1,
    "FK_Unidad": 3
}

Size of the JSON object: 272 bytes

Data Description:

Total rows: 1

Basic Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Id                1 non-null      int64 
 1   COD               1 non-null      object
 2   FK_Operacion      1 non-null      int64 
 3   Nombre            1 non-null      object
 4   Decimales         1 non-null      int64 
 5   FK_Periodicidad   1 non-null      int64 
 6   FK_Publicacion    1 non-null      int64 
 7   FK_Clasificacion  0 non-null      object
 8   FK_Escala         1 non-null      int64 
 9   FK_Unidad         1 non-null  

<h1> Data set provided by the Madrid Community .. </h1>