In [1]:
import json

def clean_data(data):
    """Clean a users/pages dataset in place and return it.

    The following steps run in order:

    1. Users whose ``name`` is missing, not a string, or blank are dropped.
    2. Each remaining user's ``friends`` list is de-duplicated, keeping the
       first occurrence of every entry in its original order.  A missing or
       non-list ``friends`` value is replaced by an empty list.
    3. Users with neither friends nor ``liked_pages`` are dropped.
    4. Pages without an ``id`` are dropped and pages sharing an ``id`` are
       collapsed into one entry.

    Args:
        data: Mapping that may contain ``"users"`` and ``"pages"`` lists of
            dicts.  A missing key or a ``None`` value is treated as empty.

    Returns:
        The same ``data`` object, with ``data["users"]`` and
        ``data["pages"]`` replaced by the cleaned lists.

    Raises:
        TypeError: If a ``friends`` list or a page ``id`` is unhashable.
    """
    # `or []` covers both a missing key and an explicit null value.
    raw_users = data.get("users") or []

    # Require a real string before calling .strip(); a truthy non-string
    # name (e.g. a number) would otherwise raise AttributeError.
    users = [
        user for user in raw_users
        if isinstance(user.get("name"), str) and user["name"].strip()
    ]

    for user in users:
        friends = user.get("friends")
        if isinstance(friends, list):
            # dict.fromkeys keeps first-seen order; list(set(...)) does not.
            user["friends"] = list(dict.fromkeys(friends))
        else:
            user["friends"] = []

    # A user counts as inactive when both lists are empty or absent.
    data["users"] = [
        user for user in users
        if user.get("friends") or user.get("liked_pages")
    ]

    # Keyed by id: a repeated id keeps the position of its first occurrence
    # but the content of its last occurrence.
    unique_pages = {}
    for page in data.get("pages") or []:
        if "id" in page:
            unique_pages[page["id"]] = page
    data["pages"] = list(unique_pages.values())

    return data


# Load JSON data from file
def load_data(filename):
    """Read and parse a JSON document from ``filename``.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON value as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Pin the encoding: without it open() uses the platform locale codec,
    # which can fail or mis-decode non-ASCII text on non-UTF-8 systems.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)


# Example usage
if __name__ == "__main__":
    # Load data.json, clean it, and pretty-print the result as JSON.
    print(json.dumps(clean_data(load_data("data.json")), indent=4))


{
    "users": [
        {
            "id": 1,
            "name": "Amit",
            "friends": [
                2,
                3
            ],
            "liked_pages": [
                101
            ]
        },
        {
            "id": 2,
            "name": "Priya",
            "friends": [
                1,
                4
            ],
            "liked_pages": [
                102
            ]
        },
        {
            "id": 3,
            "name": "Rahul",
            "friends": [
                1
            ],
            "liked_pages": [
                101,
                103
            ]
        },
        {
            "id": 4,
            "name": "Sara",
            "friends": [
                2
            ],
            "liked_pages": [
                104
            ]
        }
    ],
    "pages": [
        {
            "id": 101,
            "name": "Python Developers"
        },
        {
            "id": 102,
            "name": 