In [2]:
import json

In [4]:
def clean_data(data):
    """Clean the raw social-network data in place and return it.

    The cleaning steps run in this order:

    1. Drop users whose name is missing, not a string, or only whitespace.
    2. De-duplicate each user's friend list, keeping first-seen order.
    3. Drop users that have neither friends nor liked pages.
    4. De-duplicate pages by ``id``; when an id repeats, the last entry's
       content is kept.

    Args:
        data: Dict with a ``"users"`` list and a ``"pages"`` list. Each user
            is a dict that may carry ``"name"``, ``"friends"`` and
            ``"liked_pages"``; each page is a dict with an ``"id"``.

    Returns:
        The same ``data`` object that was passed in, modified in place.

    Raises:
        KeyError: If ``data`` has no ``"users"`` or ``"pages"`` key, or a
            page has no ``"id"``.
    """
    # Remove users with missing names. The isinstance check also rejects an
    # absent or null name instead of crashing on ``.strip()``.
    named_users = []
    for user in data["users"]:
        name = user.get("name")
        if isinstance(name, str) and name.strip():
            named_users.append(user)

    for user in named_users:
        # Remove duplicate friends. dict.fromkeys keeps the first occurrence
        # of each id in its original position, unlike set(), whose iteration
        # order is arbitrary.
        user["friends"] = list(dict.fromkeys(user.get("friends") or []))
        # Treat a missing or null liked_pages as "no liked pages" so the
        # activity check below (and later readers of this key) do not fail.
        if user.get("liked_pages") is None:
            user["liked_pages"] = []

    # Remove inactive users: those with no friends and no liked pages.
    data["users"] = [
        user for user in named_users if user["friends"] or user["liked_pages"]
    ]

    # Remove duplicate pages. Assigning to an existing dict key overwrites its
    # value, so only one page per id survives (the last one seen).
    unique_pages = {}
    for page in data["pages"]:
        unique_pages[page["id"]] = page
    data["pages"] = list(unique_pages.values())

    return data

# Load the raw data, clean it, and write the cleaned result to disk.
# Both files are opened in `with` blocks so their handles are closed (and the
# output flushed) before any later cell reads cleaned_data2.json.
with open("data2.json", "r") as raw_file:
    data = json.load(raw_file)
data = clean_data(data)
with open("cleaned_data2.json", "w") as cleaned_file:
    json.dump(data, cleaned_file, indent=4)
print("Data cleaned successfully!")

Data cleaned successfully!


In [None]:
def load_cleaned_data(filename):
    """Load previously cleaned data from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object decoded from the file's JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, "r") as file:
        # Return what was read from `filename`, not a global variable that
        # happens to exist in the notebook session.
        return json.load(file)
# Call the loader by its actual name and bind the result to `cleaned_data`,
# the variable that user_insights(cleaned_data) reads further down.
cleaned_data = load_cleaned_data("cleaned_data2.json")

In [26]:
def user_insights(cleaned_data):
    """Print every user's friends and liked pages by name, then list all pages.

    Friend ids and page ids that have no matching entry are shown as
    ``Unknown<id>``.

    Args:
        cleaned_data: Dict with a ``'users'`` list (dicts holding ``'id'``,
            ``'name'``, ``'friends'`` and ``'liked_pages'``) and a ``'pages'``
            list (dicts holding ``'id'`` and ``'name'``).

    Returns:
        None. All results are written to standard output.
    """
    # Build the id -> name lookup tables, users first and then pages.
    user_names = {}
    for person in cleaned_data['users']:
        user_names[person['id']] = person['name']

    page_titles = {}
    for entry in cleaned_data['pages']:
        page_titles[entry['id']] = entry['name']

    print("Users and their connections :\n")
    for person in cleaned_data['users']:
        friends = []
        for friend_id in person['friends']:
            friends.append(user_names.get(friend_id, f"Unknown{friend_id}"))

        likes = []
        for page_id in person['liked_pages']:
            likes.append(page_titles.get(page_id, f"Unknown{page_id}"))

        print(f"ID : {person['id']} -  {person['name']} is friends with {friends} and liked pages are {likes}")

    print("\nPages Information :")
    for entry in cleaned_data['pages']:
        print(f"{entry['id']} : {entry['name']}")

# Print the user/page summary. `cleaned_data` is not defined in this cell, so
# an earlier cell must have bound it before this one runs.
user_insights(cleaned_data)

Users and their connections :

ID : 1 -  Amit is friends with ['Priya', 'Unknown3'] and liked pages are ['Python Developers']
ID : 2 -  Priya is friends with ['Amit', 'Sara'] and liked pages are ['Data Science Enthusiasts']
ID : 4 -  Sara is friends with ['Priya'] and liked pages are ['Web Development']

Pages Information :
101 : Python Developers
102 : Data Science Enthusiasts
103 : AI & ML Community
104 : Web Development
