In [1]:
import json

def load_data(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized document, i.e. whatever ``json.load`` produces
        for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without an encoding argument open() uses
    # the locale's preferred encoding, which can mis-decode non-ASCII text
    # on platforms whose default is not UTF-8.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

# Load the raw store records; the cleaning cell below reads them from `data`.
data = load_data(filename="data/store_data.json")


In [2]:
def clean_data(data):
    """Normalize raw user records and drop duplicate users.

    Records are processed in input order:

    * ``name`` is stripped and lower-cased. Only the first record for each
      normalized name is kept; later duplicates are skipped before any of
      their other fields are parsed.
    * ``rating`` is stringified, stripped and lower-cased. The words
      "one" through "five" map to the ints 1-5; any other value is parsed
      with ``float``.
    * ``age`` is converted with ``int``. A missing key, ``None`` or a
      blank string yields ``None``.
    * ``feedback`` is stripped. A missing key or ``None`` yields ``""``.

    Args:
        data: Iterable of dict-like records. Each record must provide
            "name" (a string) and "rating"; "age" and "feedback" are
            optional.

    Returns:
        A new list of dicts with the keys "name", "rating", "feedback"
        and "age". The input records are not modified.

    Raises:
        KeyError: If a record has no "name" or "rating" key.
        ValueError: If a rating or a non-blank age cannot be parsed.
    """
    text_to_num = {
        "one": 1, "two": 2, "three": 3,
        "four": 4, "five": 5
    }

    cleaned_data = []
    unique_users = set()

    for user in data:
        # ---- clean & normalize name ----
        name = user["name"].strip().lower()
        if name in unique_users:
            continue
        unique_users.add(name)

        # ---- clean rating ----
        # NOTE(review): word ratings come back as int while numeric ratings
        # come back as float; left as-is so existing output is unchanged.
        raw_rating = str(user["rating"]).strip().lower()
        if raw_rating in text_to_num:
            rating = text_to_num[raw_rating]
        else:
            rating = float(raw_rating)

        # ---- clean age ----
        raw_age = user.get("age")
        if raw_age is None or (isinstance(raw_age, str) and not raw_age.strip()):
            # A blank string is treated like a missing age instead of
            # letting int("") abort the whole cleaning run.
            age = None
        else:
            age = int(raw_age)

        # ---- clean feedback ----
        # The key may be present with a null value, in which case
        # .get(key, "") would return None and .strip() would fail.
        raw_feedback = user.get("feedback")
        feedback = "" if raw_feedback is None else raw_feedback.strip()

        cleaned_data.append({
            "name": name,
            "rating": rating,
            "feedback": feedback,
            "age": age
        })

    return cleaned_data


In [3]:
# Run the cleaning step on the loaded records and print the full result.
cleaned_data = clean_data(data=data)
print(cleaned_data)


[{'name': 'alice', 'rating': 5.0, 'feedback': 'Great product!!', 'age': 25}, {'name': 'bob', 'rating': 4, 'feedback': 'ok but late Delivery', 'age': 30}, {'name': 'charlie', 'rating': 2, 'feedback': 'BAD EXPERIENCE', 'age': None}, {'name': 'diana', 'rating': 5.0, 'feedback': 'Loved it!', 'age': 28}, {'name': 'eve', 'rating': 3.5, 'feedback': 'Average - could be better', 'age': 20}]


In [4]:
import json

# Persist the cleaned records as pretty-printed JSON (4-space indent).
with open("data/store_data_cleaned.json", "w") as out_file:
    json.dump(cleaned_data, fp=out_file, indent=4)
