In [2]:
import json

def load_data(filename):
    with open(filename, "r") as f:
        data = json.load(f)
    return data

data = load_data("data/store_data.json")


In [3]:
def clean_data(data):
    text_to_num = {
        "one": 1, "two": 2, "three": 3,
        "four": 4, "five": 5
    }

    cleaned_data = []
    unique_users = set()

    for user in data:
        # ---- clean & normalize name ----
        name = user["name"].strip().lower()
        if name in unique_users:
            continue
        unique_users.add(name)

        # ---- clean rating ----
        raw_rating = str(user["rating"]).strip().lower()
        if raw_rating in text_to_num:
            rating = text_to_num[raw_rating]
        else:
            rating = float(raw_rating)

        # ---- clean age ----
        raw_age = user.get("age")
        age = int(raw_age) if raw_age is not None else None

        cleaned_data.append({
            "name": name,
            "rating": rating,
            "feedback": user.get("feedback", "").strip(),
            "age": age
        })

    return cleaned_data


In [4]:
cleaned_data = clean_data(data)
print(cleaned_data)


[{'name': 'alice', 'rating': 5.0, 'feedback': 'Great product!!', 'age': 25}, {'name': 'bob', 'rating': 4, 'feedback': 'ok but late Delivery', 'age': 30}, {'name': 'charlie', 'rating': 2, 'feedback': 'BAD EXPERIENCE', 'age': None}, {'name': 'diana', 'rating': 5.0, 'feedback': 'Loved it!', 'age': 28}, {'name': 'eve', 'rating': 3.5, 'feedback': 'Average - could be better', 'age': 20}]


In [5]:
import json

with open("data/store_data_cleaned.json", "w") as f:
    json.dump(cleaned_data, f, indent=4)


In [6]:
# To get mfeaningful Insights from data
def get_insights(data):

    # average rating
    tot_rating = 0
    for user in data:
        tot_rating += user["rating"]

    print(f"Average Rating = {tot_rating/len(data)}")

    #Percentage of users with poor ratings
    poor_ratings = 0
    for user in data:
        if(user["rating"] < 3):
            poor_ratings += 1
    print(f"% of user with poor rating = {poor_ratings/len(data) * 100}%")
            

In [7]:
get_insights(cleaned_data)

Average Rating = 3.9
% of user with poor rating = 20.0%


In [14]:
# To get meaningful insights from data
def get_insights(data):
    if not data:
        print("No data available for insights.")
        return

    # Average rating
    total_rating = 0
    for user in data:
        total_rating += user["rating"]

    avg_rating = total_rating / len(data)
    print(f"Average Rating = {avg_rating:.2f}")

    # Percentage of users with poor ratings
    poor_ratings = 0
    for user in data:
        if user["rating"] < 3:
            poor_ratings += 1

    poor_percentage = (poor_ratings / len(data)) * 100
    print(f"% of users with poor rating = {poor_percentage:.2f}%")


In [17]:
recommendation_data = get_recommendation(cleaned_data)
print(recommendation_data)

[{'name': 'alice', 'brand': 'Apple'}, {'name': 'bob', 'brand': 'Apple'}, {'name': 'charlie', 'brand': 'Samsung'}, {'name': 'diana', 'brand': 'Apple'}, {'name': 'eve', 'brand': 'Samsung'}]
