In [1]:
import json

In [2]:
#load data
def load_data(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path to the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin the encoding: without it open() falls back to the platform locale,
    # which can mis-decode non-ASCII text on systems whose default is not UTF-8.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

In [4]:
# Load the raw store records from disk and print them together with the type
# of the loaded object, as a quick check of the file's structure.
data = load_data('../data/raw/store_data.json')
print(data, type(data))

[{'name': 'Alice', 'rating': '5  ', 'feedback': 'Great product!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'Good value for money.', 'age': '30'}, {'name': 'Charlie', 'rating': 'two', 'feedback': 'Average experience.'}, {'name': 'Diana', 'feedback': 'Exceeded my expectations!', 'rating': ' 5', 'age': '28'}, {'name': 'Ethan', 'rating': '3.5', 'feedback': 'Not satisfied with the quality.', 'age': '20'}, {'name': 'Alice', 'rating': 'Five', 'feedback': 'Pretty good overall.', 'age': '25'}] <class 'list'>


In [5]:
# clean & structure the data
def clean_data(data):
    """Normalize ratings, fill in missing ages and drop repeated users.

    For each record, the rating is converted to a string, stripped of
    surrounding whitespace and lower-cased; the spelled-out words "one"
    through "five" are replaced by the matching digit string. Any other value
    (e.g. '3.5') is kept as the stripped, lower-cased string. A record without
    an "age" key gets ``"age": None``. Only the first record seen for each
    name is kept.

    The input records are not modified; the returned list holds shallow
    copies.

    Args:
        data: Iterable of dicts, each with at least "name" and "rating" keys.

    Returns:
        A list of cleaned record dicts, in first-seen order, one per name.

    Raises:
        KeyError: If a kept record lacks the "name" or "rating" key.
    """
    text_to_num = {"one": '1', "two": '2', "three": '3', "four": '4', "five": '5'}
    cleaned_data = []
    unique_users = set()

    for user in data:
        # Deduplicate first so repeated users are skipped before any work is
        # done on them.
        name = user["name"]
        if name in unique_users:
            continue
        unique_users.add(name)

        # Work on a copy so the caller's raw records stay untouched.
        record = dict(user)

        # Clean ratings - data consistency
        row_rating = str(record['rating']).strip().lower()
        record['rating'] = text_to_num.get(row_rating, row_rating)

        # Handle missing values: make the "age" key always present.
        record.setdefault("age", None)

        cleaned_data.append(record)

    return cleaned_data
        

In [6]:
# Rebind `data` to the output of clean_data; the cells below use this result.
data = clean_data(data)

In [7]:
# meaningful insights
def get_insights(data):
    """Print the mean rating and the share of ratings below 3.

    Args:
        data: Sized collection of dicts whose "rating" value can be passed
            to ``float``.

    Returns:
        None. Results are printed to standard output.

    Raises:
        KeyError: If a record has no "rating" key.
        ValueError: If a rating cannot be converted to a float.
    """
    # Guard against an empty collection, which would otherwise divide by zero.
    if len(data) == 0:
        print("No ratings available.")
        return

    # Parse each rating once and reuse the values for both statistics.
    ratings = [float(user['rating']) for user in data]

    total_rating = sum(ratings)
    print(f"Average rating: {total_rating/len(data)}")

    poor_ratings = sum(1 for rating in ratings if rating < 3)
    # Despite the label, this figure is the percentage of ratings below 3;
    # the label text is kept unchanged so existing output stays comparable.
    print(f"Average of poor rating: {poor_ratings/len(data) * 100}")

In [8]:
# Print the summary rating statistics for the current `data`.
get_insights(data)

Average rating: 3.9
Average of poor rating: 20.0


In [9]:
def get_recommendations(data):
    """Build one brand suggestion per record, based on its rating.

    A record whose rating, read as a float, is 4 or higher is assigned
    "Apple"; every other record is assigned "Samsung".

    Args:
        data: Iterable of dicts with "name" and "rating" keys.

    Returns:
        A list of ``{'name': ..., 'brand': ...}`` dicts in input order.
    """
    return [
        {
            'name': entry["name"],
            'brand': "Apple" if float(entry["rating"]) >= 4 else "Samsung",
        }
        for entry in data
    ]

In [10]:
# Bare call as the cell's last expression so the notebook displays the returned list.
get_recommendations(data)

[{'name': 'Alice', 'brand': 'Apple'},
 {'name': 'Bob', 'brand': 'Apple'},
 {'name': 'Charlie', 'brand': 'Samsung'},
 {'name': 'Diana', 'brand': 'Apple'},
 {'name': 'Ethan', 'brand': 'Samsung'}]