In [52]:
import json

In [53]:
# load the data
def load_data(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Be explicit about the encoding: without it open() falls back to the
    # platform locale, which can mis-decode (or fail on) non-ASCII text.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

In [54]:
# Load the raw store records, then show their content and container type.
data = load_data("store_data.json")
print(data, type(data), sep="\n")

[{'name': 'Sanju', 'rating': '5', 'feedback': 'Great Product', 'age': '20'}, {'name': 'Vinay', 'rating': 'four', 'feedback': 'Nice Product', 'age': '23'}, {'name': 'Patel', 'rating': 'two', 'feedback': 'BAD EXPERIENCE'}, {'name': 'Patel', 'rating': '5 ', 'feedback': 'Okay Product', 'age': '24'}, {'name': 'Ayush', 'rating': '3.5', 'feedback': 'Best Product', 'age': '22'}]
<class 'list'>


In [55]:
# Clean and structure the data
def clean_data(data):
    """Normalise ratings, fill in missing ages and drop duplicate users.

    Args:
        data: Iterable of user records (dicts). Every record must have
            "name" and "rating" keys; "age" is optional.

    Returns:
        A new list holding cleaned copies of the records (the input
        records themselves are not modified). For each kept record:

        * a rating spelled as a word ("one" .. "five", any letter case,
          surrounding whitespace ignored) is replaced by the matching int;
          any other rating is kept as a stripped, lower-cased string;
        * a missing "age" key is added with the value None.

        Only the first record seen for each name is kept, where names are
        compared after stripping surrounding whitespace.

    Raises:
        KeyError: If a record has no "rating" or "name" key.
    """
    text_to_num = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}
    cleaned_data = []
    seen_names = set()

    for user in data:
        # Work on a shallow copy so the caller's records stay untouched and
        # re-running the cleaning step always starts from the same input.
        record = dict(user)

        # Clean ratings: map number words to ints, leave everything else
        # as the normalised string.
        rating = str(record["rating"]).strip().lower()
        record["rating"] = text_to_num.get(rating, rating)

        # Handle missing values: make sure every record carries an "age" key.
        record.setdefault("age", None)

        # Deduplication: the SAME stripped key must be used for the lookup
        # and for the insert, otherwise "Patel " followed by "Patel" would
        # slip through as two different users.
        name_key = record["name"].strip()
        if name_key in seen_names:
            continue

        seen_names.add(name_key)
        cleaned_data.append(record)

    return cleaned_data
        

In [56]:
# Rebind `data` to the cleaned records; the cells below read this name.
data = clean_data(data)

In [64]:
# Get meaningful insights from data
def get_insights(data):
    """Print the average rating and the percentage of poor ratings.

    A rating counts as "poor" when it is below 3.

    Args:
        data: Iterable of user records whose "rating" value can be
            converted with ``float()``.

    Returns:
        None. The results are printed. When there are no records, a short
        notice is printed instead of the statistics.

    Raises:
        KeyError: If a record has no "rating" key.
        ValueError: If a rating cannot be converted to a float.
    """
    # Convert once up front so both statistics share the same numbers.
    ratings = [float(user["rating"]) for user in data]

    # Guard against empty input, which would otherwise divide by zero.
    if not ratings:
        print("No ratings available to compute insights.")
        return

    # avg rating
    avg_rating = sum(ratings) / len(ratings)
    print(f"Average rating = {avg_rating}")

    # percentage of users with poor rating
    poor_ratings = sum(1 for rating in ratings if rating < 3)
    poor_rating_percentage = (poor_ratings / len(ratings)) * 100
    print(f"Poor rating percentage = {poor_rating_percentage}%")

In [65]:
# Print the summary statistics for the cleaned records.
get_insights(data)

Average rating = 3.625
Poor rating percentage = 25.0%


In [71]:
# Build the recommendation feature
def get_recommendations(data):
    """Pick a brand to recommend to each user based on their rating.

    Users whose rating is 4 or higher are matched with "Apple"; everyone
    else is matched with "Samsung".

    Args:
        data: Iterable of user records with "name" and "rating" keys; the
            rating must be convertible with ``float()``.

    Returns:
        A list with one ``{"name": ..., "brand": ...}`` dict per input
        record, in input order.
    """
    return [
        {
            "name": entry["name"],
            "brand": "Apple" if float(entry["rating"]) >= 4 else "Samsung",
        }
        for entry in data
    ]

In [72]:
# Bare expression: the notebook displays the returned list as the cell output.
get_recommendations(data)

[{'name': 'Sanju', 'brand': 'Apple'},
 {'name': 'Vinay', 'brand': 'Apple'},
 {'name': 'Patel', 'brand': 'Samsung'},
 {'name': 'Ayush', 'brand': 'Samsung'}]