In [107]:
import json

In [108]:
def load_data(fileName):
    with open(fileName, "r") as f:
        data = json.load(f)
    
    return data

In [109]:
data = load_data("store_data.json")
print(data)

[{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': ' Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]


In [110]:
def clean_data(data):
    text_to_number ={
        'one' : 1,
        'two' : 2,
        'three' : 3,
        'four' : 4,
        'five' : 5       
    }
    cleaned_data = []
    unique_users = set()
    
    for user in data:
        #convert it to same format
        raw_rating = str(user['rating'].strip().lower())
        if raw_rating in text_to_number:
            user['rating'] = text_to_number[raw_rating]
            
        #remove null data
        if user.get('age') is None:
            user['age'] = None
        
        #deduplication
        if(user['name'].strip() in unique_users):
            continue
        unique_users.add(user['name'])
        cleaned_data.append(user)
        
    return cleaned_data
        
    

In [111]:
clean_info = clean_data(data)
print(clean_info)

[{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 4, 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': ' Charlie', 'rating': 2, 'feedback': 'BAD EXPERIENCE ', 'age': None}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}]


In [112]:
def get_insights(data):
    
    total_rating = 0
    
    for user in data:
        total_rating += float(user['rating'])
    
    avg_rating = total_rating / len(data)   
    print(avg_rating)
    
    poor_rating = 0
    for user in data:
        if(float(user['rating']) < 3):
            poor_rating += 1
            
    print(f"percent of user with poor rating {poor_rating / len(data) * 100}%")

get_insights(clean_info)
    

3.9
percent of user with poor rating 20.0%


In [113]:
def suggest_product(data):
    suggested_products = {}
    for user in data:
        if (float(user['rating']) >= 4):
            suggested_products |= {user['name'] : "Apple"} 
        else:
            suggested_products |= {user['name'] : "Samsung"}
    return suggested_products

In [114]:
suggest_product(clean_info)


{'Alice': 'Apple',
 'Bob': 'Apple',
 ' Charlie': 'Samsung',
 'Diana': 'Apple',
 'Eve': 'Samsung'}