In [4]:
import json

In [5]:
# Load the Data
def load_data(filename):
    with open(filename, "r") as f:
        data = json.load(f);

    return data;

In [6]:
data = load_data("storeData.json")
print(data, type(data));

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great Product!', 'age': '35'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'Ok, but late delivery', 'age': '38'}, {'name': 'Charlie', 'rating': 'two', 'feedback': 'Bad Experience'}, {'name': 'Dlana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great Product again!', 'age': '25'}] <class 'list'>


In [7]:
# Clean & structure the data
def clean_data(data):
    text_to_num = {"one":1, "two":2, "three":3, "four":4, "five":5};
    cleaned_data = [];
    unique_users = set();
    for user in data:
        #clean rating
        raw_rating = str(user["rating"]).strip().lower();
        if(raw_rating in text_to_num):
            raw_rating = text_to_num[raw_rating];
        user["rating"] = raw_rating;

        # Handle missing vals
        raw_age = user.get("age");
        if(raw_age == None):
            user["age"] = None;

        # Deduplication 
        if(user["name"].strip() in unique_users):
            continue;
        unique_users.add(user["name"]);
        
        cleaned_data.append(user);
        
    print(cleaned_data);

In [8]:
clean_data(data);

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great Product!', 'age': '35'}, {'name': 'Bob', 'rating': 4, 'feedback': 'Ok, but late delivery', 'age': '38'}, {'name': 'Charlie', 'rating': 2, 'feedback': 'Bad Experience', 'age': None}, {'name': 'Dlana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}]


In [9]:
# Get meaningful insights from data
def get_insights(data):

    # avg rating
    tot_rating = 0;
    for user in data:
        tot_rating += float(user["rating"]);

    print(f"Average Rating = {tot_rating/len(data)}")

    # percentage of users with poor rating
    poor_rating = 0;
    for user in data:
        if(float(user["rating"]) < 3):
            poor_rating += 1;

    print(f"Percentage of user with poor rating = {poor_rating/len(data) * 100}%");

In [10]:
get_insights(data);

Average Rating = 4.2
Percentage of user with poor rating = 20.0%


In [16]:
# Get Recommendation 
def get_recommendation(data):
    recommendation = [];
    for user in data:
        curr_recomm = {}
        curr_recomm["name"] = user["name"];
        if (float(user["rating"]) >= 4):
            curr_recomm["brand"] = "Apple";
        else:
            curr_recomm["brand"] = "Samsung";
        recommendation.append(curr_recomm);
        
    print(recommendation);

In [17]:
get_recommendation(data);

[{'name': 'Alice', 'brand': 'Apple'}, {'name': 'Bob', 'brand': 'Apple'}, {'name': 'Charlie', 'brand': 'Samsung'}, {'name': 'Dlana', 'brand': 'Apple'}, {'name': 'Alice', 'brand': 'Apple'}]
