Imports

In [None]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Make sure both chart output directories exist before any figure is saved.
# exist_ok=True makes the cell safe to re-run: an already existing directory
# is left untouched instead of raising FileExistsError.
for chart_dir in ("./pie_charts", "./bar_charts"):
    os.makedirs(chart_dir, exist_ok=True)

Getting opinions about the product from the database

In [None]:
# Show the product codes that can be chosen: every entry in ./opinions with
# everything from its first dot onwards removed, printed one code per line.
available_codes = [entry.split('.')[0] for entry in os.listdir("./opinions")]
print("\n".join(available_codes))

In [None]:
# Ask for the product code, then load that product's opinions from the
# JSON file of the same name inside ./opinions.
product_id = input("Please enter the product code: ")
opinions_path = f"./opinions/{product_id}.json"
opinions = pd.read_json(opinions_path)

Calculating basic statistics

In [None]:
# Basic statistics over the loaded opinions.
opinions_count = opinions.shape[0]

# Casting each cell to bool turns an empty list into False and a non-empty
# list into True (the cells are expected to hold lists); summing the
# resulting booleans counts the True values.
has_pros = opinions.pros_pl.astype(bool)
has_cons = opinions.cons_pl.astype(bool)
pros_count = has_pros.sum()
cons_count = has_cons.sum()
# Element-wise AND keeps only the opinions that list both advantages and
# disadvantages. It is vectorized, so no row-by-row apply is needed, and it
# still yields a plain 0 when there are no opinions at all.
pros_cons_count = (has_pros & has_cons).sum()

average_score = opinions.score.mean()
print(f"Number of opinions about the product: {opinions_count}")
print(f"Number of opinions with advantages listed: {pros_count}")
print(f"Number of opinions with disadvantages listed: {cons_count}")
print(f"Number of opinions for which advantages and disadvantages are listed: {pros_cons_count}")
# ".2f" (without a space flag) avoids the stray extra blank that " .2f"
# inserted in front of every non-negative average.
print(f"Average score of all opinions: {average_score:.2f}")

In [None]:
# Frequency tables for the pros_en / cons_en columns: explode() turns every
# list cell into one row per element, and value_counts() tallies how often
# each distinct element occurs.
pros, cons = (
    opinions[column].explode().value_counts()
    for column in ("pros_en", "cons_en")
)

Drawing charts

In [None]:
# Count every recommendation value (missing ones included via dropna=False),
# then force the order True, False, None so the counts line up with the
# labels given to the pie chart; values absent from the data get a count of 0.
recommendation_order = [True, False, None]
recommendation_counts = opinions.recommendation.value_counts(dropna=False)
recommendations = recommendation_counts.reindex(recommendation_order, fill_value=0)
plt.figure(figsize=(7, 5))

In [None]:
# Pie chart of the recommendation shares for the selected product.
recommendations.plot.pie(
    label = "",  # presumably suppresses the default axis label next to the pie
    labels = ["Recommend", "Not recommend", "No opinion"],  # same order as the reindex list [True, False, None]
    colors = ["forestgreen", "crimson", "steelblue"],  # one colour per label, in the same order
    autopct = lambda r: f"{r:.1f}%" if r > 0 else ""  # print the percentage only for slices greater than 0%
)
plt.title(f"recommendations for product {product_id}")
# Save before showing so the file is written from the fully drawn figure.
plt.savefig(f"./pie_charts/{product_id}.png")
# plt.show() renders the figure; the previous bare plt.plot() drew nothing
# and only left an empty list as the cell output.
plt.show()

In [None]:
# Number of opinions per score, laid out on the fixed grid 0.5, 1.0, ..., 5.0
# so that scores nobody gave still appear with a count of 0.
score_bins = np.arange(0.5, 5.5, 0.5)
score_counts = opinions.score.value_counts()
scores = score_counts.reindex(score_bins.tolist(), fill_value=0)

In [None]:
# Bar chart of the number of opinions per score.
ax = scores.plot.bar(
    # Scores above 3.5 are drawn green, scores below 3 red, the rest blue.
    color = ["forestgreen" if s > 3.5 else "crimson" if s < 3 else "steelblue" for s in scores.index]
)
# Write the count on top of every bar.
plt.bar_label(container=ax.containers[0])
plt.xlabel("Score")
plt.ylabel("Number of opinions")
no_opinions = len(opinions)
# The f prefix is required: without it the {product_id} and {no_opinions}
# placeholders were rendered literally in the title.
plt.title(f"Number of opinions about {product_id} by their respective scores.\nTotal number of opinions: {no_opinions}")
plt.xticks(rotation=0)  # keep the score tick labels horizontal
# Save before showing so the file is written from the fully drawn figure.
plt.savefig(f"./bar_charts/{product_id}.png")
plt.show()