## Create a new DataFrame from the original `df`

### Steps: replace nulls with 0, count per sentiment, compute percentages, add the new columns, melt, and rank in descending order

In [0]:
# Build a per-GeneralManager sentiment table and a long-format ("melted")
# version of it ranked by positive rate.
#
# Reads  : the temp view `df` (columns `GeneralManager`, `sentiment`).
# Defines: `generalmanager_pivot`           - Spark DataFrame, also registered as
#                                             the temp view "generalmanager_pivot"
#          `generalmanager_pivot_pd`         - pandas, one row per manager
#          `generalmanager_pivot_pd_melted`  - pandas, one row per manager and
#                                             sentiment, sorted by positive rate

# Obtain the session *before* it is used (it was previously bound only after
# the first `spark.sql` call). getOrCreate() returns the already-running
# session when there is one, so the `df` temp view remains visible.
spark = SparkSession.builder.appName("DropNulls").getOrCreate()

# Count rows per manager and sentiment, then drop rows with no manager.
generalmanager_pivot = spark.sql(
    "SELECT * FROM (SELECT GeneralManager, sentiment FROM df) AS a PIVOT (COUNT(*) AS count FOR sentiment IN ('positive', 'negative', 'neutral'))"
).dropna(subset=["GeneralManager"])
generalmanager_pivot.createOrReplaceTempView("generalmanager_pivot")


# To pandas; nulls left in the pivoted count columns are replaced by 0.
generalmanager_pivot_pd = generalmanager_pivot.toPandas().fillna(0)


# Volume to percentage. The row total is kept in a local Series instead of a
# temporary column, so the frame never has to be mutated in place.
sentiment_columns = ["positive", "negative", "neutral"]
sentiment_total = generalmanager_pivot_pd[sentiment_columns].sum(axis=1)
for sentiment_name in sentiment_columns:
    # A manager whose total is 0 yields NaN here (0 / 0), not an exception.
    generalmanager_pivot_pd[f"percentage_{sentiment_name}"] = (
        generalmanager_pivot_pd[sentiment_name] / sentiment_total * 100
    ).round(1)
display(generalmanager_pivot_pd)


# Melt: one row per (manager, sentiment percentage).
generalmanager_pivot_pd_melted = generalmanager_pivot_pd.melt(
    id_vars=["GeneralManager"],
    value_vars=["percentage_positive", "percentage_negative", "percentage_neutral"],
    var_name="Sentiment",
    value_name="Percentage",
)


# Descending ranking by positive rate.
# Attach each manager's positive rate with one indexed lookup instead of a
# per-row boolean-mask scan (which is quadratic and fails on an empty frame).
# drop_duplicates keeps the first row per manager, matching the former
# `.values[0]` first-match behaviour.
positive_rate_by_manager = generalmanager_pivot_pd.drop_duplicates(
    subset="GeneralManager"
).set_index("GeneralManager")["percentage_positive"]
generalmanager_pivot_pd_melted["percentage_positive"] = generalmanager_pivot_pd_melted[
    "GeneralManager"
].map(positive_rate_by_manager)
# A stable sort keeps tied rows (the three rows of one manager, or managers
# with equal rates) in melt order, so the result is deterministic.
generalmanager_pivot_pd_melted = generalmanager_pivot_pd_melted.sort_values(
    by="percentage_positive", ascending=False, kind="mergesort"
)

In [0]:
# Plot the sentiment split per department manager, ranked by positive rate.
# Expects `dept_grouped_pd` to provide `deptmanager` plus the three
# `percentage_*` columns referenced below.

# Drop rows without a manager: real missing values and the literal string "null".
dept_grouped_pd = dept_grouped_pd[
    dept_grouped_pd["deptmanager"].notna() & (dept_grouped_pd["deptmanager"] != "null")
]

sentiment_percentage_columns = [
    "percentage_positive",
    "percentage_negative",
    "percentage_neutral",
]

# Long format: one row per (manager, sentiment percentage).
dept_grouped_pd_melted = dept_grouped_pd.melt(
    id_vars=["deptmanager"],
    value_vars=sentiment_percentage_columns,
    var_name="Sentiment",
    value_name="Percentage",
)

# Attach each manager's positive rate with one indexed lookup instead of a
# per-row boolean-mask scan (which is quadratic and fails on an empty frame).
# drop_duplicates keeps the first row per manager, matching the former
# `.values[0]` first-match behaviour.
dept_positive_rate = dept_grouped_pd.drop_duplicates(subset="deptmanager").set_index(
    "deptmanager"
)["percentage_positive"]
dept_grouped_pd_melted["percentage_positive"] = dept_grouped_pd_melted[
    "deptmanager"
].map(dept_positive_rate)
# Stable sort: tied rows keep melt order, so the ranking is deterministic.
dept_grouped_pd_melted = dept_grouped_pd_melted.sort_values(
    by="percentage_positive", ascending=False, kind="mergesort"
)


plt.figure(figsize=(14, 10))
# NOTE(review): with dodge=False the three sentiment bars of a manager are
# drawn over one another rather than stacked or side by side - confirm that
# this overlay is the intended presentation.
sns.barplot(
    data=dept_grouped_pd_melted,
    y="deptmanager",
    x="Percentage",
    hue="Sentiment",
    # Pin the hue order so each palette colour always maps to the same
    # sentiment instead of depending on which row happens to come first.
    hue_order=sentiment_percentage_columns,
    # `errorbar=None` replaces the deprecated `ci=None` spelling.
    errorbar=None,
    palette=["salmon", "skyblue", "lightgreen"],
    dodge=False,
)


plt.title("Sentiment Percentage by deptmanager", fontsize=16)
plt.ylabel("deptmanager", fontsize=14)
plt.xlabel("Percentage", fontsize=14)
plt.legend(title="Sentiment")
plt.show()

In [0]:
# Draw the department-manager panel into slot [0, 1] of an existing subplot
# grid. `axs` is not created in this cell.
# NOTE(review): assumes `axs` is a 2-D array of Axes created earlier (e.g. by
# plt.subplots) - confirm.
#
# The former `plt.figure(figsize=(16, 9))` call was dropped: it opened a new,
# empty figure that nothing was drawn on, because the plot targets `axs[0, 1]`.
# The size of the grid is governed by the figure that owns `axs`.
dept_manager_ax = axs[0, 1]

sns.barplot(
    data=dept_grouped_pd_melted,
    y="deptmanager",
    x="Percentage",
    hue="Sentiment",
    errorbar=None,
    palette=["salmon", "skyblue", "lightgreen"],
    dodge=False,
    ax=dept_manager_ax,
)

dept_manager_ax.set_title("Group by Department Manager", fontsize=16)
dept_manager_ax.set_ylabel("Department Manager", fontsize=14)
dept_manager_ax.set_xlabel("Sentiment (%)", fontsize=14)