# <center>Python Data Visualization</center>
## <center>First Sub-Goal: Employees Performance & Productivity Analysis</center>
______________________________________________________________________________
**<center>Name:</center>**
## <center>Ali Mir</center>

**<center>GitHub:</center>** **<center>https://github.com/Alii-Mir</center>**
______________________________________________________________________________

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import matplotlib.gridspec as gridspec
import matplotlib.ticker as ticker
from matplotlib.patches import Circle

In [2]:
# Load the three input tables (calendar, employees, landscaping jobs) from
# the sibling data directory, in that order.
csv_paths = ('../data/calendar.csv', '../data/employees.csv', '../data/landscaping.csv')
df_calend, df_employ, df_land = map(pd.read_csv, csv_paths)


---

## Data Processing

In [3]:
# Build df_land_2: a copy of df_land with three derived columns per job:
#   employees            - calendar columns in which the job id appears
#   hourly_wages         - hourly_wage of those employees (in df_employ row order)
#   wt_satisfaction_mean - weighted mean of the involved employees' average
#                          customer satisfaction, each employee weighted by the
#                          number of calendar entries that map to a rated job
j_ids = df_land["job_id"].tolist()
df_land_2 = df_land.copy()
# Object-dtype placeholders so single cells can hold Python lists.
df_land_2['employees'] = pd.Series(dtype='object')
df_land_2['hourly_wages'] = pd.Series(dtype='object')
df_land_2['wt_satisfaction_mean'] = pd.Series(dtype='object')

# job_id -> customer_satisfaction lookup. It never changes inside the loop,
# so it is built once instead of once per job.
satisfaction_by_job = df_land_2.set_index('job_id')['customer_satisfaction']

# calendar column -> (mean satisfaction, number of rated entries).
# These statistics depend only on the column, not on the job being processed,
# so each column is mapped at most once and then reused.
employee_stats = {}

# Iterate over the real index labels so the `.at` writes below stay correct
# even if df_land does not carry a default 0..n-1 index.
for row_label, j in df_land_2['job_id'].items():
    # Calendar columns that contain this job id at least once.
    appears_in = (df_calend == j).any().to_numpy()
    j_employees = df_calend.columns[appears_in].tolist()

    # NOTE: the wages follow df_employ's row order, which is not necessarily
    # the order of `j_employees`.
    wages = df_employ.loc[df_employ['employee_id'].isin(j_employees), 'hourly_wage'].tolist()

    df_land_2.at[row_label, 'employees'] = j_employees
    df_land_2.at[row_label, 'hourly_wages'] = wages

    weighted_sum = 0
    total_weight = 0
    for col in j_employees:
        if col not in employee_stats:
            rated = df_calend[col].map(satisfaction_by_job).dropna()
            employee_stats[col] = (rated.mean(), rated.shape[0])
        col_mean, col_weight = employee_stats[col]
        weighted_sum += col_weight * col_mean
        total_weight += col_weight

    # A job that maps to no rated calendar entry has no defined average;
    # store NaN rather than dividing by zero.
    w_avg = weighted_sum / total_weight if total_weight else np.nan
    df_land_2.at[row_label, 'wt_satisfaction_mean'] = round(w_avg, 2)

df_land_2.head(2)

Unnamed: 0,job_id,job_type,invoice_amount,material_costs,request_date,start_date,completion_date,customer_id,customer_type,customer_postal_code,customer_satisfaction,employees,hourly_wages,wt_satisfaction_mean
0,j_185274,basic_lawncare,240.0,76,2022-03-01,2022-04-01,2022-04-02,c_3ac965,residential,A1H,2,[e_7ace5d],[18.0],5.3
1,j_8bd7f7,basic_lawncare,276.0,68,2022-03-01,2022-04-26,2022-04-26,c_3ac965,residential,A1H,7,[e_97c2f5],[18.0],5.1


---

## Visualization1: Interactive, Explanatory, Somewhat Exploratory, Derived Data

Note: The second axis (the User Guide) was originally meant to be defined separately, outside the function; but because it is attached to the same figure, defining it outside proved too hard. Overall, the code runs at a good speed.

In [4]:
# Raw job-type values (plus the synthetic 'All' entry) and their display
# names for the dropdown; name_dict translates a display name back to the
# raw value.
actual_j_names = ['All']
actual_j_names.extend(df_land_2["job_type"].unique().tolist())
fancy_names = [raw_name.replace('_', ' ').title() for raw_name in actual_j_names]
name_dict = {fancy: raw_name for fancy, raw_name in zip(fancy_names, actual_j_names)}
#----------------------------------------------------------------------------------------------
@interact(j_type = widgets.Dropdown(options=fancy_names, value='All', description="Job Type:"))
def f3(j_type):
    """Draw the satisfaction violin plot next to a static user guide.

    Left axis: one violin per customer-satisfaction class (1-10) showing the
    distribution of ``wt_satisfaction_mean`` for the jobs in that class, with
    a marker at each class mean. Right axis: a flow chart of text boxes that
    explains how the plotted values were derived.

    Parameters
    ----------
    j_type : str
        Display name chosen in the dropdown; translated to the raw
        ``job_type`` value through ``name_dict``. ``'All'`` keeps every job.
    """
    j_legend = j_type
    j_type = name_dict[j_type]

    if j_type == "All":
        df_f3 = df_land_2
    else:
        df_f3 = df_land_2[df_land_2.job_type == j_type]
    #------------------------------------------------------------------------
    fig = plt.figure(figsize=(20, 10), dpi=200)
    gs = gridspec.GridSpec(1, 2, width_ratios=[2.5, 1])

    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])
    #------------------------------------------------------------------------
    # wt_satisfaction_mean is stored with object dtype; cast it once so both
    # the violin plot and the per-class means work on real floats.
    df_scores = df_f3.astype({'wt_satisfaction_mean': 'float64'})
    # The violin plot treats the satisfaction class as a categorical label.
    df_violin = df_scores.astype({'customer_satisfaction': 'str'})
    order = [str(rating) for rating in range(1, 11)]
    df_means = df_scores.groupby("customer_satisfaction")["wt_satisfaction_mean"].mean().reset_index()
    #------------------------------------------------------------------------
    # NOTE(review): newer seaborn releases appear to rename `scale` to
    # `density_norm` - confirm against the installed version.
    sns.violinplot(x='wt_satisfaction_mean', y='customer_satisfaction', data=df_violin, ax=ax1,
                   order=order, inner="stick", scale='count', linewidth=0.8)

    # Mean marker per class. Categories sit at positions 0..9 on the y axis,
    # so rating r is drawn at r - 1.
    X = df_means.wt_satisfaction_mean
    Y = df_means.customer_satisfaction
    sns.scatterplot(x=X, y=Y-1, color='snow',
                    markers=True, marker="o", edgecolor='black', linewidth=1, s=50, ax=ax1)

    # Proxy artist so the legend shows the mean marker.
    marker = plt.Line2D([0,0],[0,0],color='w',markerfacecolor='snow', marker='o',markersize=10, markeredgecolor='black')
    ax1.legend([marker], [': Mean of Distribution Data'],loc='upper left', framealpha=0, handletextpad=0.1)
    #------------------------------------------------------------------------
    # customize ax1
    ax1.set(xlim = (4.5, 8.5))
    ax1.set_xlabel('Weighted Average Satisfaction Score by Job ID', labelpad=15, fontsize=10,)
    ax1.set_ylabel('Customer Satisfaction Class', labelpad=10, fontsize=10,)
    ax1.tick_params(axis='y', pad=50)
    ax1.invert_yaxis()
    ax1.tick_params(axis='y', which='both', length=0)
    #------------------------------------------------------------------------
    # User guide: (y position, text) of every box, top to bottom, all centred
    # horizontally at guide_x in ax2's 0..1 coordinate space.
    guide_x = 0.5
    guide_steps = [
        (0.9, "Select either a specific job type or all jobs from drop-down menu"),
        (0.83, "Choose a specific customer satisfaction rating (e.g., 10)"),
        (0.75, "Retrieve all job IDs with that rating"),
        (0.67, "Extract the employees who were involved in each of the job IDs"),
        (0.58, "For each employee, calculate the mean customer satisfaction rating\nacross all their worked job IDs"),
        (0.48, "Use the number of job IDs worked by each employee\nas their weight in the following calculation"),
        (0.38, "Calculate the weighted average of all the\ninvolved employees' satisfaction ratings (for each job ID)"),
        (0.28, "For all job IDs belonging to the same customer satisfaction rating,\nwe have calculated the average satisfaction data"),
        (0.18, "Plot a violin plot of the calculated satisfaction data\nto see their frequency and distribution"),
        (0.08, "Check the mean of the distribution. For example, if the mean is near 7,\nit's likely that the job pertains to a customer satisfaction rating of 10"),
    ]

    # One connector between each pair of consecutive boxes. Two-line boxes are
    # taller, so the connector end above them keeps a larger gap.
    for (upper_y, _), (lower_y, lower_text) in zip(guide_steps, guide_steps[1:]):
        gap_above_lower = 0.02 if "\n" in lower_text else 0.01
        ax2.annotate("", xy=(guide_x, upper_y - 0.02), xytext=(guide_x, lower_y + gap_above_lower),
                     arrowprops=dict(arrowstyle="<-", lw=2))

    ax2.text(guide_x, 0.95, r"$\bf{User~Guide~(Data~Processing~Break~Down):}$", fontsize=10, ha="center", va="center")
    for step_y, step_text in guide_steps:
        ax2.text(guide_x, step_y, step_text, fontsize=10, ha="center", va="center",
                 bbox=dict(facecolor="white", edgecolor="black"))
    #------------------------------------------------------------------------
    # Set the limits of the axis
    ax2.set_xlim(0, 1); ax2.set_ylim(0, 1)

    # Hide the tick marks and labels
    ax2.tick_params(axis="both", which="both", length=0, labelsize=0)
    #------------------------------------------------------------------------
    ax2.axis('off')
    ax1.spines[['left', 'right', 'top']].set_visible(False)
    fig.suptitle(f'''Jobs Estimated Mean Satisfaction Scores Distributions Belonging to Different Customer Satisfaction Rates (Selected Job Type: {j_legend})''', fontsize=12)
    plt.tight_layout()

    # plt.savefig('1_violin.png', facecolor='w', dpi=1000)
    plt.show()
    

interactive(children=(Dropdown(description='Job Type:', options=('All', 'Basic Lawncare', 'Garden Landscaping'…

___

## Visualization2: Interactive, Exploratory, Derived Data

In [5]:
@interact(e_id = widgets.Text(value="e_5bfce2", description='Employee ID:',placeholder='Enter Employee ID (e.g., e_5bfce2)',\
                              layout=widgets.Layout(width='35%')))
def employee(e_id):
    """Plot one employee's mean satisfaction and worked-date count per job type.

    Two horizontal bar charts share the job-type axis: the left one (red,
    x axis inverted) shows the rounded mean customer satisfaction of the jobs
    the employee worked on, the right one (blue) the number of calendar dates
    worked per job type.

    Parameters
    ----------
    e_id : str
        Employee id typed into the text box; matched case-insensitively and
        with surrounding whitespace ignored.

    Returns
    -------
    str or None
        A message when the id is unknown or the employee has no jobs to plot;
        otherwise None (the figure is shown as a side effect).
    """
    wanted = e_id.strip().lower()
    known_ids = set(df_employ['employee_id'].dropna().astype(str).str.lower())
    if wanted not in known_ids:
        return "No such employee ID was found!"

    # Resolve the actual calendar column label case-insensitively instead of
    # indexing with the lower-cased input, which fails on any case mismatch.
    calendar_columns = {str(col).lower(): col for col in df_calend.columns}
    if wanted not in calendar_columns:
        return "No jobs were found for this employee!"
    worked_job_ids = df_calend[calendar_columns[wanted]]
    #------------------------------------------------------------------------
    # Translate each calendar entry (a job id) into its job type and customer
    # satisfaction. `map` yields NaN for entries that are not known job ids,
    # which keeps the satisfaction column numeric.
    jobs = df_land.set_index('job_id')
    df_e = pd.DataFrame({
        'date': df_calend['date'],
        'job_type': worked_job_ids.map(jobs['job_type']),
        'satisfaction': worked_job_ids.map(jobs['customer_satisfaction']),
    })
    #-------------------------------------------------------------------------------------------------
    df_grouped = df_e.groupby('job_type').agg({'satisfaction': 'mean', 'date': 'count'}).\
                sort_values(by="satisfaction").reset_index().round({'satisfaction': 0})
    if df_grouped.empty:
        return "No jobs were found for this employee!"

    df_grouped["fancy_job_type_names"] = df_grouped["job_type"].str.replace('_', ' ').str.title()
    # One label per bar, in bar order.
    fancy_job_type_names = df_grouped["fancy_job_type_names"].tolist()

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,6), sharey=True, dpi=300)
    #------------------------------------------------------------------------------------------------- ax1
    df_grouped.plot(kind='barh', x="fancy_job_type_names", y="satisfaction", ax=ax1, color='red', legend=False)
    ax1.set_title(r'${\bf Mean~Satisfaction}$'+'\n(For Jobs Contributed by Selected Employee)', pad=10, fontsize=9)
    ax1.set_xlabel('Mean Satisfaction (rounded)', labelpad=5)
    ax1.set(xlim=(0,10))
    ax1.set_yticklabels(fancy_job_type_names, ha='center', fontsize=9) # center align

    # Put the job-type labels in the gap between the two charts.
    ax1.yaxis.tick_right()
    ax1.set_ylabel(None)
    ax1.tick_params(axis="y", which="both", length=0, pad=60) # hide ticks, push labels into the gap
    ax1.invert_xaxis()
    #------------------------------------------------------------------------------------------------- ax2
    df_grouped.plot(kind='barh', x="fancy_job_type_names", y="date", ax=ax2, color='blue', legend=False)
    ax2.set_title(r'${\bf Dates~Count}$'+'\n(Worked by Selected Employee)', pad=10, fontsize=9)
    ax2.set_xlabel('Dates Count', labelpad=5)
    # Round the upper limit UP to the next multiple of 5 (ceiling division),
    # so the longest bar is never clipped and the axis never collapses to (0, 0).
    max_dates = int(df_grouped.date.max())
    ax2.set(xlim=(0, -(-max_dates // 5) * 5))
    ax2.tick_params(axis="y", which="both", length=0)
    #-----------------------------------------------------------------------------------------------------
    ax1.xaxis.set_minor_locator(ticker.MultipleLocator(1))
    ax1.grid(True, axis='x', which='both', linestyle='--', color='gray', alpha=0.7, linewidth=0.5, zorder=0)
    ax2.grid(True, axis='x', which='both', linestyle='--', color='gray', alpha=0.7, linewidth=0.5, zorder=0)
    #------------------------------------------------------------------------
    plt.subplots_adjust(wspace=0.45)

    fig.text(0.51, 0.88, 'Job Type', ha='center', fontsize=9, color='black',\
             bbox=dict(facecolor='white', alpha=0.15, boxstyle='round'))

    fig.text(0.57, 0.95, 'Dates Count', fontsize=9, ha='center', va='center', color='black',\
             bbox=dict(facecolor='blue', alpha=0.15, boxstyle='rarrow'))

    fig.text(0.45, 0.95, 'Satisfaction', fontsize=9, ha='center', va='center', color='black',\
             bbox=dict(facecolor='red', alpha=0.15, boxstyle='larrow'))

    # plt.savefig('2_employee.png', facecolor='w', dpi=1000)
    plt.show()
    # Sample inputs (presumably for manual testing): e_1ce84 e_Ice84 e_5bfce2 e_87406d
    

interactive(children=(Text(value='e_5bfce2', description='Employee ID:', layout=Layout(width='35%'), placehold…