In [None]:
# Dependencies and Setup
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# NM Relative path of the Glassdoor reviews CSV that is loaded below
file_path = "Resources/glassdoor_reviews.csv"

In [None]:
# NM Read the CSV into a pandas DataFrame and preview its first rows
file_read= pd.read_csv(file_path)
file_read.head()

In [None]:
# NM Show the data type pandas inferred for each column
file_read.dtypes

In [None]:
# NM Total number of firms = number of distinct values in the "firm" column
firm_total= file_read["firm"].nunique()
firm_total

In [None]:
# NM Total number of job titles = number of distinct values in the "job_title" column
job_title_total= file_read["job_title"].nunique()
job_title_total

In [None]:
# NM Build the working frame: the raw reviews minus the columns the analysis does not use
file_new = file_read.drop(
    columns=["column_label", "recommend", "ceo_approv", "outlook", "headline", "pros", "cons", "use"]
)
file_new

In [None]:
# VG Total number of firms (distinct "firm" values).
# Same computation as the earlier NM cell; firm_total is simply reassigned.
firm_total= file_read["firm"].nunique()
firm_total

In [None]:
# VG Total number of job titles (distinct "job_title" values).
# Same computation as the earlier NM cell; job_title_total is simply reassigned.
job_title_total= file_read["job_title"].nunique()
job_title_total

In [None]:
# NM Group the trimmed reviews by "firm".
# The last line displays the GroupBy object itself (its repr), not any rows.
firm_group= file_new.groupby(["firm"])
firm_group

In [None]:
# VG Rebuild the working frame from the raw reviews, leaving out the unused columns
file_new = file_read.drop(
    columns=["column_label", "recommend", "ceo_approv", "outlook", "headline", "pros", "cons", "use"]
)
file_new

In [None]:
# VG Group the trimmed reviews by "firm" and preview the first rows of every group
firm_group= file_new.groupby(["firm"])
firm_group.head()

In [None]:
# VG Count the non-null "job_title" entries within each firm group
firm_group["job_title"].count()

In [None]:
# NM Count the non-null "job_title" entries within each firm group
# (same expression as the VG cell directly above)
firm_group["job_title"].count()

In [None]:
# VG Split the review date text into year / month / day string columns
# by character position (0-3, 5-6, 8-9).
# NOTE(review): presumes "date_review" is text shaped like YYYY-MM-DD — confirm.
file_new["year"] = file_new["date_review"].str[:4]
file_new["month"] = file_new["date_review"].str[5:7]
file_new["day"] = file_new["date_review"].str[8:10]
file_new

In [None]:
# NM Mean of every rating column for each firm.
# The columns are selected with a list (double brackets): selecting groupby
# columns with a bare tuple of names was deprecated in pandas 1.x and raises
# "Cannot subset columns with a tuple" in pandas 2.x.
job_satis_df = firm_group[
    [
        "overall_rating",
        "work_life_balance",
        "culture_values",
        "diversity_inclusion",
        "career_opp",
        "comp_benefits",
        "senior_mgmt",
    ]
].mean()
job_satis_df

In [None]:
# VG - Only the year is needed from the date, so drop the raw date plus month/day
file_clean = file_new.drop(columns=["date_review", "month", "day"])
file_clean.head()

In [None]:
# VG - Keep only the reviews of the two companies under review: Accenture & LEGO
firm_reviews = file_clean[file_clean["firm"].isin(["Accenture", "the-LEGO-Group"])]
firm_reviews


In [None]:
# VG - Group the two firms' reviews by company name & year,
# then preview the first rows of every (firm, year) group
firm_group_year = firm_reviews.groupby(["firm", "year"])


firm_group_year.head()

In [None]:
# VG - Mean of each rating criterion per (firm, year); NaN means are replaced with 0.
# The columns are selected with a list (double brackets): selecting groupby
# columns with a bare tuple of names was deprecated in pandas 1.x and raises
# in pandas 2.x.
company_ratings = firm_group_year[
    [
        "overall_rating",
        "work_life_balance",
        "culture_values",
        "diversity_inclusion",
        "career_opp",
        "comp_benefits",
        "senior_mgmt",
    ]
].mean()
# NOTE(review): a 0 here means "no rating available", not a real score of 0.
company_ratings = company_ratings.fillna(0)

company_ratings

In [None]:
# VG - Turn the (firm, year) index back into columns, then keep the LEGO Group rows
lego_data = company_ratings.reset_index()
legos = lego_data[lego_data["firm"].eq("the-LEGO-Group")]
legos

In [None]:
# VG - Line chart of the LEGO Group's mean overall rating per year
fig, ax = plt.subplots()
ax.plot(legos["year"], legos["overall_rating"])
ax.set_xlabel("Rating by Year")
ax.set_ylabel("Overall Rating")
ax.tick_params(axis="x", labelrotation=70)  # slant the year labels
ax.set_title("the-LEGO-Group Employee Ratings")

# Save into folder
fig.savefig("Output/The-LEGO-Group Employee Ratings.png", dpi=300, bbox_inches="tight")

plt.show()

In [None]:
# VG - Turn the (firm, year) index back into columns, then keep the Accenture rows
accenture_data = company_ratings.reset_index()
accenture = accenture_data[accenture_data["firm"].eq("Accenture")]
accenture

In [None]:
# VG - Line chart of Accenture's mean overall rating per year
fig, ax = plt.subplots()
ax.plot(accenture["year"], accenture["overall_rating"])
ax.set_xlabel("Rating by Year")
ax.set_ylabel("Overall rating")
ax.tick_params(axis="x", labelrotation=70)  # slant the year labels
ax.set_title("Accenture Employee Ratings")

# VG Save into folder
fig.savefig("Output/Accenture Employee Ratings.png", dpi=300, bbox_inches="tight")

plt.show()

In [None]:
# VG - Rank the (firm, year) groups from highest to lowest mean overall rating
# and show the ten best-rated ones
best_yrs = company_ratings.sort_values("overall_rating", ascending = False)
best_yrs.head(10)


In [None]:
# VG - Lowest-rated years: the tail of the descending ranking, re-sorted so the
# lowest overall rating comes first
low_yrs = best_yrs.tail().sort_values(by="overall_rating")
low_yrs.head()

In [None]:
# Line plot of every mean rating column, with the (firm, year) groups ordered
# from the best to the worst overall rating.
# The figure size is passed to the plot call: creating a new figure after
# plotting (as before) left the chart at the default size and displayed an
# extra empty figure.
best_yrs.plot(figsize=(20, 8))
# Actually set the axis label (a bare `xlabel = ...` only binds a variable).
plt.xlabel("Firm")
plt.ylabel("Ratings")
plt.xticks(rotation=70)
plt.legend(loc="best", bbox_to_anchor=(1.0, 1.0))
plt.savefig("Output/Best Years.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Area chart of company_ratings with one subplot per rating column (subplots=True),
# saved to the Output folder before being displayed
ratings = company_ratings.plot.area(figsize=(20, 8), subplots=True)
plt.savefig("Output/Company Rating Changes Over Time.png",dpi=300, bbox_inches = "tight")

plt.show()

In [None]:
# NM Replace NaN/None values in the per-firm mean ratings with zero
# NOTE(review): afterwards a 0 stands for "no rating available", not a real score.
job_satis_na_df = job_satis_df.fillna(0)
job_satis_na_df

In [None]:
# NM nsmallest() returns the n rows with the smallest values in the given column:
# here, the 5 firms with the lowest mean overall rating.
# NOTE(review): job_satis_na_df had NaN replaced by 0, so a firm without an
# overall rating would rank as "lowest" — confirm this is intended.
overall_rating_lowest= job_satis_na_df.nsmallest(n=5, columns=['overall_rating'])
overall_rating_lowest

In [None]:
# NM nlargest() returns the n rows with the largest values in the given column:
# here, the 5 firms with the highest mean overall rating.
overall_rating_largest= job_satis_na_df.nlargest(n=5, columns=['overall_rating'])
overall_rating_largest

In [None]:
# NM Grouped bar chart: the bottom 5 firms' overall rating next to each
# work-environment factor.

# (column, legend label, bar colour, x offset of the bar inside each firm's group)
factor_bars = [
    ("overall_rating", "Overall Rating", "red", 0.13),
    ("work_life_balance", "Work Life Balance", "b", 0.25),
    ("culture_values", "Culture values", "g", 0.35),
    ("diversity_inclusion", "Diversity inclusion", "orange", 0.45),
    ("career_opp", "Career opportunities", "y", 0.55),
    ("comp_benefits", "Company benefits", "purple", 0.65),
    ("senior_mgmt", "Senior management", "cyan", 0.75),
]

# Create the figure FIRST so the requested size applies to the chart itself;
# calling plt.figure() after plotting only produced an extra empty figure.
plt.figure(figsize=(10, 8))
x_axis = np.arange(len(overall_rating_lowest))
handles = [
    plt.bar(x_axis + offset, overall_rating_lowest[column], width=0.12,
            label=label, color=colour, alpha=0.75, align="center")
    for column, label, colour, offset in factor_bars
]
# One tick per firm, centred under its group; rotation must be a number, not "90".
plt.xticks(x_axis + 0.4, list(overall_rating_lowest.index.values), rotation=90)
plt.xlim(-0.5, len(x_axis))
plt.ylim(0, 4 + 0.25)
plt.legend(handles=handles, loc="best", bbox_to_anchor=(1.0, 1.0))
plt.title("Bottom 5 Firms Overall Rating vs Work environment factor")
# Lay out before saving so the saved image matches what is displayed.
plt.tight_layout()
plt.savefig("Output/Bottom 5 Firms Overall Rating vs Work environment factor.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# NM Grouped bar chart: the top 5 firms' overall rating next to each
# work-environment factor.

# (column, legend label, bar colour, x offset of the bar inside each firm's group)
factor_bars = [
    ("overall_rating", "Overall Rating", "red", 0.13),
    ("work_life_balance", "Work Life Balance", "b", 0.25),
    ("culture_values", "Culture values", "g", 0.35),
    ("diversity_inclusion", "Diversity inclusion", "orange", 0.45),
    ("career_opp", "Career opportunities", "y", 0.55),
    ("comp_benefits", "Company benefits", "purple", 0.65),
    ("senior_mgmt", "Senior management", "cyan", 0.75),
]

# Create the figure FIRST so the requested size applies to the chart itself;
# calling plt.figure() after plotting only produced an extra empty figure.
plt.figure(figsize=(10, 8))
x_axis = np.arange(len(overall_rating_largest))
handles = [
    plt.bar(x_axis + offset, overall_rating_largest[column], width=0.12,
            label=label, color=colour, alpha=0.75, align="center")
    for column, label, colour, offset in factor_bars
]
# One tick per firm, centred under its group; rotation must be a number, not "90".
plt.xticks(x_axis + 0.4, list(overall_rating_largest.index.values), rotation=90)
plt.xlim(-0.5, len(x_axis))
plt.ylim(0, 5 + 0.5)
plt.legend(handles=handles, loc="best", bbox_to_anchor=(1.0, 1.0))
plt.title("Top 5 Firms Overall Rating v/s Work environment factor")
# Lay out before saving so the saved image matches what is displayed.
plt.tight_layout()
plt.savefig("Output/Top 5 Firms Overall Rating.png", dpi=300, bbox_inches="tight")
plt.show()


In [None]:
# # NM Split the review date text into year / month / day string columns
# by character position (0-3, 5-6, 8-9).
# NOTE(review): presumes "date_review" is text shaped like YYYY-MM-DD — confirm.
file_new["year"] = file_new["date_review"].str[:4]
file_new["month"] = file_new["date_review"].str[5:7]
file_new["day"] = file_new["date_review"].str[8:10]
file_new

In [None]:
# # NM Keep only the year: drop the raw date plus the month/day helper columns.
# file_new is rebound here, so re-running this cell on its own raises KeyError
# because the columns are already gone.
file_new = file_new.drop(columns=["date_review", "month", "day"])
file_new

In [None]:
# # NM Reviews of the two firms being compared (Accenture and the LEGO Group)
firm_year_df = file_new[file_new["firm"].isin(["Accenture", "the-LEGO-Group"])]
firm_year_df

In [None]:
# # JW All reviews that belong to the LEGO Group
lego_df = file_new[file_new["firm"] == "the-LEGO-Group"]
lego_df

In [None]:
# All reviews that belong to Accenture
accenture_df = file_new[file_new["firm"] == "Accenture"]
accenture_df

In [None]:
# # JW Group each firm's reviews by "location", then preview the first rows
# of every LEGO location group
lego_loc= lego_df.groupby(["location"])
accenture_loc = accenture_df.groupby(["location"])
lego_loc.head()

In [None]:
# # NM Group the two firms' reviews by (firm, year) and preview the first rows
# of every group. This rebinds firm_group_year, which an earlier VG cell also defines.
firm_group_year= firm_year_df.groupby(["firm","year"])
firm_group_year.head()

In [None]:
# Mean ratings per Accenture location: a 3-column summary (accenture_loc_df)
# and the full 7-column summary (accenture_loc_df2).
# Columns are selected with a list (double brackets): selecting groupby columns
# with a bare tuple of names was deprecated in pandas 1.x and raises in pandas 2.x.
accenture_loc_df = accenture_loc[["overall_rating", "work_life_balance", "culture_values"]].mean()
accenture_loc_df2 = accenture_loc[
    [
        "overall_rating",
        "work_life_balance",
        "culture_values",
        "diversity_inclusion",
        "career_opp",
        "comp_benefits",
        "senior_mgmt",
    ]
].mean()
# Location labels, i.e. the index of the grouped means
accenture_locations = accenture_loc_df.index
accenture_loc_df2

In [None]:
# JW Mean ratings per LEGO location: a 3-column summary (lego_loc_df)
# and the full 7-column summary (lego_loc_df2).
# Columns are selected with a list (double brackets): selecting groupby columns
# with a bare tuple of names was deprecated in pandas 1.x and raises in pandas 2.x.
lego_loc_df = lego_loc[["overall_rating", "work_life_balance", "culture_values"]].mean()
lego_loc_df2 = lego_loc[
    [
        "overall_rating",
        "work_life_balance",
        "culture_values",
        "diversity_inclusion",
        "career_opp",
        "comp_benefits",
        "senior_mgmt",
    ]
].mean()
# Location labels, i.e. the index of the grouped means
lego_locations = lego_loc_df.index
lego_loc_df2

In [None]:
# # NM Mean of every rating column per (firm, year).
# Columns are selected with a list (double brackets): selecting groupby columns
# with a bare tuple of names was deprecated in pandas 1.x and raises in pandas 2.x.
firm_group_rating_yearly = firm_group_year[
    [
        "overall_rating",
        "work_life_balance",
        "culture_values",
        "diversity_inclusion",
        "career_opp",
        "comp_benefits",
        "senior_mgmt",
    ]
].mean()
firm_group_rating_yearly

In [None]:
# # JW The 12 LEGO locations with the highest mean overall rating (3-column summary);
# any remaining NaN is shown as 0
best_lego = lego_loc_df.nlargest(12, "overall_rating").fillna(0)
best_lego

In [None]:
# The 12 LEGO locations with the highest mean overall rating (all 7 rating columns);
# any remaining NaN is shown as 0
best_lego2 = lego_loc_df2.nlargest(12, "overall_rating").fillna(0)
best_lego2

In [None]:
# The 12 Accenture locations with the highest mean overall rating (3-column summary);
# any remaining NaN is shown as 0
best_accenture = accenture_loc_df.nlargest(12, "overall_rating").fillna(0)
best_accenture

In [None]:
# The 12 Accenture locations with the highest mean overall rating (all 7 rating columns);
# any remaining NaN is shown as 0
best_accenture2 = accenture_loc_df2.nlargest(12, "overall_rating").fillna(0)
best_accenture2

In [None]:
# The 12 LEGO locations with the lowest mean overall rating, for both the
# 3-column and the 7-column summaries; any remaining NaN is shown as 0
worst_lego = lego_loc_df.nsmallest(12, "overall_rating").fillna(0)
worst_lego2 = lego_loc_df2.nsmallest(12, "overall_rating").fillna(0)
worst_lego

In [None]:
# The 12 Accenture locations with the lowest mean overall rating, for both the
# 3-column and the 7-column summaries; any remaining NaN is shown as 0
worst_accenture = accenture_loc_df.nsmallest(12, "overall_rating").fillna(0)
worst_accenture2 = accenture_loc_df2.nsmallest(12, "overall_rating").fillna(0)
worst_accenture2

In [None]:
# # NM Replace NaN yearly means with 0 (rebinds firm_group_rating_yearly)
# NOTE(review): afterwards a 0 stands for "no rating available", not a real score.
firm_group_rating_yearly = firm_group_rating_yearly.fillna(0)
firm_group_rating_yearly

In [None]:
# # NM Line chart of the yearly mean overall rating, one coloured line per firm.
# "year" and "firm" are the index levels of firm_group_rating_yearly.
sns.relplot(
    data=firm_group_rating_yearly,
    kind="line",
    x="year",
    y="overall_rating",
    hue="firm",
)
plt.title('Yearly Change Overall Rating By Firm')
plt.xlabel('Year')
plt.ylabel('Overall Rating')
plt.xticks(rotation=60)
plt.savefig("Output/Yearly Change Overall Rating By Firm.png", dpi=300, bbox_inches="tight")
plt.show()


In [None]:
# Line chart of the three mean ratings across the 12 lowest-rated Accenture locations
ax = worst_accenture.plot.line(figsize=(10, 8))
ax.set_title("Worst Accenture Locations")
ax.legend(worst_accenture.columns, loc='best')
plt.xticks(rotation=90)
plt.savefig("Output/Worst Accenture Locations.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Line chart of the three mean ratings across the 12 highest-rated Accenture locations
ax = best_accenture.plot.line(figsize=(10, 8))
ax.set_title("Best Accenture Locations")
ax.legend(best_accenture.columns, loc='best')
plt.xticks(rotation=90)
plt.savefig("Output/Best Accenture Locations.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Line chart of the three mean ratings across the 12 lowest-rated LEGO locations
ax = worst_lego.plot.line(figsize=(10, 8))
ax.set_title("Worst Lego Locations")
ax.legend(worst_lego.columns, loc='best')
plt.xticks(rotation=90)
plt.savefig("Output/Worst Lego Locations.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Line chart of the three mean ratings across the 12 highest-rated LEGO locations
ax = best_lego.plot.line(figsize=(10, 8))
ax.set_title("Best Lego Locations")
ax.legend(best_lego.columns, loc='best')
plt.xticks(rotation=90)
plt.savefig("Output/Best Lego Locations.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Same data as the combined "Best Lego Locations" chart, drawn as one subplot per rating column
ratings = best_lego.plot.line(
    subplots=True,
    figsize=(10, 8),
    title="Best Lego Locations",
)
plt.xticks(rotation=90)
plt.savefig("Output/Best Lego Locations 2.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Same data as the combined "Worst Lego Locations" chart, drawn as one subplot per rating column
ratings = worst_lego.plot.line(
    subplots=True,
    figsize=(10, 8),
    title="Worst Lego Locations",
)
plt.xticks(rotation=90)
plt.savefig("Output/Worst Lego Locations 2.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Same data as the combined "Best Accenture Locations" chart, drawn as one subplot per rating column
ratings = best_accenture.plot.line(
    subplots=True,
    figsize=(10, 8),
    title="Best Accenture Locations",
)
plt.xticks(rotation=90)
plt.savefig("Output/Best Accenture Locations 2.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# The 12 lowest-rated Accenture locations, drawn as one subplot per rating column
ratings = worst_accenture.plot.line(figsize=(10, 8), subplots=True, title="Worst Accenture Locations")
plt.xticks(rotation=90)
# Save with the same dpi / tight bounding box as every other chart in the notebook;
# without bbox_inches="tight" the rotated location labels can be cut off in the file.
plt.savefig("Output/Worst Accenture Locations 2.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Data for the LEGO "worst locations" bar chart: the 7-column summary of the
# 12 lowest-rated LEGO locations.
# Earlier alternative, kept for reference:
# location_lowest= lego_loc_df.nsmallest(n=10, columns=['overall_rating'])

location_lowest = worst_lego2
location_lowest

In [None]:
# Grouped bar chart: the lowest-rated LEGO locations' overall rating next to
# each work-environment factor.

# (column, legend label, bar colour, x offset of the bar inside each location's group)
factor_bars = [
    ("overall_rating", "Overall Rating", "red", 0.13),
    ("work_life_balance", "Work Life Balance", "b", 0.25),
    ("culture_values", "Culture values", "g", 0.35),
    ("diversity_inclusion", "Diversity inclusion", "orange", 0.45),
    ("career_opp", "Career opportunities", "y", 0.55),
    ("comp_benefits", "Company benefits", "purple", 0.65),
    ("senior_mgmt", "Senior management", "cyan", 0.75),
]

# Create the figure FIRST so its size applies to the chart. The old
# plt.figure(figsize=(100, 8)) ran after saving, so it never affected the chart
# and only showed an empty figure; a moderate width is used instead of a
# 100-inch canvas.
plt.figure(figsize=(14, 8))
x_axis = np.arange(len(location_lowest))
handles = [
    plt.bar(x_axis + offset, location_lowest[column], width=0.12,
            label=label, color=colour, alpha=0.75, align="center")
    for column, label, colour, offset in factor_bars
]
# One tick per location, centred under its group; rotation must be a number, not "90".
plt.xticks(x_axis + 0.4, list(location_lowest.index.values), rotation=90)
plt.xlim(-0.5, len(x_axis))
plt.ylim(0, 5 + 0.5)
plt.legend(handles=handles, loc="best", bbox_to_anchor=(1.0, 1.0))
plt.title("Lego Worst Locations Overall Rating v/s Work environment factor")
# Lay out before saving so the saved image matches what is displayed.
plt.tight_layout()
plt.savefig("Output/Lego Worst Location Overall Rating.png", dpi=300, bbox_inches="tight")
plt.show()


In [None]:
# Data for the Accenture "worst locations" bar chart: the 7-column summary of
# the 12 lowest-rated Accenture locations
location_lowest2 = worst_accenture2
location_lowest2

In [None]:
# Grouped bar chart: the lowest-rated Accenture locations' overall rating next
# to each work-environment factor.

# (column, legend label, bar colour, x offset of the bar inside each location's group)
factor_bars = [
    ("overall_rating", "Overall Rating", "red", 0.13),
    ("work_life_balance", "Work Life Balance", "b", 0.25),
    ("culture_values", "Culture values", "g", 0.35),
    ("diversity_inclusion", "Diversity inclusion", "orange", 0.45),
    ("career_opp", "Career opportunities", "y", 0.55),
    ("comp_benefits", "Company benefits", "purple", 0.65),
    ("senior_mgmt", "Senior management", "cyan", 0.75),
]

# Create the figure FIRST so its size applies to the chart. The old
# plt.figure(figsize=(100, 8)) ran after saving, so it never affected the chart
# and only showed an empty figure; a moderate width is used instead of a
# 100-inch canvas.
plt.figure(figsize=(14, 8))
x_axis = np.arange(len(location_lowest2))
handles = [
    plt.bar(x_axis + offset, location_lowest2[column], width=0.12,
            label=label, color=colour, alpha=0.75, align="center")
    for column, label, colour, offset in factor_bars
]
# One tick per location, centred under its group; rotation must be a number, not "90".
plt.xticks(x_axis + 0.4, list(location_lowest2.index.values), rotation=90)
plt.xlim(-0.5, len(x_axis))
plt.ylim(0, 5 + 0.5)
plt.legend(handles=handles, loc="best", bbox_to_anchor=(1.0, 1.0))
# Title now says "Worst Locations", matching the data plotted and the output file name.
plt.title("Accenture Worst Locations Overall Rating v/s Work environment factor")
# Lay out before saving so the saved image matches what is displayed.
plt.tight_layout()
plt.savefig("Output/Accenture Worst Locations Overall Rating.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Data for the LEGO "best locations" bar chart: the 7-column summary of the
# 12 highest-rated LEGO locations
location_highest = best_lego2
best_lego2

In [None]:
# Grouped bar chart: the highest-rated LEGO locations' overall rating next to
# each work-environment factor.

# (column, legend label, bar colour, x offset of the bar inside each location's group)
factor_bars = [
    ("overall_rating", "Overall Rating", "red", 0.13),
    ("work_life_balance", "Work Life Balance", "b", 0.25),
    ("culture_values", "Culture values", "g", 0.35),
    ("diversity_inclusion", "Diversity inclusion", "orange", 0.45),
    ("career_opp", "Career opportunities", "y", 0.55),
    ("comp_benefits", "Company benefits", "purple", 0.65),
    ("senior_mgmt", "Senior management", "cyan", 0.75),
]

# Create the figure FIRST so its size applies to the chart. The old
# plt.figure(figsize=(100, 8)) ran after saving, so it never affected the chart
# and only showed an empty figure; a moderate width is used instead of a
# 100-inch canvas.
plt.figure(figsize=(14, 8))
x_axis = np.arange(len(location_highest))
handles = [
    plt.bar(x_axis + offset, location_highest[column], width=0.12,
            label=label, color=colour, alpha=0.75, align="center")
    for column, label, colour, offset in factor_bars
]
# One tick per location, centred under its group; rotation must be a number, not "90".
plt.xticks(x_axis + 0.4, list(location_highest.index.values), rotation=90)
plt.xlim(-0.5, len(x_axis))
plt.ylim(0, 5.1 + 0.5)
plt.legend(handles=handles, loc="best", bbox_to_anchor=(1.0, 1.0))
plt.title("Lego Best Locations Overall Rating v/s Work environment factor")
# Lay out before saving so the saved image matches what is displayed.
plt.tight_layout()
plt.savefig("Output/Lego Best Locations Overall Rating.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Data for the Accenture "best locations" bar chart: the 7-column summary of
# the 12 highest-rated Accenture locations
location_highest2 = best_accenture2
best_accenture2

In [None]:
# Grouped bar chart: the highest-rated Accenture locations' overall rating next
# to each work-environment factor.
# Bug fix: this chart previously plotted `location_highest` (the LEGO data)
# under the Accenture title and file name; it now plots `location_highest2`.

# (column, legend label, bar colour, x offset of the bar inside each location's group)
factor_bars = [
    ("overall_rating", "Overall Rating", "red", 0.13),
    ("work_life_balance", "Work Life Balance", "b", 0.25),
    ("culture_values", "Culture values", "g", 0.35),
    ("diversity_inclusion", "Diversity inclusion", "orange", 0.45),
    ("career_opp", "Career opportunities", "y", 0.55),
    ("comp_benefits", "Company benefits", "purple", 0.65),
    ("senior_mgmt", "Senior management", "cyan", 0.75),
]

# Create the figure FIRST so its size applies to the chart. The old
# plt.figure(figsize=(100, 8)) ran after saving, so it never affected the chart
# and only showed an empty figure; a moderate width is used instead of a
# 100-inch canvas.
plt.figure(figsize=(14, 8))
x_axis = np.arange(len(location_highest2))
handles = [
    plt.bar(x_axis + offset, location_highest2[column], width=0.12,
            label=label, color=colour, alpha=0.75, align="center")
    for column, label, colour, offset in factor_bars
]
# One tick per location, centred under its group; rotation must be a number, not "90".
plt.xticks(x_axis + 0.4, list(location_highest2.index.values), rotation=90)
plt.xlim(-0.5, len(x_axis))
plt.ylim(0, 5.1 + 0.5)
plt.legend(handles=handles, loc="best", bbox_to_anchor=(1.0, 1.0))
plt.title("Accenture Best Locations Overall Rating v/s Work environment factor")
# Lay out before saving so the saved image matches what is displayed.
plt.tight_layout()
plt.savefig("Output/Accenture Best Locations Overall Rating.png", dpi=300, bbox_inches="tight")
plt.show()