## Step 5: Visualize # of Movies vs. %ROI for each role using a scatterplot

\# of movies is used as a proxy for consistent success in people's various roles. For example, a single successful movie might just be a fluke, aka "One Hit Wonder" status.

Because all the dataframes are stored in dictionaries keyed by role, it will be easy to iterate through them to quickly make a collection of figures.

In [None]:
# all # of movies
# Scatter of # of movies vs. career %ROI, one subplot per role.
# Relies on `pplorder`, `ppl_comp` and `ppl_comp_avgs` defined earlier in the notebook.

# create figure: 3x3 grid of subplots
fig, ax = plt.subplots(3, 3, figsize=[30,22.5])

# get index `i` for plotting purposes, `ppl` for dataframe data purposes
for i, ppl in enumerate(pplorder):

    # fill the grid row by row: divmod(i, 3) is the (row, column) of subplot `i`
    role_ax = ax[divmod(i, 3)]
    role_df = ppl_comp[ppl]

    # plot the data in the `ppl` dataframe from `ppl_comp`
    # `movie_count` on x, `ROI_careertot%` on y
    role_ax.scatter(x=role_df['movie_count'], y=role_df['ROI_careertot%'], label='Person')

    # plot averages from `ppl_comp_avgs`
    # `f'{ppl}_movie_count'` as a vertical line
    # `f'{ppl}_%ROI_avg'` as a horizontal line
    role_ax.axvline(ppl_comp_avgs[f'{ppl}_movie_count'], ls='--', color='green', label='Average Movie Count')
    role_ax.axhline(ppl_comp_avgs[f'{ppl}_%ROI_avg'], ls='--', color='red', label='Average ROI%')

    # remove grid to reduce busy-ness
    role_ax.grid(False)

    # labels and title
    role_ax.set_xlabel('# of Movies', fontsize=18)
    role_ax.set_ylabel('%ROI (Inflation Adjusted)', fontsize=18)
    role_ax.set_title(f'# of Movies vs. %ROI: {ppl.capitalize()}', fontsize=20)
    role_ax.tick_params(axis='both', which='major', labelsize=16)

    # set ticks in increments of 1, except 'producer', which is 5 due to high movie counts
    tick_step = 5 if ppl == 'producer' else 1
    role_ax.xaxis.set_major_locator(plt.MultipleLocator(tick_step))

# tight_layout only accounts for what exists at the moment it is called, so run it
# after every label, title and tick label is in place; otherwise the large fonts
# used above are ignored and text from neighbouring subplots can collide
fig.tight_layout(pad=5)

# label the whole figure outside the subplots to reduce busy-ness
# (every subplot has the same three entries, so the first subplot's handles are enough)
lines, labels = fig.axes[0].get_legend_handles_labels()
fig.legend(lines, labels, bbox_to_anchor=(0.97,0.98), loc='lower right', fontsize=20, frameon=True)

## Step 6: Plot top performers in each role in a bar graph

Now we can home in on the 5 best people in each role and compare them on one axis: ROI%.

In [None]:
# grab top 5 names in each position: keep the first 5 rows of every role's dataframe
# NOTE(review): these are the "top" 5 only if each dataframe is already ranked
# upstream (presumably by ROI) — confirm against the cell that builds `ppl_comp`
ppl_compT = {role: role_df.head(5) for role, role_df in ppl_comp.items()}

In [None]:
# Horizontal bar chart of the top performers' career %ROI, one subplot per role.
# Relies on `pplorder`, `ppl_compT` and `ppl_comp_avgs` defined earlier in the notebook.

# create figure: 3x3 grid of subplots
fig, ax = plt.subplots(3, 3, figsize=[25,15])

# get index `i` for plotting purposes, `ppl` for dataframe data purposes
for i, ppl in enumerate(pplorder):

    # fill the grid row by row: divmod(i, 3) is the (row, column) of subplot `i`
    role_ax = ax[divmod(i, 3)]
    top_df = ppl_compT[ppl]

    # plot the data in the `ppl` dataframe from `ppl_compT`
    # `ROI_careertot%` as bar width, `primary_name` as label on the y axis
    role_ax.barh(y=top_df['primary_name'], width=top_df['ROI_careertot%'], label='Person')
    # barh stacks bars bottom-up; flip the axis so the first row is drawn at the top
    role_ax.invert_yaxis()

    # remove grid to reduce busy-ness
    role_ax.grid(False)

    # plot average ROI% as a vertical line (ROI% is on the x axis here)
    # `f'{ppl}_%ROI_avg'` in `ppl_comp_avgs`
    role_ax.axvline(ppl_comp_avgs[f'{ppl}_%ROI_avg'], ls='--', color='red', label='Average ROI%')

    # labels and title
    role_ax.set_xlabel('%ROI (Inflation Adjusted)', fontsize=18)
    role_ax.set_title(f'Name vs. %ROI: {ppl.capitalize()}', fontsize=20)
    role_ax.tick_params(axis='both', which='major', labelsize=16)

# tight_layout only accounts for what exists at the moment it is called, so run it
# after the bars, name tick labels and titles are in place; otherwise long names on
# the y axis are ignored and can run into the neighbouring subplot
fig.tight_layout(pad=10)

# label the whole figure outside the subplots to reduce busy-ness
# (every subplot has the same entries, so the first subplot's handles are enough)
lines, labels = fig.axes[0].get_legend_handles_labels()
fig.legend(lines, labels, bbox_to_anchor=(0.94,0.93), loc='lower right', fontsize=20, frameon=True)

# save the figure (the target directory must already exist)
fig.savefig('images/peoplefigs/name_roi_bar_allmov.png')