### Another Example

#### In this example we will produce a single stacked bar chart of ratings by genre

#### 1. We start in the same way by loading the file

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Use forward slashes so the relative path resolves on Windows, macOS and Linux alike;
# backslashes would tie the notebook to Windows and trigger invalid-escape warnings in newer Python.
input_file = '../data/movie_ratings_summary.csv'
# Load the comma-separated ratings summary into a DataFrame.
df_movies = pd.read_csv(input_file, sep=',')

#### 2. We need to add a new column, the total count of the reviews for each movie

In [None]:
# Add a 'Total' column holding the sum of the five rating columns for each row.
# Wrapping the expression in parentheses lets it span several lines without
# '\' continuation characters; the layout is a readability choice only.
df_movies['Total'] = (
    df_movies['Awful']
    + df_movies['Bad']
    + df_movies['Average']
    + df_movies['Good']
    + df_movies['Excellent']
)
df_movies.head()

#### 3. We don't need the title column so we will drop it. When we do the aggregate, genres will become the index column.

In [None]:
# Drop the title column by reassignment rather than inplace=True, and ignore it if it
# is already gone, so this cell can be re-run without raising a KeyError.
df_movies = df_movies.drop(columns='title', errors='ignore')
# Sum every remaining column per genre; 'genres' becomes the index of the result.
# The aggregation is passed by name because handing the builtin sum to agg()
# raises a FutureWarning in recent pandas releases.
df_groupedby = df_movies.groupby(['genres']).agg('sum')
df_groupedby.head()

#### 4. We can now change the Awful to Excellent columns to represent % of the total, after which we can remove the total column

In [None]:
# Rescale each rating column to a percentage of the row's Total.
for rating in ('Awful', 'Bad', 'Average', 'Good', 'Excellent'):
    df_groupedby[rating] = (df_groupedby[rating] / df_groupedby['Total']) * 100

# Total was only needed as the denominator, so remove it from the frame in place.
del df_groupedby['Total']
df_groupedby

#### 5. We can now create the graph and the PNG file in much the same way as before.

In [None]:
# Draw a stacked bar chart from the frame and keep the returned Axes
# so the legend, title and labels can be set on it directly.
genre_axes = df_groupedby.plot(kind='bar', stacked=True)
genre_axes.legend(bbox_to_anchor=(1, 1))  # place the legend relative to the axes point (1, 1)
genre_axes.set_title("By Genre", fontsize=24)
genre_axes.set_ylabel('% Respondents', fontsize=18)
genre_axes.set_xlabel('Genre', fontsize=18)

# Resize the current figure to 8x8 inches, then write it to disk with tight bounding box.
current_figure = plt.gcf()
current_figure.set_size_inches(8, 8)
plt.savefig('respondents.png', bbox_inches='tight')

#### 6. Had I not removed the Total column, I could have explicitly listed the columns I wanted included

In [None]:
# Select the rating columns explicitly before plotting; any other column
# present in the frame is left out of the chart.
selected_ratings = df_groupedby[['Awful','Bad','Average', 'Good', 'Excellent']]
ratings_axes = selected_ratings.plot(kind='bar', stacked=True)
ratings_axes.legend(bbox_to_anchor=(1, 1))  # place the legend relative to the axes point (1, 1)
ratings_axes.set_title("By Genre", fontsize=24)
ratings_axes.set_ylabel('% Respondents', fontsize=18)
ratings_axes.set_xlabel('Genre', fontsize=18)

# Resize the current figure to 8x8 inches, then write it to disk with tight bounding box.
current_figure = plt.gcf()
current_figure.set_size_inches(8, 8)
plt.savefig('respondents.png', bbox_inches='tight')