### Setup and read file into dataframe

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Forward slashes resolve on Windows, macOS and Linux alike. The previous
# backslash-separated literal only resolved on Windows and depended on '\d'
# and '\m' not being recognised escape sequences, which newer Python
# versions warn about.
input_file = '../data/movie_ratings_summary.csv'
df_movies = pd.read_csv(input_file, sep=',')

### Show complete code and results
#### First a stacked barchart for films within each genre

In [None]:
# Use the movie title as the index so the titles become the bar labels.
# The guard keeps the cell re-runnable: once 'title' has been moved into the
# index it is no longer a column, and a second set_index('title') raises KeyError.
if 'title' in df_movies.columns:
    df_movies = df_movies.set_index('title')

rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
genres = df_movies['genres'].unique()
for x in genres:
    # Filter once per genre and reuse the result for both the count and the plot.
    genre_movies = df_movies[df_movies.genres == x]
    count = len(genre_movies)
    genre_movies[rating_columns] \
        .plot(kind='barh', stacked=True) \
        .legend(bbox_to_anchor=(1, 1))
    plt.title(x, fontsize=24)
    plt.xlabel('Respondents', fontsize=18)
    plt.ylabel('Title', fontsize=18)
    # Figure height grows with the number of movies (half an inch per bar).
    plt.gcf().set_size_inches(5, count / 2)
    # One image per genre, named after the genre.
    plt.savefig(x + '.png', bbox_inches='tight')

### Breaking this down

#### 1. Change the default index of the dataframe to 'title', because the index supplies the category labels that are normally drawn along the x-axis

In [None]:
# The complete cell earlier in the notebook may already have moved 'title'
# into the index. Calling set_index('title') a second time raises KeyError,
# so only convert while 'title' is still an ordinary column. This makes the
# cell safe under "Restart & Run All" and when run on its own.
if 'title' in df_movies.columns:
    df_movies.set_index('title', inplace = True)

#### 2. Next get a list of the genres and for now print them

In [None]:
# Collect the distinct values of the 'genres' column and print one per line.
genres = df_movies['genres'].unique()
for genre_name in genres:
    print(genre_name)

#### 3. All we have to do now is change the print statement for code that draws a barchart
#### 4. Try starting with a simple barchart

In [None]:
    # Grouped vertical bars: one group per 'Crime' movie, one bar per rating column.
    rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
    crime_movies = df_movies.loc[df_movies['genres'] == 'Crime']
    crime_movies[rating_columns].plot(kind='bar')

#### 5. What would this look like if we had a lot of films in the genre?

In [None]:
    # Same grouped vertical bar chart, this time for the 'Comedy' rows.
    rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
    comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
    comedy_movies[rating_columns].plot(kind='bar')

#### 6. A total mess. So swap the axes with a one-character change to the code

In [None]:
# Horizontal bars: the movie titles now run down the y-axis.
rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
comedy_movies[rating_columns].plot(kind='barh')

#### 7. Still a mess. We need to make the height of the graphing area larger. As we want to draw a graph for each genre, and we know that the number of movies in each varies greatly, it would be nice to select a height which is proportional to the number of movies in the genre.

#### 8. We can count the number of movies in a genre with this code

In [None]:
# Number of rows whose genre is 'Comedy'.
comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
count = comedy_movies.shape[0]
print(count)

#### 9. We now need to pass this into the graph code. This is the first time we explicitly use a matplotlib method, although the plot method in pandas does actually use matplotlib in the background.

In [None]:
rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
count = len(comedy_movies)
comedy_movies[rating_columns].plot(kind='barh')
# Resize the figure pandas just created: 5 inches wide, half an inch per movie.
current_figure = plt.gcf()
current_figure.set_size_inches(5, count / 2)

#### 10. Now we want to change the individual bars into a stacked bar. We can do this by adding a single parameter to the plot method.

In [None]:
rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
count = len(comedy_movies)
# stacked=True draws one bar per movie, split into the five rating segments.
comedy_movies[rating_columns].plot(kind='barh', stacked=True)
current_figure = plt.gcf()
current_figure.set_size_inches(5, count / 2)

#### 11. Now we just want to make some cosmetic changes: add axis labels and a title.

In [None]:
rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
count = len(comedy_movies)
comedy_movies[rating_columns].plot(kind='barh', stacked=True)
current_figure = plt.gcf()
current_figure.set_size_inches(5, count / 2)
# Cosmetics: a large title plus slightly smaller axis labels.
plt.title('Comedy', fontsize=24)
plt.xlabel('Respondents', fontsize=18)
plt.ylabel('Title', fontsize=18)

#### 12. Finally a line of code to save the graph to a file.

In [None]:
rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
comedy_movies = df_movies.loc[df_movies['genres'] == 'Comedy']
count = len(comedy_movies)
comedy_movies[rating_columns].plot(kind='barh', stacked=True)
current_figure = plt.gcf()
current_figure.set_size_inches(5, count / 2)
plt.title('Comedy', fontsize=24)
plt.xlabel('Respondents', fontsize=18)
plt.ylabel('Title', fontsize=18)
# Write the finished chart to disk; 'tight' trims the surrounding whitespace.
plt.savefig('Comedy' + '.png', bbox_inches='tight')

#### 13. Now put this code back into the for loop so that all of the graphs are produced

In [None]:
# 'title' is expected to be the index already (step 1), so it is not set again here.
rating_columns = ['Awful', 'Bad', 'Average', 'Good', 'Excellent']
genres = df_movies['genres'].unique()
for genre in genres:
    genre_movies = df_movies.loc[df_movies['genres'] == genre]
    count = len(genre_movies)
    # One stacked horizontal bar per movie, with the legend anchored at the
    # top-right corner of the axes.
    axes = genre_movies[rating_columns].plot(kind='barh', stacked=True)
    axes.legend(bbox_to_anchor=(1, 1))
    plt.title(genre, fontsize=24)
    plt.xlabel('Respondents', fontsize=18)
    plt.ylabel('Title', fontsize=18)
    # Half an inch of figure height per movie in the genre.
    plt.gcf().set_size_inches(5, count / 2)
    plt.savefig(genre + '.png', bbox_inches='tight')