# Importing the libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Importing the dataset

In [None]:
# Load the movie ratings CSV into the DataFrame `ds`, which every later cell reads.
# The path is relative, so the file must be reachable from the working directory.
ds = pd.read_csv('Movie_Ratings.csv')

In [None]:
# Preview the first rows to check that the file loaded as expected.
ds.head()

In [None]:
# Print a summary of the columns and their dtypes before any conversion.
ds.info()

#### 'Year of release' is a category, not a quantity, so it should not be treated as an ordinary integer. We therefore convert it (and 'Genre') to the categorical data type.

In [None]:
# Both columns hold labels rather than quantities, so store them as categoricals.
for column in ('Year of release', 'Genre'):
    ds[column] = ds[column].astype('category')

In [None]:
# Print the summary again to confirm the two converted columns now report the category dtype.
ds.info()

In [None]:
# Show the distinct release years present in the data.
ds['Year of release'].unique()

In [None]:
# Show the category labels of the Genre column.
ds.Genre.cat.categories

# Joint Plots

In [None]:
# Joint plot of Rotten Tomatoes ratings against audience ratings, drawn as a scatter plot.
jp = sns.jointplot(data = ds, x = 'Rotten Tomatoes Ratings %', y = 'Audience Ratings %', kind = 'scatter', color = 'maroon')

In [None]:
# Same pair of columns as the scatter joint plot, drawn with kernel density estimates instead of points.
jp1 = sns.jointplot(data = ds, x = 'Rotten Tomatoes Ratings %', y = 'Audience Ratings %', kind = 'kde', color = 'darkgreen')

In [None]:
# Same pair of columns again, drawn with hexagonal bins.
# The 'hex' kind makes it easy to see where clusters of points form.

jp2 = sns.jointplot(data = ds, x = 'Rotten Tomatoes Ratings %', y = 'Audience Ratings %', kind = 'hex')

# Histogram

In [None]:
# Switch seaborn to the 'darkgrid' style, then draw a 20-bin histogram of all film budgets.
sns.set_style('darkgrid')
plt.hist(ds['Budget (million $)'], bins = 20)

In [None]:
# Show the genre labels again; presumably a reference for the per-genre filters in the next cell.
ds.Genre.cat.categories

In [None]:
# Stacked histogram of budgets: one dataset per genre, all passed to a single
# hist call so that the bars are stacked on top of each other.
genre_names = ('Action', 'Adventure', 'Comedy', 'Drama', 'Horror', 'Romance', 'Thriller')
budgets_by_genre = [
    ds.loc[ds.Genre == name, 'Budget (million $)']
    for name in genre_names
]
plt.hist(budgets_by_genre, bins=15, stacked=True)
plt.show()

In [None]:
# Build the stacked budget histogram from the genre categories instead of
# hard-coding each genre name.
# Stacking only happens between datasets handed to the same hist call, so the
# per-genre Series are collected first and plotted once. Calling plt.hist
# separately for every genre would draw the histograms over each other.
genres = list(ds.Genre.cat.categories)
budgets_per_genre = [ds[ds.Genre == gen]['Budget (million $)'] for gen in genres]
plt.hist(budgets_per_genre, bins=15, stacked=True, label=genres)
# Label each colour so the stacked segments can be told apart.
plt.legend()
plt.show()

# Scatter Plots

In [None]:
# Scatter of Rotten Tomatoes ratings against audience ratings, coloured by genre.
# fit_reg=False leaves out the regression line, and legend=False suppresses
# seaborn's own legend so it can be placed manually just outside the axes.
# `height` is the current name of the figure-size argument; seaborn renamed it
# from `size` in 0.9 and recent releases reject the old spelling.
sns.lmplot(
    data=ds,
    x='Rotten Tomatoes Ratings %',
    y='Audience Ratings %',
    hue='Genre',
    fit_reg=False,
    legend=False,
    aspect=1,
    height=7,
)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1.013))

# Kernel Density Estimate Plot (KDE Plot)

In [None]:
# Draw the same 2-D KDE twice on the same axes: a filled version first, then
# plain contour lines on top, which gives the density levels sharper edges.
# The columns are passed by name through data=/x=/y= because current seaborn
# no longer accepts two positional data vectors. fill=True is the current
# spelling of shade=True, and the lowest density level is left unfilled by
# default, which is what shade_lowest=False used to request.
# NOTE(review): this spelling needs seaborn 0.11 or newer.

kde1 = sns.kdeplot(data=ds, x='Rotten Tomatoes Ratings %', y='Audience Ratings %', fill=True)
kde2 = sns.kdeplot(data=ds, x='Rotten Tomatoes Ratings %', y='Audience Ratings %', cmap='Blues')

# Working with Subplots()

In [None]:
# Subplots can be used to make dashboards: here two KDE plots sit side by side
# on a 1x2 grid that shares both axes.

f, axes = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)
# Columns are passed by name (data=/x=/y=); current seaborn no longer accepts
# two positional data vectors. NOTE(review): needs seaborn 0.11 or newer.
kde_1 = sns.kdeplot(data=ds, x='Budget (million $)', y='Rotten Tomatoes Ratings %', ax=axes[0])
kde_2 = sns.kdeplot(data=ds, x='Budget (million $)', y='Audience Ratings %', ax=axes[1])
# The x axis is shared, so setting the limits on one subplot applies to both.
kde_1.set(xlim=(-20, 200))

# Violin plots and Box plots

##### Violin plots are similar to Box plots, except they also show the probability density of the data at different values. Violin plots show the full distribution of the data.

In [None]:
# Set the default figure size to 10x5, then draw one violin of budgets per genre.
plt.rcParams['figure.figsize'] = (10,5)
violinplot = sns.violinplot(data = ds, x = 'Genre', y = 'Budget (million $)')

In [None]:
# Same columns as the violin plot, drawn as a box plot for comparison.
boxplot = sns.boxplot(data = ds, x = 'Genre', y = 'Budget (million $)')

# Facet Grid

In [None]:
# Grid of scatter plots: one row per release year, one column per genre,
# each showing budget against Rotten Tomatoes rating.
# Marker styling is kept in `kws` and forwarded to plt.scatter by map().
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}
facet_grid = sns.FacetGrid(
    ds,
    row='Year of release',
    col='Genre',
    hue='Genre',
).map(plt.scatter, 'Budget (million $)', 'Rotten Tomatoes Ratings %', **kws)

In [None]:
# Same year-by-genre grid, this time with a budget histogram in every panel.
facet_grid = sns.FacetGrid(
    ds,
    row='Year of release',
    col='Genre',
    hue='Genre',
).map(plt.hist, 'Budget (million $)')

# Building a dashboard

In [None]:
# Show two rows as a reminder of the column names used in the dashboard below.
ds.head(2)

In [None]:
# 2x2 dashboard. Each KDE panel is drawn twice on the same axes (filled, then
# contour lines on top) so the density levels have sharper edges.
# Columns are passed by name (data=/x=/y=) and fill=True replaces shade=True,
# because current seaborn no longer accepts two positional data vectors; the
# lowest density level is left unfilled by default, as shade_lowest=False did.
# NOTE(review): this spelling needs seaborn 0.11 or newer.
sns.set_style('darkgrid')
f, axes = plt.subplots(2, 2, figsize=(15, 15))

# Top-left: budget against audience rating.
kde1 = sns.kdeplot(data=ds, x='Budget (million $)', y='Audience Ratings %',
                   cmap='Greens', fill=True, ax=axes[0, 0])
kde1 = sns.kdeplot(data=ds, x='Budget (million $)', y='Audience Ratings %',
                   cmap='Greens', ax=axes[0, 0])

# Top-right: budget against Rotten Tomatoes rating.
kde2 = sns.kdeplot(data=ds, x='Budget (million $)', y='Rotten Tomatoes Ratings %',
                   cmap='inferno', fill=True, ax=axes[0, 1])
kde2 = sns.kdeplot(data=ds, x='Budget (million $)', y='Rotten Tomatoes Ratings %',
                   cmap='cool', ax=axes[0, 1])

# Bottom-left: Rotten Tomatoes ratings of Drama films per release year.
# x and y are given as column names so they are looked up in the filtered
# frame; passing full-length Series taken from `ds` would bypass the Drama
# filter and plot every genre.
plot3 = sns.violinplot(data=ds[ds.Genre == 'Drama'], x='Year of release',
                       y='Rotten Tomatoes Ratings %', ax=axes[1, 0])

# Bottom-right: Rotten Tomatoes rating against audience rating.
kde3 = sns.kdeplot(data=ds, x='Rotten Tomatoes Ratings %', y='Audience Ratings %',
                   cmap='Reds_r', fill=True, ax=axes[1, 1])
kde3 = sns.kdeplot(data=ds, x='Rotten Tomatoes Ratings %', y='Audience Ratings %',
                   cmap='Reds_r', ax=axes[1, 1])

# Give the three KDE panels the same x range.
kde3.set(xlim=(-20, 160))
kde2.set(xlim=(-20, 160))
kde1.set(xlim=(-20, 160))
plt.show()

# Get Creative

In [None]:
# Fill-in-the-blanks template: every `_` below is a placeholder (module name,
# plotting function, DataFrame, column name or label text) that has to be
# replaced before this cell can run. The next cell is a completed version.

# Define the style
_.set(style="darkgrid", palette="muted", color_codes=True)

# Plot the boxplots (light grey, outlier markers hidden via showfliers=False)
ax = sns._(data=_, x='_', y='_', orient='v', color='lightgray', showfliers=False)
# Make the boxes semi-transparent
plt.setp(ax.artists, alpha=0.5)

# Add in points to show each observation
_.stripplot(x='_', y='_', data=_, jitter=True, size=6, linewidth=0, hue = 'Studio', alpha=0.7)

# Title and axis labels
ax.axes.set_title('_',fontsize=30)
ax.set_xlabel('_',fontsize=20)
ax.set_ylabel('_',fontsize=20)

# Define where to place the legend
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

In [None]:
# Audience rating per genre: light-grey box plots with every film drawn on top
# as a jittered point.
sns.set(style="darkgrid", palette="muted", color_codes=True)
plt.rcParams['figure.figsize'] = (12, 12)

# showfliers=False hides the box plot's outlier markers; the strip plot below
# draws each observation anyway.
ax = sns.boxplot(
    x='Genre',
    y='Audience Ratings %',
    data=ds,
    orient='v',
    color='lightgray',
    showfliers=False,
)
plt.setp(ax.artists, alpha=0.5)

sns.stripplot(
    x='Genre',
    y='Audience Ratings %',
    data=ds,
    hue='Genre',
    jitter=True,
    size=6,
    linewidth=0,
    alpha=0.7,
)

# Title and axis labels
ax.axes.set_title('Audience Rating Per Genre', fontsize=30)
ax.set_xlabel('Genre', fontsize=20, color='grey')
ax.set_ylabel('Audience Rating', fontsize=20, color='grey')

# Anchor the legend just outside the top-right corner of the axes
ax.legend(loc=2, bbox_to_anchor=(1, 1.0075))

# Quiz

In [None]:
# Q1: You are performing analytics on a dataset with information about customers of a bank. 
# Which of the following columns should most likely be a categorical data type?

# 1.) Account balance
# 2.) Credit limit
# 3.) Postcode
# 4.) Number of years with the bank

# Compute the answer number first and print it afterwards: print() returns
# None, so assigning its result would leave Q1_ans empty.
Q1_ans = round(17/7 + 1/3 + 9/17 - 5/22)
print(Q1_ans)

In [None]:
# Q2: Which function did we use to create dashboards in this course?

# 1.) plt.plot()
# 2.) plt.subplots()
# 3.) sns.FacetGrid()
# 4.) sns.dashboard()

# Compute the answer number first and print it afterwards: print() returns
# None, so assigning its result would leave Q2_ans empty.
Q2_ans = round(1/17 + 79/17 - 55/22)
print(Q2_ans)

In [None]:
# Q3: How many columns and/or rows are required to create a violinplot using Seaborn? 
# What do their types have to be?

# 1.) 1 column: numeric
# 2.) 1 column: categorical
# 3.) 2 columns: both numerical
# 4.) 2 columns: 1 categorical and 1 numerical

# Compute the answer number first and print it afterwards: print() returns
# None, so assigning its result would leave Q3_ans empty.
Q3_ans = round(17/9 + 4/3 + 9/13 - 7/22)
print(Q3_ans)

In [None]:
# Q4: We are working with the following code:

'''
f, axes = plt.subplots(2, 1, figsize=(15,15))
k1 = sns.kdeplot(movies.BudgetMillions, movies.AudienceRating, \
                 shade=True, shade_lowest=False, cmap='cool', \
                 ax=axes[0])
plt.show()
'''

# This code produces two subplots, one under the other. The first one has the KDE plot, the second one is empty. 
# However, your project requirements have changed and now you need to have 4 subplots in a 2x2 matrix. 
# The KDE plot must stay in the top left spot. You replace the first line of code with the following code:

'''
f, axes = plt.subplots(2, 2, figsize=(15,15))
'''

# What other change do you need to make to your code in order to avoid getting an error?


# 1.) Replace "ax = axes[0]" with "ax = axes[0,0]"
# 2.) Replace "sns.kdeplot" with "plt.hist"
# 3.) Replace "figsize = (15,15)" with "figsize = (2,2)"
# 4.) No changes are required

# Compute the answer number first and print it afterwards: print() returns
# None, so assigning its result would leave Q4_ans empty.
Q4_ans = round(7/9 - 4/13 + 27/47)
print(Q4_ans)

In [None]:
# Q5: Which of the following is not a real parameter of plt.legend()?

# 1.) frameon
# 2.) fancybox
# 3.) shadow
# 4.) bins

# Compute the answer number first and print it afterwards: print() returns
# None, so assigning its result would leave Q5_ans empty.
Q5_ans = round(5/11 + 15/7 + 13/11)
print(Q5_ans)