# Creating plots of your dataframes

### The easiest way to create good plots is one of these 2 options:
#### - use the seaborn library for plots
#### - or if you want interactive plots use a library called plotly express

In [None]:
# Libraries used throughout this notebook: pandas for the dataframe,
# seaborn for static plots and plotly for interactive ones.
import pandas as pd
import plotly.express as px
import plotly.io as pio
import seaborn as sns

# Make 'plotly_white' the default template for the plotly figures.
pio.templates.default = 'plotly_white'

# Read the titles dataset into a dataframe and preview its first three rows.
df = pd.read_csv('./data/most_voted_titles_enriched.csv')
df.head(3)

### Let's see how you make a scatter plot in seaborn and plot runtime vs the average rating.
### The syntax is usually like this:
#### - specify the dataframe you want to use
#### - specify your x-variable
#### - specify your y-variable
#### - and if you would like to color certain points, then specify the hue

### Here's the seaborn way of doing things:

In [None]:
# Static scatter plot of runtime against average rating, with one colour
# per titleType. The small marker size (s) keeps the points from
# merging into a blob.
# The trailing semicolon keeps the notebook from echoing the return value.
sns.scatterplot(
    x='runtimeMinutes',
    y='averageRating',
    hue='titleType',
    s=3.0,
    data=df,
);

### But plotting interactive plots is just as easy and makes it easier to check outliers. We are using plotly.express for this.

In [None]:
# Interactive scatter plot of runtime against average rating.
# Only rows with runtimeMinutes below 400 are plotted; hovering over a point
# also shows its primaryTitle, and the partial opacity keeps overlapping
# points distinguishable.
px.scatter(
    data_frame=df.query('runtimeMinutes < 400'),
    x='runtimeMinutes',
    y='averageRating',
    color='titleType',
    hover_data=['primaryTitle'],
    title='runtime vs average rating',
    height=500,
    opacity=0.4,
)

### There are all sorts of plots in plotly.express. Here's an example of a histogram:

In [None]:
# Compare the distribution of average ratings across title types.
# With histnorm='probability density' every titleType trace is normalised on
# its own, so the bars are overlaid (semi-transparent) instead of using the
# default stacked layout: stacking per-group densities gives bar heights
# that have no meaningful interpretation.
px.histogram(
    title='Histogram of average rating vs titletype',
    data_frame=df,
    x='averageRating',
    color='titleType',
    histnorm='probability density',
    barmode='overlay',
    opacity=0.6,
)

### Or a boxplot of rating by titletype

In [None]:
# Interactive box plot: one box per titleType summarising the spread of
# averageRating within that type.
px.box(
    data_frame=df,
    y='averageRating',
    x='titleType',
    title='Comparing average rating by titleType',
)

## We can count values by using the method `.value_counts(dropna=False)`
## Let's make a count of all the countries in the dataset

## And let's plot that count with a barplot