In [26]:
import pandas as pd
import plotly.express as px

In [27]:
# Read the processed episode table and discard the 'Unnamed: 0' column
# (presumably an index column written out when the CSV was saved - confirm).
# Chaining the drop avoids mutating the freshly loaded frame in place.
df = pd.read_csv('../Data/Processed.csv').drop(columns=['Unnamed: 0'])

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Director,Has 10,Has 11,Has 12,Has 13,Has 9,Has Amy Pond,Has Auton,Has Bill,Has Captain Jack Harkness,...,doctorid,duration,episode_id,rating,season,share,title,views,votes,weekday
0,Keith Boak,0,0,0,0,1,0.0,1.0,0.0,0.0,...,9,00:44:14,27-1,7.6,27,44.8,Rose,10.81,6504,Sat
1,Euros Lyn,0,0,0,0,1,0.0,0.0,0.0,0.0,...,9,00:44:45,27-2,7.6,27,37.8,The End of the World,7.97,5684,Sat
2,Euros Lyn,0,0,0,0,1,0.0,0.0,0.0,0.0,...,9,00:44:50,27-3,7.6,27,37.8,The Unquiet Dead,8.86,5326,Sat
3,Keith Boak,0,0,0,0,1,0.0,0.0,0.0,0.0,...,9,00:45:05,27-4,7.0,27,35.7,Aliens of London,7.63,5116,Sat
4,Keith Boak,0,0,0,0,1,0.0,0.0,0.0,0.0,...,9,00:40:40,27-5,7.1,27,40.2,World War Three,7.98,4943,Sat


In [28]:
# Common themes
# Shared plotly settings so that every figure in the notebook looks alike.

# Layout template applied by the format_and_show* helpers.
template = 'plotly_dark'

# Qualitative palette for categorical colouring.
theme_discrete = px.colors.qualitative.Prism

# Sequential palettes.
theme_sequential = px.colors.sequential.Agsunset
theme_hot = px.colors.sequential.Reds
theme_cold = px.colors.sequential.Blues

# Diverging palettes (Picnic is kept in both orientations).
theme_diverging_neutral = px.colors.diverging.RdYlBu
theme_diverging = px.colors.diverging.Picnic_r
theme_diverging_r = px.colors.diverging.Picnic
theme_continuous = px.colors.diverging.balance

# Helper functions to standardize display
def _apply_layout_and_show(fig, **layout):
    """Apply the shared template plus the given layout settings, then render.

    Parameters
    ----------
    fig
        Figure object exposing ``update_layout`` and ``show``.
    **layout
        Layout keyword arguments (for example ``height`` or ``width``)
        forwarded unchanged to ``fig.update_layout``.
    """
    # `template` is the module-level setting; it is looked up at call time,
    # so changing it later affects every subsequent figure.
    fig.update_layout(template=template, **layout)
    fig.show()


def format_and_show_short(fig):
    """Show ``fig`` with the shared template at a height of 400."""
    _apply_layout_and_show(fig, height=400)


def format_and_show(fig):
    """Show ``fig`` with the shared template at a height of 550."""
    _apply_layout_and_show(fig, height=550)


def format_and_show_tall(fig):
    """Show ``fig`` with the shared template at a height of 800."""
    _apply_layout_and_show(fig, height=800)


def format_and_show_3d(fig):
    """Show ``fig`` with the shared template at 800 wide by 600 high."""
    _apply_layout_and_show(fig, width=800, height=600)


def format_and_show_sunburst(fig):
    """Show ``fig`` with the shared template at 1024 wide by 800 high."""
    _apply_layout_and_show(fig, width=1024, height=800)

In [29]:
# Scatter of each row's rating against its 'date' value, coloured by season
# on the shared sequential colour scale.
fig = px.scatter(
    data_frame=df,
    title='Ratings by Season',
    x='date',
    y='rating',
    color='season',
    color_continuous_scale=theme_sequential,
)

format_and_show(fig)

In [40]:
# 3D scatter of rating (z) for every Producer (x) / Writer (y) combination,
# with the marker colour repeating the rating on the sequential scale.
# The title names the plotted dimensions; it previously read
# 'Ratings by Season & Doctor', which matched none of the axes.
fig = px.scatter_3d(df, 
    x='Producer', 
    y='Writer',
    z='rating',
    color='rating', 
    title='Ratings by Producer & Writer',
    color_continuous_scale=theme_sequential)

format_and_show_3d(fig)

In [74]:
# Horizontal box plot of ratings, one box per Writer, with every data point
# drawn next to its box. Rows are sorted by rating, then Writer, before
# plotting.
fig = px.box(
    data_frame=df.sort_values(['rating', 'Writer']),
    title='Ratings by Writer',
    y='Writer',
    x='rating',
    color='Writer',
    color_discrete_sequence=theme_discrete,
    points='all',
)

# The legend is hidden on every trace; jitter and quartile method are set
# explicitly.
fig.update_traces(showlegend=False, jitter=1, quartilemethod='linear')

format_and_show_tall(fig)

In [76]:
# Horizontal box plot of ratings, one box per Director, with every data point
# drawn next to its box. Rows are sorted by rating, then Director, before
# plotting.
fig = px.box(
    data_frame=df.sort_values(['rating', 'Director']),
    title='Ratings by Director',
    y='Director',
    x='rating',
    color='Director',
    color_discrete_sequence=theme_discrete,
    points='all',
)

# The legend is hidden on every trace; jitter and quartile method are set
# explicitly.
fig.update_traces(showlegend=False, jitter=1, quartilemethod='linear')

format_and_show_tall(fig)

In [77]:
# Horizontal box plot of ratings, one box per Producer, with every data point
# drawn next to its box. Rows are sorted by rating, then Producer, before
# plotting. Shown at the standard (not tall) figure height.
fig = px.box(
    data_frame=df.sort_values(['rating', 'Producer']),
    title='Ratings by Producer',
    y='Producer',
    x='rating',
    color='Producer',
    color_discrete_sequence=theme_discrete,
    points='all',
)

# The legend is hidden on every trace; jitter and quartile method are set
# explicitly.
fig.update_traces(showlegend=False, jitter=1, quartilemethod='linear')

format_and_show(fig)

In [95]:
# Distribution of ratings, split and coloured by doctorid, with a violin
# marginal above the bars. Rows are sorted by doctorid before plotting.
#
# No `y` is passed on purpose: when both x and y are given, px.histogram
# defaults to histfunc='sum' and aggregates the y values, so the bars would
# show the sum of doctorid per rating bin instead of the number of rows
# (episodes) in that bin.
fig = px.histogram(df.sort_values(['doctorid']), 
    x='rating', 
    color='doctorid',
    title='Ratings by Doctor',
    marginal='violin',
    nbins=75,
    color_discrete_sequence=theme_sequential)

format_and_show_tall(fig)