In [33]:
import pandas as pd
import plotly.express as px

In [34]:
# Load the processed episode data and append string-typed copies of the
# Doctor id and season columns (strings work better than ints for legends
# and colour scales).
df = pd.read_csv('../Data/Processed.csv').assign(
    doctor=lambda frame: frame['doctorid'].astype(str),
    season_str=lambda frame: frame['season'].astype(str),
)

df.head()

Unnamed: 0,episode_id,Director,Has 10,Has 11,Has 12,Has 13,Has 9,Has Amy Pond,Has Bill,Has Clara,...,date,doctorid,rating,season,share,title,views,votes,doctor,season_str
0,27-1,Keith Boak,0,0,0,0,1,0.0,0.0,0.0,...,2005-03-26,9,7.6,1,44.8,Rose,10.81,6504,9,1
1,27-2,Euros Lyn,0,0,0,0,1,0.0,0.0,0.0,...,2005-04-02,9,7.6,1,37.8,The End of the World,7.97,5684,9,1
2,27-3,Euros Lyn,0,0,0,0,1,0.0,0.0,0.0,...,2005-04-09,9,7.6,1,37.8,The Unquiet Dead,8.86,5326,9,1
3,27-4,Keith Boak,0,0,0,0,1,0.0,0.0,0.0,...,2005-04-16,9,7.0,1,35.7,Aliens of London,7.63,5116,9,1
4,27-5,Keith Boak,0,0,0,0,1,0.0,0.0,0.0,...,2005-04-23,9,7.1,1,40.2,World War Three,7.98,4943,9,1


In [19]:
# Common themes: one layout template plus named colour palettes shared by
# the figures in this notebook.
template = 'plotly_dark'

# Qualitative palette for categorical colouring.
theme_discrete = px.colors.qualitative.Prism

# Sequential palettes (the `_r` name is the reversed variant).
theme_sequential = px.colors.sequential.Agsunset
theme_sequential_r = px.colors.sequential.Agsunset_r
theme_hot = px.colors.sequential.Reds
theme_cold = px.colors.sequential.Blues

# Diverging palettes.
# NOTE: `theme_diverging` is plotly's *reversed* Picnic scale and
# `theme_diverging_r` is the unreversed one.
theme_diverging_neutral = px.colors.diverging.RdYlBu
theme_diverging = px.colors.diverging.Picnic_r
theme_diverging_r = px.colors.diverging.Picnic
theme_continuous = px.colors.diverging.balance

# Helper functions to standardize display
def _apply_layout_and_show(fig, height, width=None):
    """Apply the shared template and a fixed size to ``fig``, then display it.

    Args:
        fig: Figure-like object exposing ``update_layout(**kwargs)`` and
            ``show()``.
        height: Figure height passed to ``update_layout``.
        width: Optional figure width. When ``None`` no ``width`` is passed,
            so the figure keeps whatever width it already has.
    """
    layout = {'template': template, 'height': height}
    if width is not None:
        layout['width'] = width
    fig.update_layout(**layout)
    fig.show()


def format_and_show_short(fig):
    """Show ``fig`` with the shared template at a height of 400."""
    _apply_layout_and_show(fig, height=400)


def format_and_show(fig):
    """Show ``fig`` with the shared template at a height of 550."""
    _apply_layout_and_show(fig, height=550)


def format_and_show_tall(fig):
    """Show ``fig`` with the shared template at a height of 800."""
    _apply_layout_and_show(fig, height=800)


def format_and_show_3d(fig):
    """Show ``fig`` with the shared template at a fixed 800x600 size."""
    _apply_layout_and_show(fig, height=600, width=800)


def format_and_show_sunburst(fig):
    """Show ``fig`` with the shared template at a fixed 1024x800 size."""
    _apply_layout_and_show(fig, height=800, width=1024)

In [20]:
# Load the merged episode dataset and discard its 'Unnamed: 0' column.
# NOTE(review): `mdf` is not referenced by the plotting cells shown in this
# notebook section - confirm it is still needed.
mdf = pd.read_csv('../Data/Merged.csv').drop(columns=['Unnamed: 0'])
mdf.head()

Unnamed: 0,title,air_date,doctorid,number,rating,votes,description,season,episode_id,episodenbr,...,broadcasthour,duration,views,share,AI,chart,cast,crew,summary,date
0,The Lie of the Land,03 Jun 2017,12,8,7.2,2560,Earth is invaded and Bill is living alone in o...,36,36-8,835,...,7:38pm,00:44:38,4.82m,20.3%,82.0,30,"[{""role"":""The Doctor"",""name"":""Peter Capaldi""},...","[{""role"":""Writer"",""name"":""Toby Whithouse""},{""r...",The world is gripped by a mass delusion and on...,2017-06-03
1,The Woman Who Fell to Earth,07 Oct 2018,13,1,7.1,8066,"In a South Yorkshire city, Ryan Sinclair, Yasm...",37,37-1,841,...,6:45pm,01:03:04,10.96m,45.1%,83.0,1,"[{""role"":""The Doctor"",""name"":""Jodie Whittaker""...","[{""role"":""Writer"",""name"":""Chris Chibnall""},{""r...",We don’t get aliens in Sheffield. \n\n \n\nIn ...,2018-10-07
2,Resolution,1 Jan 2019,13,11,6.0,2690,"As the New Year begins, a terrifying evil is s...",37,37-11,851,...,7:00pm,01:00:00,7.13m,26.6%,80.0,14,"[{""role"":""The Doctor"",""name"":""Jodie Whittaker""...","[{""role"":""Writer"",""name"":""Chris Chibnall""},{""r...",,2019-01-01
3,The Doctor Falls,1 Jul 2017,12,12,9.0,3584,The Doctor makes a final stand against an army...,36,36-12,839,...,6:31pm,01:00:27,5.30m,31.3%,83.0,16,"[{""role"":""The Doctor"",""name"":""Peter Capaldi""},...","[{""role"":""Writer"",""name"":""Steven Moffat""},{""ro...",The Mondasian Cybermen are on the rise. It’s t...,2017-07-01
4,Army of Ghosts,"1 Jul, 2006",10,12,8.5,4885,When ghosts of loved ones appear all over the ...,28,28-12,722,...,7:01pm,00:43:20,8.19m,45.1%,86.0,7,"[{""role"":""The Doctor"",""name"":""David Tennant""},...","[{""role"":""Writer"",""name"":""Russell T Davies""},{...","The Cybermen have invaded Earth, but the Docto...",2006-07-01


In [21]:
# Display names for dataframe columns, passed to plotly express as `labels`
# so axis titles, legends and tooltips are capitalised consistently.
labels = dict(
    doctorid='Doctor',
    doctor='Doctor',
    rating='Rating',
    date='Date',
    season='Season',
    season_str='Season',
    cluster='Cluster',
    share='Share',
)

# Columns shown on each point's tooltip (passed as `hover_data`).
hover_data = [
    'season',
    'doctorid',
    'Writer',
    'Director',
    'Producer',
    'rating',
]

In [22]:
# Episode ratings over time, coloured by season. Both a discrete sequence and
# a continuous scale are supplied; plotly express picks one based on whether
# the `season` column is numeric.
fig = px.scatter(
    df,
    title='Ratings by Season',
    x='date',
    y='rating',
    color='season',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_continuous_scale=theme_sequential,
    color_discrete_sequence=theme_sequential,
)

format_and_show(fig)

In [23]:
# 3D view of rating (z) against producer (x) and writer (y), one colour per
# Doctor (the string-typed `doctor` column).
fig = px.scatter_3d(
    df,
    title='Ratings by Writer, Producer, and Doctor',
    x='Producer',
    y='Writer',
    z='rating',
    color='doctor',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_discrete_sequence=theme_sequential_r,
    color_continuous_scale=theme_sequential,
)

format_and_show_3d(fig)

In [24]:
# 3D view of rating (z) against producer (x) and director (y), one colour per
# Doctor (the string-typed `doctor` column).
fig = px.scatter_3d(
    df,
    title='Ratings by Director, Producer, and Doctor',
    x='Producer',
    y='Director',
    z='rating',
    color='doctor',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_discrete_sequence=theme_sequential_r,
    color_continuous_scale=theme_sequential,
)

format_and_show_3d(fig)

In [25]:
# 3D view of rating (z) against director (x) and writer (y), one colour per
# Doctor. The x axis is 'Director' so the plot matches its title; with
# x='Producer' this cell only repeated the Writer/Producer figure.
fig = px.scatter_3d(
    df,
    x='Director',
    y='Writer',
    z='rating',
    color='doctor',
    title='Ratings by Director, Writer, and Doctor',
    labels=labels,
    hover_name='title',
    hover_data=hover_data,
    color_continuous_scale=theme_sequential,
    color_discrete_sequence=theme_sequential_r,
)

format_and_show_3d(fig)

In [26]:
# Horizontal box plot of episode ratings per writer, with every episode also
# drawn as a jittered point. The legend is hidden because it would only
# repeat the writer names already on the y axis.
fig = px.box(
    df.sort_values(by=['rating', 'Writer']),
    title='Ratings by Writer',
    x='rating',
    y='Writer',
    color='Writer',
    points='all',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_discrete_sequence=theme_discrete,
).update_traces(quartilemethod='linear', jitter=1, showlegend=False)

format_and_show_tall(fig)

In [27]:
# Horizontal box plot of episode ratings per director, with every episode
# also drawn as a jittered point. The legend is hidden because it would only
# repeat the director names already on the y axis.
fig = px.box(
    df.sort_values(by=['rating', 'Director']),
    title='Ratings by Director',
    x='rating',
    y='Director',
    color='Director',
    points='all',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_discrete_sequence=theme_discrete,
).update_traces(quartilemethod='linear', jitter=1, showlegend=False)

format_and_show_tall(fig)

In [28]:
# Horizontal box plot of episode ratings per producer, with every episode
# also drawn as a jittered point. The legend is hidden because it would only
# repeat the producer names already on the y axis.
fig = px.box(
    df.sort_values(by=['rating', 'Producer']),
    title='Ratings by Producer',
    x='rating',
    y='Producer',
    color='Producer',
    points='all',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_discrete_sequence=theme_discrete,
).update_traces(quartilemethod='linear', jitter=1, showlegend=False)

format_and_show(fig)

In [29]:
# Horizontal box plot of episode ratings per composer (the 'Music' column),
# with every episode also drawn as a jittered point. The legend is hidden
# because it would only repeat the names already on the y axis.
fig = px.box(
    df.sort_values(by=['rating', 'Music']),
    title='Ratings by Composer',
    x='rating',
    y='Music',
    color='Music',
    points='all',
    hover_name='title',
    hover_data=hover_data,
    labels=labels,
    color_discrete_sequence=theme_discrete,
).update_traces(quartilemethod='linear', jitter=1, showlegend=False)

format_and_show(fig)

In [30]:
# Distribution of episode ratings, one colour per Doctor, with a violin
# marginal above the histogram.
# No `y` is passed: px.histogram aggregates `y` (summing it by default), so
# y='doctorid' would scale each bar by the Doctor's id number. Without `y`
# each bar is a plain count of episodes in that rating bin.
fig = px.histogram(
    df.sort_values(['doctorid']),
    x='rating',
    color='doctorid',
    title='Ratings by Doctor',
    marginal='violin',
    nbins=75,
    labels=labels,
    hover_name='title',
    hover_data=hover_data,
    color_discrete_sequence=theme_sequential,
)

# Label the main histogram's y axis (the marginal uses a separate axis).
fig.update_layout(yaxis_title='Episodes')

format_and_show_tall(fig)

In [31]:
# Distribution of episode ratings, one colour per season, with a violin
# marginal above the histogram.
# No `y` is passed: px.histogram aggregates `y` (summing it by default), so
# y='season' would scale each bar by the season number. Without `y` each bar
# is a plain count of episodes in that rating bin.
fig = px.histogram(
    df.sort_values(['season']),
    x='rating',
    color='season',
    title='Ratings by Season',
    marginal='violin',
    nbins=75,
    labels=labels,
    hover_name='title',
    hover_data=hover_data,
    color_discrete_sequence=theme_discrete,
)

# Label the main histogram's y axis (the marginal uses a separate axis).
fig.update_layout(yaxis_title='Episodes')

format_and_show_tall(fig)

In [32]:
# Indicator columns are the ones whose names start with 'Has ' or 'Is '.
factors = [name for name in df if name.startswith(('Has ', 'Is '))]

# Draw one ratings-over-time scatter per indicator column, coloured by that
# column's value.
for factor in factors:
    fig = px.scatter(
        df,
        title=f'Ratings by {factor}',
        x='date',
        y='rating',
        color=factor,
        hover_name='title',
        hover_data=hover_data,
        labels=labels,
        color_discrete_sequence=theme_discrete,
        color_continuous_scale=theme_diverging_neutral,
    )

    format_and_show(fig)