### SNL Records
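This notebook collects record holders of Saturday Night Live (SNL): who has the most title appearances overall, per season, and per category, and who has hosted the most episodes.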

In [1]:
import pandas as pd
import numpy as np
import bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
# Configure Bokeh so that show() renders plots inline in this notebook.
output_notebook()

In [2]:
def _read_table(path, **read_kwargs):
    """Read one CSV file as UTF-8 and return it as a DataFrame.

    Extra keyword arguments are forwarded unchanged to ``pd.read_csv``.
    """
    return pd.read_csv(path, encoding="utf-8", **read_kwargs)


# One DataFrame per CSV table under ./db; only the episode table has a date
# column ('aired') that is parsed while loading.
# NOTE(review): `fs` breaks the df* naming pattern used by the other frames and
# is not referenced in the cells visible here — confirm before renaming.
fs = _read_table('./db/snl_season.csv')
dfe = _read_table('./db/snl_episode.csv', parse_dates=['aired'])
dft = _read_table('./db/snl_title.csv')
dfa = _read_table('./db/snl_actor.csv')
dfat = _read_table('./db/snl_actor_title.csv')
dfr = _read_table('./db/snl_rating.csv')

In [3]:
# Join the rating table onto the episode table, then resolve every actor-title
# row to its episode (with rating) and to the matching actor record.
# NOTE(review): merge defaults to an inner join, so rows without a match on the
# join keys are dropped — confirm that is intended for episodes lacking a rating.
episode_keys = ['sid', 'eid']
dfer = dfe.merge(dfr, on=episode_keys)
dfactors = dfat.merge(dfer, on=episode_keys).merge(dfa, on='aid')

### Most Titles

In [4]:
# Number of dfactors rows per name (value_counts sorts descending), then the
# first five entries. Presumably each row is one performer/title credit —
# verify against the actor_title table.
rows_per_name = dfactors['name'].value_counts()
rows_per_name.iloc[:5]

Kenan Thompson     928
Phil Hartman       913
Darrell Hammond    766
Fred Armisen       739
Bill Hader         696
Name: name, dtype: int64

### Most Titles by Season

In [5]:
# Rows per (season id, name); count() tallies the non-null `aid` values.
season_counts = dfactors.groupby(['sid', 'name'])['aid'].count().reset_index()
# Sorting by count descending and keeping the first row seen for each season
# leaves that season's leader (drop_duplicates keeps the first occurrence).
df_title_season = (
    season_counts
    .sort_values('aid', ascending=False)
    .drop_duplicates(['sid'])
    .rename(columns={'sid': 'Season', 'name': 'Name', 'aid': 'Appearances'})
)
# Display in season order; df_title_season itself stays sorted by count.
df_title_season.set_index('Season').sort_index()

Unnamed: 0_level_0,Name,Appearances
Season,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Chevy Chase,129
2,Dan Aykroyd,104
3,Dan Aykroyd,109
4,Bill Murray,96
5,Bill Murray,110
6,Charles Rocket,83
7,Joe Piscopo,97
8,Joe Piscopo,97
9,Tim Kazurinsky,99
10,Christopher Guest,68


### Hosts

In [6]:
# Keep only rows credited as 'host' and collapse them to one row per
# (sid, eid, name), so several titles within one episode count once.
host_rows = dfactors.loc[dfactors['actorType'] == 'host']
df_host = host_rows.groupby(['sid', 'eid', 'name']).count().reset_index()
# Episodes per host name, ten most frequent first.
df_host['name'].value_counts().to_frame().head(10)

Unnamed: 0,name
Alec Baldwin,16
Steve Martin,15
John Goodman,13
Buck Henry,10
Tom Hanks,9
Chevy Chase,8
Christopher Walken,7
Danny DeVito,6
Elliott Gould,6
Drew Barrymore,6


### Most Titles by Category

In [7]:
# Rows per (actorType, name); count() tallies the non-null `aid` values.
category_counts = dfactors.groupby(['actorType', 'name'])['aid'].count().reset_index()
# After sorting by count descending, the first row kept for each actorType is
# that category's leader.
df_title_cat = (
    category_counts
    .sort_values('aid', ascending=False)
    .drop_duplicates(['actorType'])
    .rename(columns={'name': 'Name', 'aid': 'Appearances'})
)
df_title_cat.set_index("actorType")

Unnamed: 0_level_0,Name,Appearances
actorType,Unnamed: 1_level_1,Unnamed: 2_level_1
cast,Kenan Thompson,928
crew,Steve Higgins,490
unknown,Jack Handey,123
host,Alec Baldwin,101
music,Paul Simon,25
guest,Jim Henson's Muppets,17
cameo,Tina Fey,13
filmed,Steve Carell,13


### Actors by Category

In [8]:
# Build a name x actorType table: one row per name, one column per actorType,
# each cell the number of non-null `aid` rows for that pair (0 where the name
# never occurs with that actorType).
# groupby + unstack replaces a hand-written loop that added one column per
# category; that loop would have overwritten its own key columns if a category
# were ever literally called 'Name', 'Type' or 'Appearances'.
df_act_cat = (
    dfactors
    .groupby(['name', 'actorType'])['aid']
    .count()
    .unstack('actorType', fill_value=0)
)
# Match the labels of the previous result: index called 'Name', unnamed columns.
df_act_cat.index.name = 'Name'
df_act_cat.columns.name = None

In [9]:
# Preview only the first rows; the table has one row per distinct name.
df_act_cat.head(10)

Unnamed: 0_level_0,cameo,cast,crew,filmed,guest,host,music,unknown
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
'N Sync,0,0,0,0,0,0,3,0
"10,000 Maniacs",0,0,0,0,0,0,4,0
14 Karat Soul,1,0,0,0,0,0,1,0
2 Chainz,1,0,0,0,0,0,0,0
3-D,0,0,0,0,0,0,1,0
3RDEYEGIRL,1,0,0,0,0,0,0,0
50 Cent,0,0,0,0,0,0,4,0
A Tribe Called Quest,0,0,0,0,0,0,2,0
A. Whitney Brown,1,78,0,0,0,0,0,0
A.J. Benza,1,0,0,0,0,0,0,0


In [10]:
# Toolbar tools for the figure. The trailing comma was removed so the list no
# longer ends with an empty entry.
TOOLS = "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select"

# `width`/`height` are used instead of `plot_width`/`plot_height`, which current
# Bokeh releases no longer accept.
# y_range sets the initial view of the host axis to 0-10; points above that are
# reachable by panning or zooming.
p = figure(
    width=800,
    height=800,
    y_range=(0, 10),
    tools=TOOLS,
    title="Cast vs. host appearances per name",
    x_axis_label="cast",
    y_axis_label="host",
)
# "cast" and "host" are looked up as columns of df_act_cat.
r = p.scatter("cast", "host", source=df_act_cat)
# notebook_handle=True makes show() return a handle for the displayed plot.
t = show(p, notebook_handle=True)