In [22]:
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
from math import ceil

In [23]:
# Read the raw episode table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="Scooby-Doo Completed.csv")

# Quick look at the first rows to confirm the load worked.
df.head()

Unnamed: 0,index,series.name,network,season,title,imdb,engagement,date.aired,run.time,format,...,batman,scooby-dum,scrappy-doo,hex.girls,blue.falcon,fred.va,daphnie.va,velma.va,shaggy.va,scooby.va
0,1,"Scooby Doo, Where Are You!",CBS,1,What a Night for a Knight,8.1,556.0,1969-09-13,21,TV Series,...,False,False,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick
1,2,"Scooby Doo, Where Are You!",CBS,1,A Clue for Scooby Doo,8.1,479.0,1969-09-20,22,TV Series,...,False,False,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick
2,3,"Scooby Doo, Where Are You!",CBS,1,Hassle in the Castle,8.0,455.0,1969-09-27,21,TV Series,...,False,False,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick
3,4,"Scooby Doo, Where Are You!",CBS,1,Mine Your Own Business,7.8,426.0,1969-10-04,21,TV Series,...,False,False,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick
4,5,"Scooby Doo, Where Are You!",CBS,1,Decoy for a Dognapper,7.5,391.0,1969-10-11,21,TV Series,...,False,False,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick


In [24]:
def get_caught_by(row):
    """Summarise which gang members caught the monster in one episode.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide the keys ``caught.fred``, ``caught.daphnie``,
        ``caught.velma``, ``caught.shaggy`` and ``caught.scooby``.

    Returns
    -------
    str
        ``"None"`` when nobody is flagged, ``"Whole Gang"`` when all five
        are flagged, otherwise the capitalised names joined with ``" & "``
        in the fixed order Fred, Daphnie, Velma, Shaggy, Scooby.
    """
    gang = ['fred', 'daphnie', 'velma', 'shaggy', 'scooby']

    def is_flagged(value):
        # A bare truthiness test is wrong for this data: NaN and placeholder
        # strings such as 'None', 'False' or 'NULL' are all truthy in Python,
        # so missing values would be counted as a catch. Only a real True
        # (or the text "true", any case) counts as a flag.
        if isinstance(value, str):
            return value.strip().lower() == 'true'
        return (not pd.isna(value)) and bool(value)

    catchers = [
        member.capitalize()
        for member in gang
        if is_flagged(row[f'caught.{member}'])
    ]
    if not catchers:
        return "None"
    if len(catchers) == len(gang):
        return "Whole Gang"
    return " & ".join(catchers)


In [25]:
def unmasked_by(row):
    """Summarise which gang members unmasked the monster in one episode.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide the keys ``unmask.fred``, ``unmask.daphnie``,
        ``unmask.velma``, ``unmask.shaggy`` and ``unmask.scooby``.

    Returns
    -------
    str
        ``"None"`` when nobody is flagged, ``"Whole Gang"`` when all five
        are flagged, otherwise the capitalised names joined with ``" & "``
        in the fixed order Fred, Daphnie, Velma, Shaggy, Scooby.
    """
    gang = ['fred', 'daphnie', 'velma', 'shaggy', 'scooby']

    def did_unmask(value):
        # NaN (a missing cell) and placeholder strings like 'None' or
        # 'False' are truthy in Python, so `if value:` would credit absent
        # characters with the unmasking. Accept only a genuine True or the
        # text "true" (any case).
        if isinstance(value, str):
            return value.strip().lower() == 'true'
        return (not pd.isna(value)) and bool(value)

    unmaskers = [
        member.capitalize()
        for member in gang
        if did_unmask(row[f'unmask.{member}'])
    ]
    if not unmaskers:
        return "None"
    if len(unmaskers) == len(gang):
        return "Whole Gang"
    return " & ".join(unmaskers)


In [26]:
gang_members = ['fred', 'daphnie', 'velma', 'shaggy', 'scooby']

# --- Monster descriptors ---------------------------------------------------
# Missing entries (NaN, or a literal 'NULL' string) become the label 'None'.
# Each column is cleaned from its OWN values: 'monster.name' must not be
# overwritten with the contents of 'monster.type'.
for monster_col in ['monster.name', 'monster.real']:
    df[monster_col] = df[monster_col].replace('NULL', 'None').fillna('None')

# Collapse the spelling variants of "Disguised" and treat blank / missing
# entries as 'None'. Matching is on the whole cell value, not substrings.
df['monster.type'] = (
    df['monster.type']
    .replace({
        'NULL': 'None',
        ' ': 'None',
        'Disguise': 'Disguised',
        'Disugised': 'Disguised',
        ' Disguised': 'Disguised',
    })
    .fillna('None')
)


# --- Per-character boolean flags -------------------------------------------
def _to_flag(value):
    """Return True only for a genuine True or the text 'true' (any case).

    Missing values (NaN/None) and placeholder strings map to False. Filling
    them with strings such as 'False' or 'None' would be a bug because any
    non-empty string is truthy when the flag is later tested with `if`.
    """
    if isinstance(value, str):
        return value.strip().lower() == 'true'
    return (not pd.isna(value)) and bool(value)


# 'unmask.*' is included because unmasked_by() reads those columns with the
# same truthiness test that get_caught_by() applies to 'caught.*'.
for prefix in ['caught', 'captured', 'unmask']:
    for member in gang_members:
        flag_col = f'{prefix}.{member}'
        df[flag_col] = df[flag_col].map(_to_flag).astype(bool)


# --- Voice actors ----------------------------------------------------------
# Gaps are filled with the value from the closest earlier row.
# NOTE(review): presumably a gap means the character is absent from that
# episode, in which case the carried-forward actor is a placeholder - confirm.
for member in gang_members:
    va_col = f'{member}.va'
    df[va_col] = df[va_col].ffill()


# --- Derived summary columns -----------------------------------------------
df['monster_caught_by'] = df.apply(get_caught_by, axis=1)
df['monster_unmasked_by'] = df.apply(unmasked_by, axis=1)


# Preview the flattened dataset
df.head(100)

Unnamed: 0,index,series.name,network,season,title,imdb,engagement,date.aired,run.time,format,...,scrappy-doo,hex.girls,blue.falcon,fred.va,daphnie.va,velma.va,shaggy.va,scooby.va,monster_caught_by,monster_unmasked_by
0,1,"Scooby Doo, Where Are You!",CBS,1,What a Night for a Knight,8.1,556.0,1969-09-13,21,TV Series,...,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick,Shaggy & Scooby,Scooby
1,2,"Scooby Doo, Where Are You!",CBS,1,A Clue for Scooby Doo,8.1,479.0,1969-09-20,22,TV Series,...,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick,Shaggy,Fred
2,3,"Scooby Doo, Where Are You!",CBS,1,Hassle in the Castle,8.0,455.0,1969-09-27,21,TV Series,...,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick,Scooby,Fred
3,4,"Scooby Doo, Where Are You!",CBS,1,Mine Your Own Business,7.8,426.0,1969-10-04,21,TV Series,...,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick,Fred,Fred
4,5,"Scooby Doo, Where Are You!",CBS,1,Decoy for a Dognapper,7.5,391.0,1969-10-11,21,TV Series,...,False,False,False,Frank Welker,Stefanianna Christopherson,Nicole Jaffe,Casey Kasem,Don Messick,Scooby,Scooby
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,Laff-a-Lympics,ABC,1,Israel,6.7,23.0,1977-11-12,12,TV Series (segmented),...,False,False,False,Frank Welker,Heather North,Pat Stevens,Casey Kasem,Don Messick,Fred & Daphnie & Velma & Shaggy,Fred & Daphnie & Velma & Shaggy
96,97,Laff-a-Lympics,ABC,1,Africa,6.4,20.0,1977-11-19,12,TV Series (segmented),...,False,False,False,Frank Welker,Heather North,Pat Stevens,Casey Kasem,Don Messick,Fred & Daphnie & Velma & Shaggy,Fred & Daphnie & Velma & Shaggy
97,98,Laff-a-Lympics,ABC,1,San Francisco,6.4,20.0,1977-11-19,12,TV Series (segmented),...,False,False,False,Frank Welker,Heather North,Pat Stevens,Casey Kasem,Don Messick,Fred & Daphnie & Velma & Scooby,Fred & Daphnie & Velma & Scooby
98,99,Laff-a-Lympics,ABC,1,The Grand Canyon,6.3,22.0,1977-11-26,12,TV Series (segmented),...,False,False,True,Frank Welker,Heather North,Pat Stevens,Casey Kasem,Don Messick,Fred & Daphnie & Velma,Fred & Daphnie & Velma


In [None]:
# Write the cleaned table to disk without the row index. The target name
# has no file extension; it is kept exactly as-is.
df.to_csv(path_or_buf="ScoobyDooCleaned", index=False)