In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
from pylab import rcParams

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

# keras imports - comment them out or do `pip install keras`
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.embeddings import Embedding

# gensim for pretrained word2vec model
import gensim

# for synopsis clean up
import string

# list of stopwords used by MySQL in MyISAM
stop_words = ["a's" , "able" , "about" , "above" , "according" , "accordingly" , "across" , "actually" , "after" , "afterwards" , "again" , "against" , "ain't" , "all" , "allow" , "allows" , "almost" , "alone" , "along" , "already" , "also" , "although" , "always" , "am" , "among" , "amongst" , "an" , "and" , "another" , "any" , "anybody" , "anyhow" , "anyone" , "anything" , "anyway" , "anyways" , "anywhere" , "apart" , "appear" , "appreciate" , "appropriate" , "are" , "aren't" , "around" , "as" , "aside" , "ask" , "asking" , "associated" , "at" , "available" , "away" , "awfully" , "be" , "became" , "because" , "become" , "becomes" , "becoming" , "been" , "before" , "beforehand" , "behind" , "being" , "believe" , "below" , "beside" , "besides" , "best" , "better" , "between" , "beyond" , "both" , "brief" , "but" , "by" , "c'mon" , "c's" , "came" , "can" , "can't" , "cannot" , "cant" , "cause" , "causes" , "certain" , "certainly" , "changes" , "clearly" , "co" , "com" , "come" , "comes" , "concerning" , "consequently" , "consider" , "considering" , "contain" , "containing" , "contains" , "corresponding" , "could" , "couldn't" , "course" , "currently" , "definitely" , "described" , "despite" , "did" , "didn't" , "different" , "do" , "does" , "doesn't" , "doing" , "don't" , "done" , "down" , "downwards" , "during" , "each" , "edu" , "eg" , "eight" , "either" , "else" , "elsewhere" , "enough" , "entirely" , "especially" , "et" , "etc" , "even" , "ever" , "every" , "everybody" , "everyone" , "everything" , "everywhere" , "ex" , "exactly" , "example" , "except" , "far" , "few" , "fifth" , "first" , "five" , "followed" , "following" , "follows" , "for" , "former" , "formerly" , "forth" , "four" , "from" , "further" , "furthermore" , "get" , "gets" , "getting" , "given" , "gives" , "go" , "goes" , "going" , "gone" , "got" , "gotten" , "greetings" , "had" , "hadn't" , "happens" , "hardly" , "has" , "hasn't" , "have" , "haven't" , "having" , "he" , "he's" , "hello" , "help" , "hence" , "her" , "here" , "here's" , "hereafter" , "hereby" , "herein" , "hereupon" , "hers" , "herself" , "hi" , "him" , "himself" , "his" , "hither" , "hopefully" , "how" , "howbeit" , "however" , "i'd" , "i'll" , "i'm" , "i've" , "ie" , "if" , "ignored" , "immediate" , "in" , "inasmuch" , "inc" , "indeed" , "indicate" , "indicated" , "indicates" , "inner" , "insofar" , "instead" , "into" , "inward" , "is" , "isn't" , "it" , "it'd" , "it'll" , "it's" , "its" , "itself" , "just" , "keep" , "keeps" , "kept" , "know" , "known" , "knows" , "last" , "lately" , "later" , "latter" , "latterly" , "least" , "less" , "lest" , "let" , "let's" , "like" , "liked" , "likely" , "little" , "look" , "looking" , "looks" , "ltd" , "mainly" , "many" , "may" , "maybe" , "me" , "mean" , "meanwhile" , "merely" , "might" , "more" , "moreover" , "most" , "mostly" , "much" , "must" , "my" , "myself" , "name" , "namely" , "nd" , "near" , "nearly" , "necessary" , "need" , "needs" , "neither" , "never" , "nevertheless" , "new" , "next" , "nine" , "no" , "nobody" , "non" , "none" , "noone" , "nor" , "normally" , "not" , "nothing" , "novel" , "now" , "nowhere" , "obviously" , "of" , "off" , "often" , "oh" , "ok" , "okay" , "old" , "on" , "once" , "one" , "ones" , "only" , "onto" , "or" , "other" , "others" , "otherwise" , "ought" , "our" , "ours" , "ourselves" , "out" , "outside" , "over" , "overall" , "own" , "particular" , "particularly" , "per" , "perhaps" , "placed" , "please" , "plus" , "possible" , "presumably" , "probably" , "provides" , "que" , "quite" , "qv" , "rather" , "rd" , "re" , "really" , "reasonably" , "regarding" , "regardless" , "regards" , "relatively" , "respectively" , "right" , "said" , "same" , "saw" , "say" , "saying" , "says" , "second" , "secondly" , "see" , "seeing" , "seem" , "seemed" , "seeming" , "seems" , "seen" , "self" , "selves" , "sensible" , "sent" , "serious" , "seriously" , "seven" , "several" , "shall" , "she" , "should" , "shouldn't" , "since" , "six" , "so" , "some" , "somebody" , "somehow" , "someone" , "something" , "sometime" , "sometimes" , "somewhat" , "somewhere" , "soon" , "sorry" , "specified" , "specify" , "specifying" , "still" , "sub" , "such" , "sup" , "sure" , "t's" , "take" , "taken" , "tell" , "tends" , "th" , "than" , "thank" , "thanks" , "thanx" , "that" , "that's" , "thats" , "the" , "their" , "theirs" , "them" , "themselves" , "then" , "thence" , "there" , "there's" , "thereafter" , "thereby" , "therefore" , "therein" , "theres" , "thereupon" , "these" , "they" , "they'd" , "they'll" , "they're" , "they've" , "think" , "third" , "this" , "thorough" , "thoroughly" , "those" , "though" , "three" , "through" , "throughout" , "thru" , "thus" , "to" , "together" , "too" , "took" , "toward" , "towards" , "tried" , "tries" , "truly" , "try" , "trying" , "twice" , "two" , "un" , "under" , "unfortunately" , "unless" , "unlikely" , "until" , "unto" , "up" , "upon" , "us" , "use" , "used" , "useful" , "uses" , "using" , "usually" , "value" , "various" , "very" , "via" , "viz" , "vs" , "want" , "wants" , "was" , "wasn't" , "way" , "we" , "we'd" , "we'll" , "we're" , "we've" , "welcome" , "well" , "went" , "were" , "weren't" , "what" , "what's" , "whatever" , "when" , "whence" , "whenever" , "where" , "where's" , "whereafter" , "whereas" , "whereby" , "wherein" , "whereupon" , "wherever" , "whether" , "which" , "while" , "whither" , "who" , "who's" , "whoever" , "whole" , "whom" , "whose" , "why" , "will" , "willing" , "wish" , "with" , "within" , "without" , "won't" , "wonder" , "would" , "wouldn't" , "yes" , "yet" , "you" , "you'd" , "you'll" , "you're" , "you've" , "your" , "yours" , "yourself" , "yourselves" , "zero"]

rcParams['figure.figsize'] = 10, 6
display(HTML("<style>.container { width:95% !important; }</style>"))

Using TensorFlow backend.
unable to import 'smart_open.gcs', disabling that module


In [2]:
df = pd.read_csv("data/tidy_anime.csv")
df.shape

(77911, 28)

In [3]:
desired_cols = ['animeID', 'title_english', 'type', 'source', 'producers', 'genre', 'studio',
               'episodes', 'premiered', 'rating', 'score', 'scored_by', 'rank', 'popularity',
               'members', 'favorites', 'synopsis']
truncated_df = df[desired_cols]

In [4]:
desired_cols = ['animeID', 'title_english', 'type', 'source', 'producers', 'genre', 'studio',
               'episodes', 'premiered', 'rating', 'score', 'scored_by', 'rank', 'popularity',
               'members', 'favorites', 'synopsis']
truncated_df = df[desired_cols]
truncated_df.head()

Unnamed: 0,animeID,title_english,type,source,producers,genre,studio,episodes,premiered,rating,score,scored_by,rank,popularity,members,favorites,synopsis
0,1,Cowboy Bebop,TV,Original,Bandai Visual,Action,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
1,1,Cowboy Bebop,TV,Original,Bandai Visual,Adventure,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
2,1,Cowboy Bebop,TV,Original,Bandai Visual,Comedy,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
3,1,Cowboy Bebop,TV,Original,Bandai Visual,Drama,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
4,1,Cowboy Bebop,TV,Original,Bandai Visual,Sci-Fi,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."


In [5]:
#change NaN to 0 in Premiered column

truncated_df.loc[:,'premiered'] = truncated_df.loc[:,'premiered'].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [6]:
# filter out bad titles. Only want titles that have an english name

orig_len = len(truncated_df)
filtered_df = truncated_df[truncated_df['title_english'].notnull()]
new_len = len(filtered_df)
print ("removed {} bad anime after filtering for english titled anime only".format(orig_len - new_len))

# drop NaN rows
filtered_df.dropna(inplace=True)
print ("removed {} bad anime after dropping NaN rows".format(new_len - len(filtered_df)))


removed 30430 bad anime after filtering for english titled anime only
removed 8678 bad anime after dropping NaN rows


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [7]:
# currently the anime is duplicated, one row per genre per studio. We need to flatten all to one row
# also need to do this for type, source, producers, rating

all_ids = set(filtered_df['animeID'].unique()) # 1.8K anime IDs
print ("{} unique anime".format(len(all_ids)))

id_genre_mapping = {}
for each_id in all_ids:
    genre_list = list(filtered_df[truncated_df['animeID'] == each_id]['genre'])
    id_genre_mapping[each_id] = genre_list
    
id_studio_mapping = {}
for each_id in all_ids:
    id_studio_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['studio'])
    
id_source_mapping = {}
for each_id in all_ids:
    id_source_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['source'])
    
id_producers_mapping = {}
for each_id in all_ids:
    id_producers_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['producers'])
    
id_rating_mapping = {}
for each_id in all_ids:
    id_rating_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['rating'])
    
id_type_mapping = {}
for each_id in all_ids:
    id_type_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['type'])

2855 unique anime


  if __name__ == '__main__':
  


In [8]:
# get distinct df, remove duplicates
reduced_df = filtered_df.groupby('animeID').head(1)

In [9]:
# will add 40 columns to the data
all_genres = sorted(list(set([item for sublist in id_genre_mapping.values() for item in sublist])))
all_studios = sorted(list(set([item for sublist in id_studio_mapping.values() for item in sublist])))
all_sources = sorted(list(set([item for sublist in id_source_mapping.values() for item in sublist])))
all_producers = sorted(list(set([item for sublist in id_producers_mapping.values() for item in sublist])))
all_ratings = sorted(list(set([item for sublist in id_rating_mapping.values() for item in sublist])))
all_types = sorted(list(set([item for sublist in id_type_mapping.values() for item in sublist])))



In [10]:
anime_IDs = reduced_df.animeID.tolist()
genres_new = []
studios_new = []
sources_new = []
producers_new = []
ratings_new = []
types_new = []
for each_id in anime_IDs:
    genres_new.append(id_genre_mapping[each_id])
    studios_new.append(id_studio_mapping[each_id])
    sources_new.append(id_source_mapping[each_id])
    producers_new.append(id_producers_mapping[each_id])
    ratings_new.append(id_rating_mapping[each_id])
    types_new.append(id_type_mapping[each_id])
reduced_df.head()

Unnamed: 0,animeID,title_english,type,source,producers,genre,studio,episodes,premiered,rating,score,scored_by,rank,popularity,members,favorites,synopsis
0,1,Cowboy Bebop,TV,Original,Bandai Visual,Action,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
6,5,Cowboy Bebop: The Movie,Movie,Original,Sunrise,Action,Bones,1.0,0,R - 17+ (violence & profanity),8.41,120243,164,449,197791,776,"Another day, another bounty—such is the life o..."
16,6,Trigun,TV,Manga,Victor Entertainment,Action,Madhouse,26.0,Spring 1998,PG-13 - Teens 13 or older,8.3,212537,255,146,408548,10432,"Vash the Stampede is the man with a $$60,000,0..."
22,7,Witch Hunter Robin,TV,Original,Bandai Visual,Action,Sunrise,26.0,Summer 2002,PG-13 - Teens 13 or older,7.33,32837,2371,1171,79397,537,Witches are individuals with special powers li...
28,8,Beet the Vandel Buster,TV,Manga,TV Tokyo,Adventure,Toei Animation,52.0,Fall 2004,PG - Children,7.03,4894,3544,3704,11708,14,It is the dark century and the people are suff...


In [11]:
# pull out english titles and relevant data we want to show in output
info_df = reduced_df[['animeID', 'title_english', 'synopsis']]
# write this data to a new csv so we can reference it later
info_df.to_csv(r'data/relevant_output_data.csv')

In [12]:
mlb = MultiLabelBinarizer()
encoded_genres = mlb.fit_transform(genres_new)
encoded_studios = mlb.fit_transform(studios_new)
encoded_sources = mlb.fit_transform(sources_new)
encoded_producers = mlb.fit_transform(producers_new)
encoded_ratings = mlb.fit_transform(ratings_new)
encoded_types = mlb.fit_transform(types_new)

In [13]:
genre_columns_added = encoded_genres.shape[1]

# for col_idx in range(genre_columns_added):
#     reduced_df.insert(len(reduced_df.columns), "genre_{}".format(col_idx+1), encoded_genres[:, col_idx])
    
for col_idx in range(genre_columns_added):
    reduced_df.insert(len(reduced_df.columns), "genre_{}".format(all_genres[col_idx]), encoded_genres[:, col_idx])

for col_idx in range(encoded_studios.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "studio_{}".format(all_studios[col_idx]), encoded_studios[:, col_idx])
    
for col_idx in range(encoded_sources.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "source_{}".format(all_sources[col_idx]), encoded_sources[:, col_idx])
    
for col_idx in range(encoded_producers.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "producer_{}".format(all_producers[col_idx]), encoded_producers[:, col_idx])
    
for col_idx in range(encoded_ratings.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "rating_{}".format(all_ratings[col_idx]), encoded_ratings[:, col_idx])
    
for col_idx in range(encoded_types.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "type_{}".format(all_types[col_idx]), encoded_types[:, col_idx])

In [14]:
#### Decreasing Count of Entries for each column value

#set options to see full dataframe
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)


ranked_genres = reduced_df.groupby('genre').count().sort_values(by=["animeID"], ascending=False)['animeID'].keys().tolist()
#genres_to_drop = ranked_genres[21:]

for genre in ranked_genres[:21]:
    all_genres.remove(genre)
    #Keep top 15? Add Horror, Historical?
genres_to_drop = ["genre_" + genre for genre in all_genres]

ranked_studios = reduced_df.groupby('studio').count().sort_values(by=["animeID"], ascending=False)['animeID'].keys().tolist()
#studios_to_drop = ranked_studios[36:]
    #Keep studios with 20+ records?

for studio in ranked_studios[:36]:
    all_studios.remove(studio)
    #Keep top 15? Add Horror, Historical?
studios_to_drop = ["studio_" + studio for studio in all_studios]

ranked_sources = reduced_df.groupby('source').count().sort_values(by=["animeID"], ascending=False)['animeID'].keys().tolist()
#sources_to_drop = ranked_sources[7:]
#     #Keep all 15?

for source in ranked_sources[:7]:
    all_sources.remove(source)
sources_to_drop = ["source_" + source for source in all_sources]


ranked_producers = reduced_df.groupby('producers').count().sort_values(by=["animeID"], ascending=False)['animeID'].keys().tolist()
#producers_to_drop = ranked_producers[25:]
#     #Keep Producers with 30+ records?
for producer in ranked_producers[:25]:
    all_producers.remove(producer)
producers_to_drop = ["producer_" + producer for producer in all_producers]

ranked_ratings = reduced_df.groupby('rating').count().sort_values(by=["animeID"], ascending=False)['animeID'].keys().tolist()
    #Keep all except 'None'?
    
for rating in ranked_ratings[:5]:
    all_ratings.remove(rating)
ratings_to_drop = ["rating_" + rating for rating in all_ratings]

    
#reduced_df.groupby('type').count().sort_values(by=["animeID"], ascending=False)['animeID']

columns_to_drop = genres_to_drop + studios_to_drop + sources_to_drop + producers_to_drop + ratings_to_drop

len(columns_to_drop)


  import sys


991

In [15]:
reduced_df = reduced_df.drop(['type', 'source', 'producers', 'genre', 'studio', 'rating'], axis = 1)
reduced_df.to_csv(r'data/one_hot_encode_complete.csv')

reduced_df = reduced_df.drop(columns_to_drop, axis = 1)
reduced_df.to_csv(r'data/one_hot_encode_reduced.csv')

reduced_df.head()

Unnamed: 0,animeID,title_english,episodes,premiered,score,scored_by,rank,popularity,members,favorites,synopsis,genre_Action,genre_Adventure,genre_Comedy,genre_Dementia,genre_Drama,genre_Ecchi,genre_Fantasy,genre_Game,genre_Harem,genre_Historical,genre_Horror,genre_Kids,genre_Magic,genre_Military,genre_Music,genre_Mystery,genre_Psychological,genre_Romance,genre_Sci-Fi,genre_Slice of Life,genre_Sports,"studio_""Brains Base""",studio_A-1 Pictures,studio_AIC,studio_Arms,studio_Bones,studio_Diomedea,studio_Doga Kobo,studio_Gainax,studio_Gonzo,studio_J.C.Staff,studio_Kyoto Animation,studio_MAPPA,studio_Madhouse,studio_Manglobe,studio_Nippon Animation,studio_OLM,studio_P.A. Works,studio_Production I.G,studio_Production Reed,studio_Satelight,studio_Shaft,studio_Shin-Ei Animation,studio_Silver Link.,studio_Studio Deen,studio_Studio Gallop,studio_Studio Gokumi,studio_Studio Pierrot,studio_Sunrise,studio_TMS Entertainment,studio_TNK,studio_Tatsunoko Production,studio_Toei Animation,studio_Xebec,studio_Zexcs,studio_feel.,studio_ufotable,source_Game,source_Light novel,source_Manga,source_Novel,source_Original,source_Unknown,source_Visual novel,producer_Aniplex,producer_Bandai Visual,producer_DAX Production,producer_Dentsu,producer_Frontier Works,producer_Fuji TV,producer_Genco,producer_Geneon Universal Entertainment,producer_Kadokawa Shoten,producer_Lantis,producer_Media Factory,producer_Movic,producer_NHK,producer_Nihon Ad Systems,producer_Pony Canyon,producer_Production I.G,producer_Shochiku,producer_Shogakukan Productions,producer_Sotsu,producer_Starchild Records,producer_TBS,producer_TV Asahi,producer_TV Tokyo,producer_VAP,producer_Victor Entertainment,rating_G - All Ages,rating_PG - Children,rating_PG-13 - Teens 13 or older,rating_R - 17+ (violence & profanity),rating_R+ - Mild Nudity,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
0,1,Cowboy Bebop,26.0,Spring 1998,8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as ""Cowboys."" The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member's dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic and an homage to the smooth and improvised music it is named after. [Written by MAL Rewrite]",1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
6,5,Cowboy Bebop: The Movie,1.0,0,8.41,120243,164,449,197791,776,"Another day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this routine is interrupted when Faye, who is chasing a fairly worthless target on Mars, witnesses an oil tanker suddenly explode, causing mass hysteria. As casualties mount due to a strange disease spreading through the smoke from the blast, a whopping three hundred million woolong price is placed on the head of the supposed perpetrator. With lives at stake and a solution to their money problems in sight, the Bebop crew springs into action. Spike, Jet, Faye, and Edward, followed closely by Ein, split up to pursue different leads across Alba City. Through their individual investigations, they discover a cover-up scheme involving a pharmaceutical company, revealing a plot that reaches much further than the ragtag team of bounty hunters could have realized. [Written by MAL Rewrite]",1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
16,6,Trigun,26.0,Spring 1998,8.3,212537,255,146,408548,10432,"Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merciless villain who lays waste to all those that oppose him and flattens entire cities for fun, garnering him the title ""The Humanoid Typhoon."" He leaves a trail of death and destruction wherever he goes, and anyone can count themselves dead if they so much as make eye contact—or so the rumors say. In actuality, Vash is a huge softie who claims to have never taken a life and avoids violence at all costs. With his crazy doughnut obsession and buffoonish attitude in tow, Vash traverses the wasteland of the planet Gunsmoke, all the while followed by two insurance agents, Meryl Stryfe and Milly Thompson, who attempt to minimize his impact on the public. But soon, their misadventures evolve into life-or-death situations as a group of legendary assassins are summoned to bring about suffering to the trio. Vash's agonizing past will be unraveled and his morality and principles pushed to the breaking point. [Written by MAL Rewrite]",1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1
22,7,Witch Hunter Robin,26.0,Summer 2002,7.33,32837,2371,1171,79397,537,"Witches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-year-old craft user, arrives from Italy to Japan to work for an organization named STN Japan Division (STN-J) as a replacement for one of STN-J's witch hunters who was recently killed. Unlike other divisions of STN, STN-J tries to capture the witches alive in order to learn why and how they became witches in the first place. (Source: ANN)",1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1
28,8,Beet the Vandel Buster,52.0,Fall 2004,7.03,4894,3544,3704,11708,14,"It is the dark century and the people are suffering under the rule of the devil, Vandel, who is able to manipulate monsters. The Vandel Busters are a group of people who hunt these devils, and among them, the Zenon Squad is known to be the strongest busters on the continent. A young boy, Beet, dreams of joining the Zenon Squad. However, one day, as a result of Beet's fault, the Zenon squad was defeated by the devil, Beltose. The five dying busters sacrificed their life power into their five weapons, Saiga. After giving their weapons to Beet, they passed away. Years have passed since then and the young Vandel Buster, Beet, begins his adventure to carry out the Zenon Squad's will to put an end to the dark century.",0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1


# Encoding textual data - tokenization approach
* May want to look into cleaning all the synopsis first, ex: remove (), lower case, etc (disregarding this for now)

In [16]:
synopsis_list = reduced_df['synopsis'].tolist()
# synopsis_list

In [17]:
VOCAB_SIZE = None
MAX_SEQ_LEN = 0

# find vocab_size
all_words = {}

# DEBUG
debug = False
init = 0
count = 100

for synopsis in synopsis_list:

    # =========================
    if debug:
        if init == count - 1:
            print("Original Synopsis:")
            print(synopsis + "\n")
    # =========================
    
    # deletes synopsis credits e.g. [Written by MAL Rewrite], (Source: ANN)
    if synopsis[-1] == ")":
        idx = synopsis.rfind("(")
        synopsis = synopsis[:idx]
    elif synopsis[-1] == "]":
        idx = synopsis.rfind("[")
        synopsis = synopsis[:idx]
    
    # deletes punctuation
    punctuation = string.punctuation
    for c in punctuation:
        synopsis = synopsis.replace(c, "")
    
    synopsis = synopsis.lower()
    
    # =========================
    if debug:
        if init == count - 1:
            print("Lower Case Synopsis Without Punctuation:")
            print(synopsis + "\n")
    # =========================

    word_list = synopsis.split(" ")
    
    
    # =========================
    if debug:
        if init == count - 1:
            print("Original Word List:")
            print(str(word_list) + "\n")
    # =========================
    
    # remove stop words
    word_list = [x for x in word_list if x not in stop_words]
    
    
    # =========================
    if debug:
        if init == count - 1:
            print("Word List Without Stop Words:")
            print(str(word_list) + "\n")
        init += 1
    # =========================
    
    # find max seq len
    if len(word_list) > MAX_SEQ_LEN:
        MAX_SEQ_LEN = len(word_list)
        sent = word_list

    for ea_word in word_list:
        
        if ea_word in all_words:
            all_words[ea_word] += 1
        else:
            all_words[ea_word] = 1
VOCAB_SIZE = len(all_words.keys())
print ('vocab_size = ', VOCAB_SIZE)
print ('max_seq_len = ', MAX_SEQ_LEN)
# print (MAX_SEQ_LEN, sent)

vocab_size =  25753
max_seq_len =  290


In [18]:
encoded_synopsis = [one_hot(x, VOCAB_SIZE) for x in synopsis_list]
padded_synopsis = pad_sequences(encoded_synopsis, maxlen=MAX_SEQ_LEN, padding='pre')
padded_synopsis.shape

(2855, 290)

In [19]:
# load pretrained google word2vec model
pretrained_path = 'https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz'
model = gensim.models.KeyedVectors.load_word2vec_format(pretrained_path, binary=True)
# model = gensim.models.KeyedVectors.load_word2vec_format('./data/GoogleNews-vectors-negative300.bin.gz', binary=True)

In [20]:
# get averaged word embedding
EMBED_DIM = 300

all_synopsis_vectors = np.empty((0, EMBED_DIM))
print (all_synopsis_vectors.shape)
for each_synopsis in synopsis_list:
    synopsis_words = each_synopsis.split(" ")
    word_count = len(synopsis_words)
    synopsis_sum_vector = np.zeros((EMBED_DIM,))
    for each_word in synopsis_words:
        try:
            synopsis_sum_vector += model[each_word]
        except:
            # word not in pretrained vocab
            pass
    synopsis_avg_vector = (synopsis_sum_vector / word_count).reshape(1, -1)
    all_synopsis_vectors = np.append(all_synopsis_vectors, synopsis_avg_vector, axis=0)
all_synopsis_vectors.shape

(0, 300)


(2855, 300)

In [21]:
synopsis_columns_added = all_synopsis_vectors.shape[1]
for col_idx in range(synopsis_columns_added):
    reduced_df.insert(len(reduced_df.columns), "synopsis_embedded_{}".format(col_idx+1), all_synopsis_vectors[:, col_idx])
reduced_df.head()

Unnamed: 0,animeID,title_english,episodes,premiered,score,scored_by,rank,popularity,members,favorites,synopsis,genre_Action,genre_Adventure,genre_Comedy,genre_Dementia,genre_Drama,genre_Ecchi,genre_Fantasy,genre_Game,genre_Harem,genre_Historical,genre_Horror,genre_Kids,genre_Magic,genre_Military,genre_Music,genre_Mystery,genre_Psychological,genre_Romance,genre_Sci-Fi,genre_Slice of Life,genre_Sports,"studio_""Brains Base""",studio_A-1 Pictures,studio_AIC,studio_Arms,studio_Bones,studio_Diomedea,studio_Doga Kobo,studio_Gainax,studio_Gonzo,studio_J.C.Staff,studio_Kyoto Animation,studio_MAPPA,studio_Madhouse,studio_Manglobe,studio_Nippon Animation,studio_OLM,studio_P.A. Works,studio_Production I.G,studio_Production Reed,studio_Satelight,studio_Shaft,studio_Shin-Ei Animation,studio_Silver Link.,studio_Studio Deen,studio_Studio Gallop,studio_Studio Gokumi,studio_Studio Pierrot,studio_Sunrise,studio_TMS Entertainment,studio_TNK,studio_Tatsunoko Production,studio_Toei Animation,studio_Xebec,studio_Zexcs,studio_feel.,studio_ufotable,source_Game,source_Light novel,source_Manga,source_Novel,source_Original,source_Unknown,source_Visual novel,producer_Aniplex,producer_Bandai Visual,producer_DAX Production,producer_Dentsu,producer_Frontier Works,producer_Fuji TV,producer_Genco,producer_Geneon Universal Entertainment,producer_Kadokawa Shoten,producer_Lantis,producer_Media Factory,producer_Movic,producer_NHK,producer_Nihon Ad Systems,producer_Pony Canyon,producer_Production I.G,producer_Shochiku,producer_Shogakukan Productions,producer_Sotsu,producer_Starchild Records,producer_TBS,producer_TV Asahi,producer_TV Tokyo,producer_VAP,producer_Victor Entertainment,rating_G - All Ages,rating_PG - Children,rating_PG-13 - Teens 13 or older,rating_R - 17+ (violence & profanity),rating_R+ - Mild Nudity,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,synopsis_embedded_1,synopsis_embedded_2,synopsis_embedded_3,synopsis_embedded_4,synopsis_embedded_5,synopsis_embedded_6,synopsis_embedded_7,synopsis_embedded_8,synopsis_embedded_9,synopsis_embedded_10,synopsis_embedded_11,synopsis_embedded_12,synopsis_embedded_13,synopsis_embedded_14,synopsis_embedded_15,synopsis_embedded_16,synopsis_embedded_17,synopsis_embedded_18,synopsis_embedded_19,synopsis_embedded_20,synopsis_embedded_21,synopsis_embedded_22,synopsis_embedded_23,synopsis_embedded_24,synopsis_embedded_25,synopsis_embedded_26,synopsis_embedded_27,synopsis_embedded_28,synopsis_embedded_29,synopsis_embedded_30,synopsis_embedded_31,synopsis_embedded_32,synopsis_embedded_33,synopsis_embedded_34,synopsis_embedded_35,synopsis_embedded_36,synopsis_embedded_37,synopsis_embedded_38,synopsis_embedded_39,synopsis_embedded_40,synopsis_embedded_41,synopsis_embedded_42,synopsis_embedded_43,synopsis_embedded_44,synopsis_embedded_45,synopsis_embedded_46,synopsis_embedded_47,synopsis_embedded_48,synopsis_embedded_49,synopsis_embedded_50,synopsis_embedded_51,synopsis_embedded_52,synopsis_embedded_53,synopsis_embedded_54,synopsis_embedded_55,synopsis_embedded_56,synopsis_embedded_57,synopsis_embedded_58,synopsis_embedded_59,synopsis_embedded_60,synopsis_embedded_61,synopsis_embedded_62,synopsis_embedded_63,synopsis_embedded_64,synopsis_embedded_65,synopsis_embedded_66,synopsis_embedded_67,synopsis_embedded_68,synopsis_embedded_69,synopsis_embedded_70,synopsis_embedded_71,synopsis_embedded_72,synopsis_embedded_73,synopsis_embedded_74,synopsis_embedded_75,synopsis_embedded_76,synopsis_embedded_77,synopsis_embedded_78,synopsis_embedded_79,synopsis_embedded_80,synopsis_embedded_81,synopsis_embedded_82,synopsis_embedded_83,synopsis_embedded_84,synopsis_embedded_85,synopsis_embedded_86,synopsis_embedded_87,synopsis_embedded_88,synopsis_embedded_89,synopsis_embedded_90,synopsis_embedded_91,synopsis_embedded_92,synopsis_embedded_93,synopsis_embedded_94,synopsis_embedded_95,synopsis_embedded_96,synopsis_embedded_97,synopsis_embedded_98,synopsis_embedded_99,synopsis_embedded_100,synopsis_embedded_101,synopsis_embedded_102,synopsis_embedded_103,synopsis_embedded_104,synopsis_embedded_105,synopsis_embedded_106,synopsis_embedded_107,synopsis_embedded_108,synopsis_embedded_109,synopsis_embedded_110,synopsis_embedded_111,synopsis_embedded_112,synopsis_embedded_113,synopsis_embedded_114,synopsis_embedded_115,synopsis_embedded_116,synopsis_embedded_117,synopsis_embedded_118,synopsis_embedded_119,synopsis_embedded_120,synopsis_embedded_121,synopsis_embedded_122,synopsis_embedded_123,synopsis_embedded_124,synopsis_embedded_125,synopsis_embedded_126,synopsis_embedded_127,synopsis_embedded_128,synopsis_embedded_129,synopsis_embedded_130,synopsis_embedded_131,synopsis_embedded_132,synopsis_embedded_133,synopsis_embedded_134,synopsis_embedded_135,synopsis_embedded_136,synopsis_embedded_137,synopsis_embedded_138,synopsis_embedded_139,synopsis_embedded_140,synopsis_embedded_141,synopsis_embedded_142,synopsis_embedded_143,synopsis_embedded_144,synopsis_embedded_145,synopsis_embedded_146,synopsis_embedded_147,synopsis_embedded_148,synopsis_embedded_149,synopsis_embedded_150,synopsis_embedded_151,synopsis_embedded_152,synopsis_embedded_153,synopsis_embedded_154,synopsis_embedded_155,synopsis_embedded_156,synopsis_embedded_157,synopsis_embedded_158,synopsis_embedded_159,synopsis_embedded_160,synopsis_embedded_161,synopsis_embedded_162,synopsis_embedded_163,synopsis_embedded_164,synopsis_embedded_165,synopsis_embedded_166,synopsis_embedded_167,synopsis_embedded_168,synopsis_embedded_169,synopsis_embedded_170,synopsis_embedded_171,synopsis_embedded_172,synopsis_embedded_173,synopsis_embedded_174,synopsis_embedded_175,synopsis_embedded_176,synopsis_embedded_177,synopsis_embedded_178,synopsis_embedded_179,synopsis_embedded_180,synopsis_embedded_181,synopsis_embedded_182,synopsis_embedded_183,synopsis_embedded_184,synopsis_embedded_185,synopsis_embedded_186,synopsis_embedded_187,synopsis_embedded_188,synopsis_embedded_189,synopsis_embedded_190,synopsis_embedded_191,synopsis_embedded_192,synopsis_embedded_193,synopsis_embedded_194,synopsis_embedded_195,synopsis_embedded_196,synopsis_embedded_197,synopsis_embedded_198,synopsis_embedded_199,synopsis_embedded_200,synopsis_embedded_201,synopsis_embedded_202,synopsis_embedded_203,synopsis_embedded_204,synopsis_embedded_205,synopsis_embedded_206,synopsis_embedded_207,synopsis_embedded_208,synopsis_embedded_209,synopsis_embedded_210,synopsis_embedded_211,synopsis_embedded_212,synopsis_embedded_213,synopsis_embedded_214,synopsis_embedded_215,synopsis_embedded_216,synopsis_embedded_217,synopsis_embedded_218,synopsis_embedded_219,synopsis_embedded_220,synopsis_embedded_221,synopsis_embedded_222,synopsis_embedded_223,synopsis_embedded_224,synopsis_embedded_225,synopsis_embedded_226,synopsis_embedded_227,synopsis_embedded_228,synopsis_embedded_229,synopsis_embedded_230,synopsis_embedded_231,synopsis_embedded_232,synopsis_embedded_233,synopsis_embedded_234,synopsis_embedded_235,synopsis_embedded_236,synopsis_embedded_237,synopsis_embedded_238,synopsis_embedded_239,synopsis_embedded_240,synopsis_embedded_241,synopsis_embedded_242,synopsis_embedded_243,synopsis_embedded_244,synopsis_embedded_245,synopsis_embedded_246,synopsis_embedded_247,synopsis_embedded_248,synopsis_embedded_249,synopsis_embedded_250,synopsis_embedded_251,synopsis_embedded_252,synopsis_embedded_253,synopsis_embedded_254,synopsis_embedded_255,synopsis_embedded_256,synopsis_embedded_257,synopsis_embedded_258,synopsis_embedded_259,synopsis_embedded_260,synopsis_embedded_261,synopsis_embedded_262,synopsis_embedded_263,synopsis_embedded_264,synopsis_embedded_265,synopsis_embedded_266,synopsis_embedded_267,synopsis_embedded_268,synopsis_embedded_269,synopsis_embedded_270,synopsis_embedded_271,synopsis_embedded_272,synopsis_embedded_273,synopsis_embedded_274,synopsis_embedded_275,synopsis_embedded_276,synopsis_embedded_277,synopsis_embedded_278,synopsis_embedded_279,synopsis_embedded_280,synopsis_embedded_281,synopsis_embedded_282,synopsis_embedded_283,synopsis_embedded_284,synopsis_embedded_285,synopsis_embedded_286,synopsis_embedded_287,synopsis_embedded_288,synopsis_embedded_289,synopsis_embedded_290,synopsis_embedded_291,synopsis_embedded_292,synopsis_embedded_293,synopsis_embedded_294,synopsis_embedded_295,synopsis_embedded_296,synopsis_embedded_297,synopsis_embedded_298,synopsis_embedded_299,synopsis_embedded_300
0,1,Cowboy Bebop,26.0,Spring 1998,8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as ""Cowboys."" The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member's dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic and an homage to the smooth and improvised music it is named after. [Written by MAL Rewrite]",1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.029809,0.042396,0.022913,0.040259,-0.039149,-0.011959,0.016059,-0.064689,0.050221,0.045491,0.012603,-0.046489,-0.007768,0.011972,-0.054675,0.034183,0.008571,0.073747,-0.006751,0.002686,0.001647,0.037568,-0.011122,-0.006467,0.034074,-0.00513,-0.067323,0.057732,0.03045,-0.02217,-0.002778,-0.025179,-0.017223,0.027981,0.00732,-0.016938,0.013608,-0.023168,0.032683,0.038371,0.065416,-0.013103,0.03174,0.029739,0.01164,-0.045497,-0.009923,0.005677,0.060461,0.029093,-0.022794,0.022537,-0.005312,-0.041699,-0.006712,0.044132,-0.008567,-0.077645,0.002706,-0.033053,-0.029164,0.064243,-0.054654,-0.049431,-0.03452,-0.013828,-0.004685,0.052337,-0.035267,0.0586,0.041502,-0.026901,0.022177,0.002181,-0.070948,-0.03016,0.046594,0.037892,0.00576,0.05166,0.030118,-0.051224,0.014102,-0.040939,-0.000171,-0.049184,-0.062894,0.059879,0.020875,0.035167,0.009019,-0.015971,-0.048447,-0.048976,-0.000485,0.011175,0.030076,0.025286,0.000305,-0.032328,0.017055,-0.033233,-0.002771,-0.001819,-0.021702,-0.0222,0.005008,-0.025583,-0.012564,-0.034225,-0.014823,0.017698,0.013105,0.008766,0.030944,0.016009,0.006116,-0.004369,0.063656,0.03222,-0.077808,-0.008884,-0.035245,-0.016034,-0.026342,-0.010016,-0.043281,-0.005266,0.00396,0.039512,-0.000114,-0.042252,-0.050982,0.000868,-0.025522,0.005533,-0.003248,-0.011425,-0.017867,0.058968,0.047962,-0.035847,-0.022813,0.030239,0.020939,0.045603,-0.00866,0.006972,-0.026107,0.018222,0.041626,-0.014078,-0.041253,0.043558,-0.007807,-0.03033,-0.008229,-0.045358,0.008506,-0.04455,-0.016403,0.032078,0.029107,0.019541,-0.000979,-0.043553,0.059217,-0.021323,-0.021795,0.012713,-0.041548,-0.025159,0.007053,-0.046681,0.011992,-0.021492,-0.020409,-0.039861,-0.039493,-0.021282,-0.043401,-0.054954,-0.003071,-0.025961,-0.019901,-0.013816,0.008249,0.013152,0.050161,0.018735,0.011302,0.025814,0.038649,0.001601,-0.062521,0.034121,-0.018548,-0.008048,-0.045877,-0.045716,0.052964,0.023042,-0.02705,0.005686,0.011264,-0.020137,-0.028312,-0.012747,-0.025026,-0.028384,-0.050707,0.013471,0.00906,0.029816,-0.052157,0.039922,0.047676,0.010645,-0.088924,-0.022553,0.007775,0.013793,0.011874,0.001426,0.015962,0.004444,0.018199,0.020031,0.036024,0.004275,0.013852,-0.041363,0.030135,0.037383,0.043992,-0.01189,0.009296,-0.059358,0.028553,-0.004396,0.01155,-0.016681,0.011991,-0.057401,0.022944,0.002535,0.043138,0.015089,-0.00354,-0.02121,-0.000755,0.009067,0.021951,0.076348,0.009252,-0.041426,0.03537,-0.032637,-0.036216,-0.024516,0.013985,-0.006748,-0.043264,0.035988,0.042097,0.086749,-0.04612,-0.018097,-0.04304,0.026702,0.001524,0.026744,0.006809,0.047653,0.052771,-0.031369,-0.047697,-0.054508,-0.019475,0.00538,-0.007196,0.013082,0.017379,0.02973,0.044012,-0.020394,-0.071408,-0.000607,0.015265,0.017696,-0.041449,0.015911,-0.072653,0.01074,-0.032635,-0.015616,0.003619,-0.010637,0.043498,-0.00721
6,5,Cowboy Bebop: The Movie,1.0,0,8.41,120243,164,449,197791,776,"Another day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this routine is interrupted when Faye, who is chasing a fairly worthless target on Mars, witnesses an oil tanker suddenly explode, causing mass hysteria. As casualties mount due to a strange disease spreading through the smoke from the blast, a whopping three hundred million woolong price is placed on the head of the supposed perpetrator. With lives at stake and a solution to their money problems in sight, the Bebop crew springs into action. Spike, Jet, Faye, and Edward, followed closely by Ein, split up to pursue different leads across Alba City. Through their individual investigations, they discover a cover-up scheme involving a pharmaceutical company, revealing a plot that reaches much further than the ragtag team of bounty hunters could have realized. [Written by MAL Rewrite]",1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.034286,0.033996,0.007923,0.038971,-0.032601,-0.008406,0.014557,-0.058504,0.061666,0.059972,-0.005531,-0.069254,-0.009848,0.035814,-0.059442,0.031862,0.008733,0.053755,-0.005296,-0.013566,-0.016636,0.010033,-0.008637,0.007356,0.033484,-0.020868,-0.0584,0.063953,0.027596,0.005782,0.00043,-0.014363,-0.008073,-0.016039,-0.002752,-0.00802,0.005582,-0.012561,0.040396,0.035001,0.051261,-0.025975,0.035493,-0.020143,-0.016543,-0.04018,-0.034073,0.02998,0.042406,0.005518,0.001988,0.014691,0.008662,-0.025833,-0.010682,0.020041,-0.007578,-0.043492,0.018496,-0.048332,-0.018077,0.055186,-0.027436,-0.063601,-0.023318,-0.020217,0.013749,0.057316,-0.015806,0.037613,0.013163,-0.005739,0.068124,0.009256,-0.082062,-0.030137,0.044616,0.042589,-0.003027,0.055808,0.018468,-0.046793,0.020286,0.003879,-0.020348,-0.060697,-0.071962,0.049216,-0.003338,0.022231,0.048474,0.005185,-0.032714,-0.061541,-0.009559,-0.017448,0.007921,-0.005968,-0.001945,0.01134,0.010297,-0.053358,0.001377,-0.008766,-0.010449,-0.026977,-0.015875,-0.033238,0.024455,-0.043913,0.000204,-0.004407,0.001314,-0.015963,0.038036,0.015312,0.021343,0.005879,0.04642,0.048484,-0.078182,0.005844,-0.04027,0.004293,-0.02106,-0.007908,-0.023996,-0.057414,0.000126,0.046519,-0.007095,-0.037806,-0.049617,-0.024828,-0.004732,-0.033098,0.010501,-0.010643,-0.031625,0.034885,0.026372,-0.043046,0.009983,0.021871,0.013261,0.002401,-0.016962,-0.030833,-0.059017,-0.002722,0.046054,-0.004384,-0.034703,0.03347,-0.006124,-0.008277,-0.026149,-0.05044,-0.015401,-0.023633,-0.014713,0.048229,0.0123,0.014094,0.00016,-0.041214,0.029195,-0.014758,0.012803,-0.000423,-0.059128,-0.032127,-0.02514,-0.065613,-0.001999,-0.04045,0.012925,-0.037661,-0.018058,-0.011506,-0.040262,-0.039131,-0.016425,0.002404,-0.02114,-0.01366,-0.000422,0.032882,0.060541,0.043737,0.019885,-0.010006,0.020332,-0.001026,-0.064583,0.021543,-0.030945,-0.002331,-0.030033,-0.059975,0.054624,0.013319,-0.023197,-0.003349,-0.001056,0.000562,-0.031588,0.011888,0.022546,-0.029294,-0.020384,0.018045,-0.013425,0.036303,-0.044788,0.02021,0.044186,-0.008491,-0.061528,0.020645,0.001539,0.011159,0.012887,0.003158,0.034777,-0.023623,0.033818,0.018494,0.043964,-0.0059,0.037656,-0.034594,0.027668,0.024776,0.007987,-0.024402,0.011711,-0.028553,0.046212,0.021569,-0.005814,0.00927,0.003437,-0.058497,0.017021,-0.015073,0.047122,0.010372,0.008882,-0.01488,0.033415,-0.007068,0.032605,0.033175,0.01949,-0.026248,0.035107,0.000343,-0.030352,-0.024482,-0.002295,0.000352,-0.030998,0.03665,0.032162,0.082847,-0.02951,-6.9e-05,-0.036153,0.017187,0.022112,0.040818,0.015384,0.016423,0.038067,-0.034941,-0.04781,-0.046722,0.000735,0.007427,-0.009773,-0.008749,0.006419,0.036121,0.020225,-0.000781,-0.050707,0.017542,0.038131,-0.001431,-0.038774,0.008558,-0.053541,0.013464,-0.019009,0.013802,0.005209,-0.00686,0.036505,-0.017165
16,6,Trigun,26.0,Spring 1998,8.3,212537,255,146,408548,10432,"Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merciless villain who lays waste to all those that oppose him and flattens entire cities for fun, garnering him the title ""The Humanoid Typhoon."" He leaves a trail of death and destruction wherever he goes, and anyone can count themselves dead if they so much as make eye contact—or so the rumors say. In actuality, Vash is a huge softie who claims to have never taken a life and avoids violence at all costs. With his crazy doughnut obsession and buffoonish attitude in tow, Vash traverses the wasteland of the planet Gunsmoke, all the while followed by two insurance agents, Meryl Stryfe and Milly Thompson, who attempt to minimize his impact on the public. But soon, their misadventures evolve into life-or-death situations as a group of legendary assassins are summoned to bring about suffering to the trio. Vash's agonizing past will be unraveled and his morality and principles pushed to the breaking point. [Written by MAL Rewrite]",1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0.046075,0.051554,0.021494,0.042679,-0.033989,0.011614,0.016296,-0.056546,0.050516,0.041485,-0.011288,-0.066922,-0.007412,0.036506,-0.071361,0.035155,0.037431,0.073922,-0.014399,-0.016527,-0.009324,0.024258,0.003378,0.013267,0.048147,-0.026136,-0.056828,0.036333,0.028268,-0.005679,-0.006641,-0.017382,-0.03217,0.021557,-0.001583,0.000194,0.029126,-0.011996,0.046084,0.040868,0.077538,-0.046362,0.051705,-0.010982,-0.010465,-0.033262,-0.011877,-0.00061,0.064822,0.014219,-0.018036,0.041798,-0.014228,-0.027071,-0.003972,0.02302,-0.014469,-0.056487,0.007204,-0.032974,-0.002466,0.073331,-0.022479,-0.04996,-0.025193,-0.010937,-0.002071,0.044017,-0.032198,0.045328,0.034324,0.011872,0.03682,0.00782,-0.074346,-0.060799,0.032817,0.0566,0.02572,0.065892,0.016496,-0.039125,0.009787,-0.037298,-0.017261,-0.04606,-0.061661,0.07545,-0.002407,0.018012,0.022199,0.008195,-0.02463,-0.050168,-0.0231,-0.018181,0.031423,0.026293,0.005692,0.001506,-0.01677,-0.019027,0.019068,-0.000546,-0.008867,-0.038206,-0.007084,-0.018574,0.025321,-0.040857,-0.021163,-0.008961,0.006049,0.007165,0.034344,0.00619,-0.003011,-0.019504,0.049238,0.049304,-0.078634,0.018836,-0.031849,0.012006,-0.036743,-0.040874,-0.019633,-0.022892,0.016259,0.012799,-0.024395,-0.051546,-0.052428,-0.012789,-0.037025,-0.030559,-0.015095,-0.001077,-0.023634,0.026297,0.049129,-0.014973,0.00316,0.030085,0.031603,0.019252,-0.023041,-0.027789,-0.036158,0.01199,0.046321,0.020768,-0.042878,0.034118,-0.025504,-0.012411,-0.03065,-0.050731,0.003357,-0.034415,-0.004984,0.038504,0.033941,0.008554,-0.024119,-0.048377,0.028926,-0.017718,0.014117,-0.006992,-0.040932,-0.031195,-0.01844,-0.057449,-0.006374,-0.019075,0.025331,-0.046829,-0.000778,-0.011974,-0.057296,-0.036934,0.002215,0.007896,-0.026588,-0.005585,-0.013742,0.021777,0.068491,0.03676,0.049298,0.028468,0.038089,0.019071,-0.050208,0.001869,-0.032115,0.001188,-0.050244,-0.063678,0.027357,0.013809,-0.010756,-0.010191,0.020405,-0.004609,-0.014511,-0.002724,0.007486,-0.023171,-0.034837,0.015229,0.013848,0.014829,-0.078058,0.013982,0.018455,0.007793,-0.085684,-0.01429,0.00323,0.01507,0.010628,-0.004694,0.050373,-0.010192,0.03277,0.029111,0.03597,-0.006674,0.032799,-0.031484,0.028996,0.028729,0.036776,-0.015768,-0.003968,-0.022568,0.039227,-0.008952,0.010346,0.001793,0.001189,-0.061668,0.015997,0.013199,0.042566,0.021276,-0.000617,-0.012438,0.013163,0.003257,0.023913,0.048648,0.02407,-0.036067,0.022092,-0.018226,-0.038161,-0.029356,-0.016261,0.009063,-0.046335,0.046327,0.049616,0.080244,-0.028453,0.02287,-0.029462,0.000303,0.014516,0.047043,0.02484,0.023857,0.010588,-0.037943,-0.021131,-0.025452,-0.008461,0.015443,0.011314,-0.036393,0.017949,0.053921,0.018386,-0.000554,-0.073014,-0.001892,0.016432,0.015031,-0.049164,0.001321,-0.070184,0.002559,-0.05597,-0.004276,0.010292,-0.036921,0.017824,0.005914
22,7,Witch Hunter Robin,26.0,Summer 2002,7.33,32837,2371,1171,79397,537,"Witches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-year-old craft user, arrives from Italy to Japan to work for an organization named STN Japan Division (STN-J) as a replacement for one of STN-J's witch hunters who was recently killed. Unlike other divisions of STN, STN-J tries to capture the witches alive in order to learn why and how they became witches in the first place. (Source: ANN)",1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0.040084,0.043519,0.011331,0.013324,-0.049597,0.013523,-0.007891,-0.050229,0.015264,0.034034,0.007409,-0.047748,-0.013092,0.016576,-0.050811,0.035068,0.015911,0.051699,0.00418,-0.007916,0.004467,0.015594,-0.019425,-0.017235,0.013845,-0.012678,-0.064591,0.038377,0.003203,-0.008733,0.001605,0.000176,-0.025672,0.005307,0.022348,0.008782,-0.006471,-0.031222,0.043191,0.036688,0.071859,-0.018241,0.049503,-0.000172,0.010562,-0.028092,-0.003908,0.011069,0.00617,0.021578,-0.050432,0.031656,0.013686,-0.003417,-0.043547,0.030178,-0.018181,-0.038867,0.019866,-0.039338,0.005569,0.0609,-0.056638,-0.06827,-0.040912,-0.010647,0.005331,0.04417,-0.018075,0.039898,0.035091,0.014663,0.04517,0.019552,-0.023127,-0.050683,0.0525,0.051285,0.007713,0.068368,0.033391,-0.031139,0.009146,-0.009842,-0.013967,-0.055956,-0.070713,0.014257,0.023108,0.037685,0.02007,0.000528,-0.04622,-0.060177,0.014468,-0.030158,0.030222,-0.009351,-0.001452,-0.007641,-0.000649,-0.043326,0.005714,0.019813,0.008822,-0.021916,-0.038958,-0.021583,0.023121,-0.012859,-0.017479,0.023516,-0.026572,0.008006,0.027996,0.022325,0.030625,-0.002895,0.089061,0.010793,-0.061058,-0.000739,-0.016678,0.004759,-0.022246,-0.008839,-0.013216,-0.004543,-0.008449,0.020789,-0.008573,-0.05909,-0.058241,-0.036238,-0.025937,-0.018035,-0.00175,-0.048649,0.008116,0.012063,0.048729,-0.030933,0.037315,0.005854,0.057903,0.036704,-0.028571,-0.014908,-0.026466,-0.001457,0.031291,0.029841,-0.049659,0.046499,-0.003366,-0.032992,-0.030007,-0.041595,-0.001108,-0.017381,0.017134,0.028097,0.002674,-0.002948,0.005731,-0.043066,0.019577,-0.014403,0.002002,0.006388,-0.073542,-0.012233,-0.003693,-0.06872,-0.010161,-0.036937,0.031861,-0.040947,-0.004414,-0.011904,-0.035708,-0.059962,0.021938,-0.027605,0.000439,-0.003035,0.023976,-0.005226,0.06366,0.051885,-0.00708,-0.030403,0.035594,-0.011832,-0.02218,0.020135,0.015628,-0.036521,-0.058518,-0.089885,0.025523,-0.01144,-0.007256,-0.001377,-0.025598,-0.02816,-0.006038,-0.008614,0.018424,-0.035472,-0.055351,0.003703,-0.00856,0.026524,-0.063033,0.023271,0.028832,-0.013214,-0.054398,-0.013727,0.002572,0.010427,0.01055,-0.015907,0.022653,-0.019953,0.006097,0.011575,0.027497,-0.009629,0.02892,-0.026982,0.002148,0.033218,0.025723,-0.012843,-0.002086,-0.02892,0.019965,-0.007975,-0.001668,-0.033926,0.013474,-0.02342,0.006162,-0.006549,0.011702,0.031006,0.011067,-0.024196,0.002276,0.019851,0.042411,0.024429,0.020638,0.006351,0.032375,-0.024066,-0.001797,-0.027247,-0.005463,0.012498,-0.038402,0.038218,0.022269,0.083581,-0.021,0.016294,-0.023244,0.020538,0.038338,0.030338,0.02599,0.051838,0.062437,-0.042235,-0.040669,-0.05852,-0.030266,0.00729,-0.00091,-0.012541,0.014318,0.052671,0.010704,0.006528,-0.060641,-0.027508,0.013195,0.003946,-0.030247,-0.008901,-0.079197,3.1e-05,-0.024609,-0.008391,0.026849,-0.003479,0.048176,-0.00202
28,8,Beet the Vandel Buster,52.0,Fall 2004,7.03,4894,3544,3704,11708,14,"It is the dark century and the people are suffering under the rule of the devil, Vandel, who is able to manipulate monsters. The Vandel Busters are a group of people who hunt these devils, and among them, the Zenon Squad is known to be the strongest busters on the continent. A young boy, Beet, dreams of joining the Zenon Squad. However, one day, as a result of Beet's fault, the Zenon squad was defeated by the devil, Beltose. The five dying busters sacrificed their life power into their five weapons, Saiga. After giving their weapons to Beet, they passed away. Years have passed since then and the young Vandel Buster, Beet, begins his adventure to carry out the Zenon Squad's will to put an end to the dark century.",0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0.033509,0.073026,0.032067,0.033866,-0.035534,-0.023383,0.004171,-0.052128,0.036317,0.038405,0.016212,-0.049269,-0.018687,0.029037,-0.059199,0.026409,0.03487,0.035396,-0.019874,-0.012742,-0.011865,0.022725,-0.021582,0.005387,0.040823,-0.016987,-0.052997,0.037102,0.014931,-0.002932,-0.007679,0.002017,-0.019605,0.008567,0.021359,-5.7e-05,-0.006793,-0.02099,0.040467,0.028692,0.049575,-0.013557,0.054332,-0.012396,0.006611,-0.022289,-0.027113,0.008252,0.038436,0.017671,-0.034137,0.036846,0.006656,-0.01432,0.004698,0.007955,-0.023433,-0.04209,-0.010677,-0.015413,-0.022483,0.056229,-0.047642,-0.049667,-0.027916,-0.019393,0.027552,0.071266,-0.015817,0.049222,0.028127,-0.005339,0.039752,0.014397,-0.046257,-0.055838,0.044878,0.054782,0.015369,0.086399,0.029277,-0.061364,0.030693,-0.000921,-0.015918,-0.022378,-0.084741,0.054159,-0.010346,0.015542,0.030269,0.001458,-0.025678,-0.065957,-0.017067,-0.013187,0.019158,0.015513,-0.009476,-0.005244,0.00691,-0.031384,-0.000513,-0.002002,-0.015753,-0.041126,-0.010517,-0.039384,-0.002956,-0.037395,-0.003931,0.024675,-0.013818,0.001268,0.031092,0.027338,-0.000107,-0.001816,0.03866,0.027115,-0.073232,-0.014135,-0.043308,-0.011086,-0.018441,-0.016184,-0.009338,-0.041386,0.005259,0.048489,0.016264,-0.065162,-0.068298,-0.017547,-0.036227,-0.037886,-0.003537,-0.018665,-0.035893,0.02881,0.018681,-0.038282,0.004194,-0.005613,0.011336,0.024751,-0.012344,-0.043308,-0.03074,0.011236,0.040221,0.003207,-0.051683,0.036712,0.016157,0.019987,-0.022006,-0.056219,-0.010696,-0.038547,-0.003561,0.03321,0.023879,0.003132,0.001547,-0.052919,0.004402,0.011646,0.016467,-0.018126,-0.070273,-0.040453,-0.031468,-0.082942,-0.003322,-0.008578,0.023592,-0.018896,-0.003234,-0.020846,-0.023816,-0.043606,0.012595,0.009152,-0.036216,0.000923,0.00013,0.011515,0.090346,0.017398,0.013981,0.038001,0.030625,-0.009913,-0.054036,0.012278,0.008175,0.011279,-0.051389,-0.073863,0.037429,0.011746,-0.003337,-0.006695,-0.007426,0.002193,-0.008486,-0.001289,0.005537,-0.028401,-0.022333,0.01798,-0.001913,0.030877,-0.045051,0.014192,0.038983,-0.018247,-0.076112,0.005695,-0.003393,0.001793,0.012,-0.020973,0.063947,-0.0235,0.017504,0.023846,0.018791,0.005042,0.026277,-0.040921,0.028756,0.057596,0.018601,-0.019259,0.003439,-0.032663,0.021317,-0.007353,0.018049,0.004357,-0.001511,-0.05295,0.014557,0.005914,0.045339,0.020491,0.01163,0.011417,0.026786,-0.005696,0.044519,0.056356,0.019246,-0.022986,0.027986,-0.019878,-0.03283,-0.027301,-0.001957,0.000672,-0.025853,0.049954,0.030327,0.085031,-0.04761,0.040933,-0.038543,0.025517,0.036078,0.032642,0.017922,0.042792,0.049234,-0.037459,-0.043644,-0.058459,-0.013252,-0.010419,0.003661,-0.000534,0.033982,0.031032,0.017119,0.00261,-0.059511,0.015335,0.002775,-0.015049,-0.043467,-0.002912,-0.069303,0.010295,-0.042858,-0.009359,-0.01692,-0.021534,0.047166,-0.012552


In [104]:
simp_cols = [col for col in list(reduced_df.columns) if ((col != 'title_english') & (col != 'synopsis'))]
more_reduced_df = reduced_df[simp_cols]
print(more_reduced_df.shape)
more_reduced_df.head()

(2855, 409)


Unnamed: 0,animeID,episodes,premiered,score,scored_by,rank,popularity,members,favorites,genre_Action,genre_Adventure,genre_Comedy,genre_Dementia,genre_Drama,genre_Ecchi,genre_Fantasy,genre_Game,genre_Harem,genre_Historical,genre_Horror,genre_Kids,genre_Magic,genre_Military,genre_Music,genre_Mystery,genre_Psychological,genre_Romance,genre_Sci-Fi,genre_Slice of Life,genre_Sports,"studio_""Brains Base""",studio_A-1 Pictures,studio_AIC,studio_Arms,studio_Bones,studio_Diomedea,studio_Doga Kobo,studio_Gainax,studio_Gonzo,studio_J.C.Staff,studio_Kyoto Animation,studio_MAPPA,studio_Madhouse,studio_Manglobe,studio_Nippon Animation,studio_OLM,studio_P.A. Works,studio_Production I.G,studio_Production Reed,studio_Satelight,studio_Shaft,studio_Shin-Ei Animation,studio_Silver Link.,studio_Studio Deen,studio_Studio Gallop,studio_Studio Gokumi,studio_Studio Pierrot,studio_Sunrise,studio_TMS Entertainment,studio_TNK,studio_Tatsunoko Production,studio_Toei Animation,studio_Xebec,studio_Zexcs,studio_feel.,studio_ufotable,source_Game,source_Light novel,source_Manga,source_Novel,source_Original,source_Unknown,source_Visual novel,producer_Aniplex,producer_Bandai Visual,producer_DAX Production,producer_Dentsu,producer_Frontier Works,producer_Fuji TV,producer_Genco,producer_Geneon Universal Entertainment,producer_Kadokawa Shoten,producer_Lantis,producer_Media Factory,producer_Movic,producer_NHK,producer_Nihon Ad Systems,producer_Pony Canyon,producer_Production I.G,producer_Shochiku,producer_Shogakukan Productions,producer_Sotsu,producer_Starchild Records,producer_TBS,producer_TV Asahi,producer_TV Tokyo,producer_VAP,producer_Victor Entertainment,rating_G - All Ages,rating_PG - Children,rating_PG-13 - Teens 13 or older,rating_R - 17+ (violence & profanity),rating_R+ - Mild Nudity,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,synopsis_embedded_1,synopsis_embedded_2,synopsis_embedded_3,synopsis_embedded_4,synopsis_embedded_5,synopsis_embedded_6,synopsis_embedded_7,synopsis_embedded_8,synopsis_embedded_9,synopsis_embedded_10,synopsis_embedded_11,synopsis_embedded_12,synopsis_embedded_13,synopsis_embedded_14,synopsis_embedded_15,synopsis_embedded_16,synopsis_embedded_17,synopsis_embedded_18,synopsis_embedded_19,synopsis_embedded_20,synopsis_embedded_21,synopsis_embedded_22,synopsis_embedded_23,synopsis_embedded_24,synopsis_embedded_25,synopsis_embedded_26,synopsis_embedded_27,synopsis_embedded_28,synopsis_embedded_29,synopsis_embedded_30,synopsis_embedded_31,synopsis_embedded_32,synopsis_embedded_33,synopsis_embedded_34,synopsis_embedded_35,synopsis_embedded_36,synopsis_embedded_37,synopsis_embedded_38,synopsis_embedded_39,synopsis_embedded_40,synopsis_embedded_41,synopsis_embedded_42,synopsis_embedded_43,synopsis_embedded_44,synopsis_embedded_45,synopsis_embedded_46,synopsis_embedded_47,synopsis_embedded_48,synopsis_embedded_49,synopsis_embedded_50,synopsis_embedded_51,synopsis_embedded_52,synopsis_embedded_53,synopsis_embedded_54,synopsis_embedded_55,synopsis_embedded_56,synopsis_embedded_57,synopsis_embedded_58,synopsis_embedded_59,synopsis_embedded_60,synopsis_embedded_61,synopsis_embedded_62,synopsis_embedded_63,synopsis_embedded_64,synopsis_embedded_65,synopsis_embedded_66,synopsis_embedded_67,synopsis_embedded_68,synopsis_embedded_69,synopsis_embedded_70,synopsis_embedded_71,synopsis_embedded_72,synopsis_embedded_73,synopsis_embedded_74,synopsis_embedded_75,synopsis_embedded_76,synopsis_embedded_77,synopsis_embedded_78,synopsis_embedded_79,synopsis_embedded_80,synopsis_embedded_81,synopsis_embedded_82,synopsis_embedded_83,synopsis_embedded_84,synopsis_embedded_85,synopsis_embedded_86,synopsis_embedded_87,synopsis_embedded_88,synopsis_embedded_89,synopsis_embedded_90,synopsis_embedded_91,synopsis_embedded_92,synopsis_embedded_93,synopsis_embedded_94,synopsis_embedded_95,synopsis_embedded_96,synopsis_embedded_97,synopsis_embedded_98,synopsis_embedded_99,synopsis_embedded_100,synopsis_embedded_101,synopsis_embedded_102,synopsis_embedded_103,synopsis_embedded_104,synopsis_embedded_105,synopsis_embedded_106,synopsis_embedded_107,synopsis_embedded_108,synopsis_embedded_109,synopsis_embedded_110,synopsis_embedded_111,synopsis_embedded_112,synopsis_embedded_113,synopsis_embedded_114,synopsis_embedded_115,synopsis_embedded_116,synopsis_embedded_117,synopsis_embedded_118,synopsis_embedded_119,synopsis_embedded_120,synopsis_embedded_121,synopsis_embedded_122,synopsis_embedded_123,synopsis_embedded_124,synopsis_embedded_125,synopsis_embedded_126,synopsis_embedded_127,synopsis_embedded_128,synopsis_embedded_129,synopsis_embedded_130,synopsis_embedded_131,synopsis_embedded_132,synopsis_embedded_133,synopsis_embedded_134,synopsis_embedded_135,synopsis_embedded_136,synopsis_embedded_137,synopsis_embedded_138,synopsis_embedded_139,synopsis_embedded_140,synopsis_embedded_141,synopsis_embedded_142,synopsis_embedded_143,synopsis_embedded_144,synopsis_embedded_145,synopsis_embedded_146,synopsis_embedded_147,synopsis_embedded_148,synopsis_embedded_149,synopsis_embedded_150,synopsis_embedded_151,synopsis_embedded_152,synopsis_embedded_153,synopsis_embedded_154,synopsis_embedded_155,synopsis_embedded_156,synopsis_embedded_157,synopsis_embedded_158,synopsis_embedded_159,synopsis_embedded_160,synopsis_embedded_161,synopsis_embedded_162,synopsis_embedded_163,synopsis_embedded_164,synopsis_embedded_165,synopsis_embedded_166,synopsis_embedded_167,synopsis_embedded_168,synopsis_embedded_169,synopsis_embedded_170,synopsis_embedded_171,synopsis_embedded_172,synopsis_embedded_173,synopsis_embedded_174,synopsis_embedded_175,synopsis_embedded_176,synopsis_embedded_177,synopsis_embedded_178,synopsis_embedded_179,synopsis_embedded_180,synopsis_embedded_181,synopsis_embedded_182,synopsis_embedded_183,synopsis_embedded_184,synopsis_embedded_185,synopsis_embedded_186,synopsis_embedded_187,synopsis_embedded_188,synopsis_embedded_189,synopsis_embedded_190,synopsis_embedded_191,synopsis_embedded_192,synopsis_embedded_193,synopsis_embedded_194,synopsis_embedded_195,synopsis_embedded_196,synopsis_embedded_197,synopsis_embedded_198,synopsis_embedded_199,synopsis_embedded_200,synopsis_embedded_201,synopsis_embedded_202,synopsis_embedded_203,synopsis_embedded_204,synopsis_embedded_205,synopsis_embedded_206,synopsis_embedded_207,synopsis_embedded_208,synopsis_embedded_209,synopsis_embedded_210,synopsis_embedded_211,synopsis_embedded_212,synopsis_embedded_213,synopsis_embedded_214,synopsis_embedded_215,synopsis_embedded_216,synopsis_embedded_217,synopsis_embedded_218,synopsis_embedded_219,synopsis_embedded_220,synopsis_embedded_221,synopsis_embedded_222,synopsis_embedded_223,synopsis_embedded_224,synopsis_embedded_225,synopsis_embedded_226,synopsis_embedded_227,synopsis_embedded_228,synopsis_embedded_229,synopsis_embedded_230,synopsis_embedded_231,synopsis_embedded_232,synopsis_embedded_233,synopsis_embedded_234,synopsis_embedded_235,synopsis_embedded_236,synopsis_embedded_237,synopsis_embedded_238,synopsis_embedded_239,synopsis_embedded_240,synopsis_embedded_241,synopsis_embedded_242,synopsis_embedded_243,synopsis_embedded_244,synopsis_embedded_245,synopsis_embedded_246,synopsis_embedded_247,synopsis_embedded_248,synopsis_embedded_249,synopsis_embedded_250,synopsis_embedded_251,synopsis_embedded_252,synopsis_embedded_253,synopsis_embedded_254,synopsis_embedded_255,synopsis_embedded_256,synopsis_embedded_257,synopsis_embedded_258,synopsis_embedded_259,synopsis_embedded_260,synopsis_embedded_261,synopsis_embedded_262,synopsis_embedded_263,synopsis_embedded_264,synopsis_embedded_265,synopsis_embedded_266,synopsis_embedded_267,synopsis_embedded_268,synopsis_embedded_269,synopsis_embedded_270,synopsis_embedded_271,synopsis_embedded_272,synopsis_embedded_273,synopsis_embedded_274,synopsis_embedded_275,synopsis_embedded_276,synopsis_embedded_277,synopsis_embedded_278,synopsis_embedded_279,synopsis_embedded_280,synopsis_embedded_281,synopsis_embedded_282,synopsis_embedded_283,synopsis_embedded_284,synopsis_embedded_285,synopsis_embedded_286,synopsis_embedded_287,synopsis_embedded_288,synopsis_embedded_289,synopsis_embedded_290,synopsis_embedded_291,synopsis_embedded_292,synopsis_embedded_293,synopsis_embedded_294,synopsis_embedded_295,synopsis_embedded_296,synopsis_embedded_297,synopsis_embedded_298,synopsis_embedded_299,synopsis_embedded_300
0,1,26.0,Spring 1998,8.81,405664,26,39,795733,43460,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.029809,0.042396,0.022913,0.040259,-0.039149,-0.011959,0.016059,-0.064689,0.050221,0.045491,0.012603,-0.046489,-0.007768,0.011972,-0.054675,0.034183,0.008571,0.073747,-0.006751,0.002686,0.001647,0.037568,-0.011122,-0.006467,0.034074,-0.00513,-0.067323,0.057732,0.03045,-0.02217,-0.002778,-0.025179,-0.017223,0.027981,0.00732,-0.016938,0.013608,-0.023168,0.032683,0.038371,0.065416,-0.013103,0.03174,0.029739,0.01164,-0.045497,-0.009923,0.005677,0.060461,0.029093,-0.022794,0.022537,-0.005312,-0.041699,-0.006712,0.044132,-0.008567,-0.077645,0.002706,-0.033053,-0.029164,0.064243,-0.054654,-0.049431,-0.03452,-0.013828,-0.004685,0.052337,-0.035267,0.0586,0.041502,-0.026901,0.022177,0.002181,-0.070948,-0.03016,0.046594,0.037892,0.00576,0.05166,0.030118,-0.051224,0.014102,-0.040939,-0.000171,-0.049184,-0.062894,0.059879,0.020875,0.035167,0.009019,-0.015971,-0.048447,-0.048976,-0.000485,0.011175,0.030076,0.025286,0.000305,-0.032328,0.017055,-0.033233,-0.002771,-0.001819,-0.021702,-0.0222,0.005008,-0.025583,-0.012564,-0.034225,-0.014823,0.017698,0.013105,0.008766,0.030944,0.016009,0.006116,-0.004369,0.063656,0.03222,-0.077808,-0.008884,-0.035245,-0.016034,-0.026342,-0.010016,-0.043281,-0.005266,0.00396,0.039512,-0.000114,-0.042252,-0.050982,0.000868,-0.025522,0.005533,-0.003248,-0.011425,-0.017867,0.058968,0.047962,-0.035847,-0.022813,0.030239,0.020939,0.045603,-0.00866,0.006972,-0.026107,0.018222,0.041626,-0.014078,-0.041253,0.043558,-0.007807,-0.03033,-0.008229,-0.045358,0.008506,-0.04455,-0.016403,0.032078,0.029107,0.019541,-0.000979,-0.043553,0.059217,-0.021323,-0.021795,0.012713,-0.041548,-0.025159,0.007053,-0.046681,0.011992,-0.021492,-0.020409,-0.039861,-0.039493,-0.021282,-0.043401,-0.054954,-0.003071,-0.025961,-0.019901,-0.013816,0.008249,0.013152,0.050161,0.018735,0.011302,0.025814,0.038649,0.001601,-0.062521,0.034121,-0.018548,-0.008048,-0.045877,-0.045716,0.052964,0.023042,-0.02705,0.005686,0.011264,-0.020137,-0.028312,-0.012747,-0.025026,-0.028384,-0.050707,0.013471,0.00906,0.029816,-0.052157,0.039922,0.047676,0.010645,-0.088924,-0.022553,0.007775,0.013793,0.011874,0.001426,0.015962,0.004444,0.018199,0.020031,0.036024,0.004275,0.013852,-0.041363,0.030135,0.037383,0.043992,-0.01189,0.009296,-0.059358,0.028553,-0.004396,0.01155,-0.016681,0.011991,-0.057401,0.022944,0.002535,0.043138,0.015089,-0.00354,-0.02121,-0.000755,0.009067,0.021951,0.076348,0.009252,-0.041426,0.03537,-0.032637,-0.036216,-0.024516,0.013985,-0.006748,-0.043264,0.035988,0.042097,0.086749,-0.04612,-0.018097,-0.04304,0.026702,0.001524,0.026744,0.006809,0.047653,0.052771,-0.031369,-0.047697,-0.054508,-0.019475,0.00538,-0.007196,0.013082,0.017379,0.02973,0.044012,-0.020394,-0.071408,-0.000607,0.015265,0.017696,-0.041449,0.015911,-0.072653,0.01074,-0.032635,-0.015616,0.003619,-0.010637,0.043498,-0.00721
6,5,1.0,0,8.41,120243,164,449,197791,776,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.034286,0.033996,0.007923,0.038971,-0.032601,-0.008406,0.014557,-0.058504,0.061666,0.059972,-0.005531,-0.069254,-0.009848,0.035814,-0.059442,0.031862,0.008733,0.053755,-0.005296,-0.013566,-0.016636,0.010033,-0.008637,0.007356,0.033484,-0.020868,-0.0584,0.063953,0.027596,0.005782,0.00043,-0.014363,-0.008073,-0.016039,-0.002752,-0.00802,0.005582,-0.012561,0.040396,0.035001,0.051261,-0.025975,0.035493,-0.020143,-0.016543,-0.04018,-0.034073,0.02998,0.042406,0.005518,0.001988,0.014691,0.008662,-0.025833,-0.010682,0.020041,-0.007578,-0.043492,0.018496,-0.048332,-0.018077,0.055186,-0.027436,-0.063601,-0.023318,-0.020217,0.013749,0.057316,-0.015806,0.037613,0.013163,-0.005739,0.068124,0.009256,-0.082062,-0.030137,0.044616,0.042589,-0.003027,0.055808,0.018468,-0.046793,0.020286,0.003879,-0.020348,-0.060697,-0.071962,0.049216,-0.003338,0.022231,0.048474,0.005185,-0.032714,-0.061541,-0.009559,-0.017448,0.007921,-0.005968,-0.001945,0.01134,0.010297,-0.053358,0.001377,-0.008766,-0.010449,-0.026977,-0.015875,-0.033238,0.024455,-0.043913,0.000204,-0.004407,0.001314,-0.015963,0.038036,0.015312,0.021343,0.005879,0.04642,0.048484,-0.078182,0.005844,-0.04027,0.004293,-0.02106,-0.007908,-0.023996,-0.057414,0.000126,0.046519,-0.007095,-0.037806,-0.049617,-0.024828,-0.004732,-0.033098,0.010501,-0.010643,-0.031625,0.034885,0.026372,-0.043046,0.009983,0.021871,0.013261,0.002401,-0.016962,-0.030833,-0.059017,-0.002722,0.046054,-0.004384,-0.034703,0.03347,-0.006124,-0.008277,-0.026149,-0.05044,-0.015401,-0.023633,-0.014713,0.048229,0.0123,0.014094,0.00016,-0.041214,0.029195,-0.014758,0.012803,-0.000423,-0.059128,-0.032127,-0.02514,-0.065613,-0.001999,-0.04045,0.012925,-0.037661,-0.018058,-0.011506,-0.040262,-0.039131,-0.016425,0.002404,-0.02114,-0.01366,-0.000422,0.032882,0.060541,0.043737,0.019885,-0.010006,0.020332,-0.001026,-0.064583,0.021543,-0.030945,-0.002331,-0.030033,-0.059975,0.054624,0.013319,-0.023197,-0.003349,-0.001056,0.000562,-0.031588,0.011888,0.022546,-0.029294,-0.020384,0.018045,-0.013425,0.036303,-0.044788,0.02021,0.044186,-0.008491,-0.061528,0.020645,0.001539,0.011159,0.012887,0.003158,0.034777,-0.023623,0.033818,0.018494,0.043964,-0.0059,0.037656,-0.034594,0.027668,0.024776,0.007987,-0.024402,0.011711,-0.028553,0.046212,0.021569,-0.005814,0.00927,0.003437,-0.058497,0.017021,-0.015073,0.047122,0.010372,0.008882,-0.01488,0.033415,-0.007068,0.032605,0.033175,0.01949,-0.026248,0.035107,0.000343,-0.030352,-0.024482,-0.002295,0.000352,-0.030998,0.03665,0.032162,0.082847,-0.02951,-6.9e-05,-0.036153,0.017187,0.022112,0.040818,0.015384,0.016423,0.038067,-0.034941,-0.04781,-0.046722,0.000735,0.007427,-0.009773,-0.008749,0.006419,0.036121,0.020225,-0.000781,-0.050707,0.017542,0.038131,-0.001431,-0.038774,0.008558,-0.053541,0.013464,-0.019009,0.013802,0.005209,-0.00686,0.036505,-0.017165
16,6,26.0,Spring 1998,8.3,212537,255,146,408548,10432,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0.046075,0.051554,0.021494,0.042679,-0.033989,0.011614,0.016296,-0.056546,0.050516,0.041485,-0.011288,-0.066922,-0.007412,0.036506,-0.071361,0.035155,0.037431,0.073922,-0.014399,-0.016527,-0.009324,0.024258,0.003378,0.013267,0.048147,-0.026136,-0.056828,0.036333,0.028268,-0.005679,-0.006641,-0.017382,-0.03217,0.021557,-0.001583,0.000194,0.029126,-0.011996,0.046084,0.040868,0.077538,-0.046362,0.051705,-0.010982,-0.010465,-0.033262,-0.011877,-0.00061,0.064822,0.014219,-0.018036,0.041798,-0.014228,-0.027071,-0.003972,0.02302,-0.014469,-0.056487,0.007204,-0.032974,-0.002466,0.073331,-0.022479,-0.04996,-0.025193,-0.010937,-0.002071,0.044017,-0.032198,0.045328,0.034324,0.011872,0.03682,0.00782,-0.074346,-0.060799,0.032817,0.0566,0.02572,0.065892,0.016496,-0.039125,0.009787,-0.037298,-0.017261,-0.04606,-0.061661,0.07545,-0.002407,0.018012,0.022199,0.008195,-0.02463,-0.050168,-0.0231,-0.018181,0.031423,0.026293,0.005692,0.001506,-0.01677,-0.019027,0.019068,-0.000546,-0.008867,-0.038206,-0.007084,-0.018574,0.025321,-0.040857,-0.021163,-0.008961,0.006049,0.007165,0.034344,0.00619,-0.003011,-0.019504,0.049238,0.049304,-0.078634,0.018836,-0.031849,0.012006,-0.036743,-0.040874,-0.019633,-0.022892,0.016259,0.012799,-0.024395,-0.051546,-0.052428,-0.012789,-0.037025,-0.030559,-0.015095,-0.001077,-0.023634,0.026297,0.049129,-0.014973,0.00316,0.030085,0.031603,0.019252,-0.023041,-0.027789,-0.036158,0.01199,0.046321,0.020768,-0.042878,0.034118,-0.025504,-0.012411,-0.03065,-0.050731,0.003357,-0.034415,-0.004984,0.038504,0.033941,0.008554,-0.024119,-0.048377,0.028926,-0.017718,0.014117,-0.006992,-0.040932,-0.031195,-0.01844,-0.057449,-0.006374,-0.019075,0.025331,-0.046829,-0.000778,-0.011974,-0.057296,-0.036934,0.002215,0.007896,-0.026588,-0.005585,-0.013742,0.021777,0.068491,0.03676,0.049298,0.028468,0.038089,0.019071,-0.050208,0.001869,-0.032115,0.001188,-0.050244,-0.063678,0.027357,0.013809,-0.010756,-0.010191,0.020405,-0.004609,-0.014511,-0.002724,0.007486,-0.023171,-0.034837,0.015229,0.013848,0.014829,-0.078058,0.013982,0.018455,0.007793,-0.085684,-0.01429,0.00323,0.01507,0.010628,-0.004694,0.050373,-0.010192,0.03277,0.029111,0.03597,-0.006674,0.032799,-0.031484,0.028996,0.028729,0.036776,-0.015768,-0.003968,-0.022568,0.039227,-0.008952,0.010346,0.001793,0.001189,-0.061668,0.015997,0.013199,0.042566,0.021276,-0.000617,-0.012438,0.013163,0.003257,0.023913,0.048648,0.02407,-0.036067,0.022092,-0.018226,-0.038161,-0.029356,-0.016261,0.009063,-0.046335,0.046327,0.049616,0.080244,-0.028453,0.02287,-0.029462,0.000303,0.014516,0.047043,0.02484,0.023857,0.010588,-0.037943,-0.021131,-0.025452,-0.008461,0.015443,0.011314,-0.036393,0.017949,0.053921,0.018386,-0.000554,-0.073014,-0.001892,0.016432,0.015031,-0.049164,0.001321,-0.070184,0.002559,-0.05597,-0.004276,0.010292,-0.036921,0.017824,0.005914
22,7,26.0,Summer 2002,7.33,32837,2371,1171,79397,537,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0.040084,0.043519,0.011331,0.013324,-0.049597,0.013523,-0.007891,-0.050229,0.015264,0.034034,0.007409,-0.047748,-0.013092,0.016576,-0.050811,0.035068,0.015911,0.051699,0.00418,-0.007916,0.004467,0.015594,-0.019425,-0.017235,0.013845,-0.012678,-0.064591,0.038377,0.003203,-0.008733,0.001605,0.000176,-0.025672,0.005307,0.022348,0.008782,-0.006471,-0.031222,0.043191,0.036688,0.071859,-0.018241,0.049503,-0.000172,0.010562,-0.028092,-0.003908,0.011069,0.00617,0.021578,-0.050432,0.031656,0.013686,-0.003417,-0.043547,0.030178,-0.018181,-0.038867,0.019866,-0.039338,0.005569,0.0609,-0.056638,-0.06827,-0.040912,-0.010647,0.005331,0.04417,-0.018075,0.039898,0.035091,0.014663,0.04517,0.019552,-0.023127,-0.050683,0.0525,0.051285,0.007713,0.068368,0.033391,-0.031139,0.009146,-0.009842,-0.013967,-0.055956,-0.070713,0.014257,0.023108,0.037685,0.02007,0.000528,-0.04622,-0.060177,0.014468,-0.030158,0.030222,-0.009351,-0.001452,-0.007641,-0.000649,-0.043326,0.005714,0.019813,0.008822,-0.021916,-0.038958,-0.021583,0.023121,-0.012859,-0.017479,0.023516,-0.026572,0.008006,0.027996,0.022325,0.030625,-0.002895,0.089061,0.010793,-0.061058,-0.000739,-0.016678,0.004759,-0.022246,-0.008839,-0.013216,-0.004543,-0.008449,0.020789,-0.008573,-0.05909,-0.058241,-0.036238,-0.025937,-0.018035,-0.00175,-0.048649,0.008116,0.012063,0.048729,-0.030933,0.037315,0.005854,0.057903,0.036704,-0.028571,-0.014908,-0.026466,-0.001457,0.031291,0.029841,-0.049659,0.046499,-0.003366,-0.032992,-0.030007,-0.041595,-0.001108,-0.017381,0.017134,0.028097,0.002674,-0.002948,0.005731,-0.043066,0.019577,-0.014403,0.002002,0.006388,-0.073542,-0.012233,-0.003693,-0.06872,-0.010161,-0.036937,0.031861,-0.040947,-0.004414,-0.011904,-0.035708,-0.059962,0.021938,-0.027605,0.000439,-0.003035,0.023976,-0.005226,0.06366,0.051885,-0.00708,-0.030403,0.035594,-0.011832,-0.02218,0.020135,0.015628,-0.036521,-0.058518,-0.089885,0.025523,-0.01144,-0.007256,-0.001377,-0.025598,-0.02816,-0.006038,-0.008614,0.018424,-0.035472,-0.055351,0.003703,-0.00856,0.026524,-0.063033,0.023271,0.028832,-0.013214,-0.054398,-0.013727,0.002572,0.010427,0.01055,-0.015907,0.022653,-0.019953,0.006097,0.011575,0.027497,-0.009629,0.02892,-0.026982,0.002148,0.033218,0.025723,-0.012843,-0.002086,-0.02892,0.019965,-0.007975,-0.001668,-0.033926,0.013474,-0.02342,0.006162,-0.006549,0.011702,0.031006,0.011067,-0.024196,0.002276,0.019851,0.042411,0.024429,0.020638,0.006351,0.032375,-0.024066,-0.001797,-0.027247,-0.005463,0.012498,-0.038402,0.038218,0.022269,0.083581,-0.021,0.016294,-0.023244,0.020538,0.038338,0.030338,0.02599,0.051838,0.062437,-0.042235,-0.040669,-0.05852,-0.030266,0.00729,-0.00091,-0.012541,0.014318,0.052671,0.010704,0.006528,-0.060641,-0.027508,0.013195,0.003946,-0.030247,-0.008901,-0.079197,3.1e-05,-0.024609,-0.008391,0.026849,-0.003479,0.048176,-0.00202
28,8,52.0,Fall 2004,7.03,4894,3544,3704,11708,14,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0.033509,0.073026,0.032067,0.033866,-0.035534,-0.023383,0.004171,-0.052128,0.036317,0.038405,0.016212,-0.049269,-0.018687,0.029037,-0.059199,0.026409,0.03487,0.035396,-0.019874,-0.012742,-0.011865,0.022725,-0.021582,0.005387,0.040823,-0.016987,-0.052997,0.037102,0.014931,-0.002932,-0.007679,0.002017,-0.019605,0.008567,0.021359,-5.7e-05,-0.006793,-0.02099,0.040467,0.028692,0.049575,-0.013557,0.054332,-0.012396,0.006611,-0.022289,-0.027113,0.008252,0.038436,0.017671,-0.034137,0.036846,0.006656,-0.01432,0.004698,0.007955,-0.023433,-0.04209,-0.010677,-0.015413,-0.022483,0.056229,-0.047642,-0.049667,-0.027916,-0.019393,0.027552,0.071266,-0.015817,0.049222,0.028127,-0.005339,0.039752,0.014397,-0.046257,-0.055838,0.044878,0.054782,0.015369,0.086399,0.029277,-0.061364,0.030693,-0.000921,-0.015918,-0.022378,-0.084741,0.054159,-0.010346,0.015542,0.030269,0.001458,-0.025678,-0.065957,-0.017067,-0.013187,0.019158,0.015513,-0.009476,-0.005244,0.00691,-0.031384,-0.000513,-0.002002,-0.015753,-0.041126,-0.010517,-0.039384,-0.002956,-0.037395,-0.003931,0.024675,-0.013818,0.001268,0.031092,0.027338,-0.000107,-0.001816,0.03866,0.027115,-0.073232,-0.014135,-0.043308,-0.011086,-0.018441,-0.016184,-0.009338,-0.041386,0.005259,0.048489,0.016264,-0.065162,-0.068298,-0.017547,-0.036227,-0.037886,-0.003537,-0.018665,-0.035893,0.02881,0.018681,-0.038282,0.004194,-0.005613,0.011336,0.024751,-0.012344,-0.043308,-0.03074,0.011236,0.040221,0.003207,-0.051683,0.036712,0.016157,0.019987,-0.022006,-0.056219,-0.010696,-0.038547,-0.003561,0.03321,0.023879,0.003132,0.001547,-0.052919,0.004402,0.011646,0.016467,-0.018126,-0.070273,-0.040453,-0.031468,-0.082942,-0.003322,-0.008578,0.023592,-0.018896,-0.003234,-0.020846,-0.023816,-0.043606,0.012595,0.009152,-0.036216,0.000923,0.00013,0.011515,0.090346,0.017398,0.013981,0.038001,0.030625,-0.009913,-0.054036,0.012278,0.008175,0.011279,-0.051389,-0.073863,0.037429,0.011746,-0.003337,-0.006695,-0.007426,0.002193,-0.008486,-0.001289,0.005537,-0.028401,-0.022333,0.01798,-0.001913,0.030877,-0.045051,0.014192,0.038983,-0.018247,-0.076112,0.005695,-0.003393,0.001793,0.012,-0.020973,0.063947,-0.0235,0.017504,0.023846,0.018791,0.005042,0.026277,-0.040921,0.028756,0.057596,0.018601,-0.019259,0.003439,-0.032663,0.021317,-0.007353,0.018049,0.004357,-0.001511,-0.05295,0.014557,0.005914,0.045339,0.020491,0.01163,0.011417,0.026786,-0.005696,0.044519,0.056356,0.019246,-0.022986,0.027986,-0.019878,-0.03283,-0.027301,-0.001957,0.000672,-0.025853,0.049954,0.030327,0.085031,-0.04761,0.040933,-0.038543,0.025517,0.036078,0.032642,0.017922,0.042792,0.049234,-0.037459,-0.043644,-0.058459,-0.013252,-0.010419,0.003661,-0.000534,0.033982,0.031032,0.017119,0.00261,-0.059511,0.015335,0.002775,-0.015049,-0.043467,-0.002912,-0.069303,0.010295,-0.042858,-0.009359,-0.01692,-0.021534,0.047166,-0.012552


In [105]:
premiered_list = list(more_reduced_df['premiered'].astype(str))
p_list_no_zeros = [val for val in premiered_list if val != '0']
oldest = int((min(p_list_no_zeros)).split()[1])
print(oldest)
youngest = int((max(p_list_no_zeros)).split()[1])
print(youngest)

1968
2019


In [106]:
YR_RANGE = youngest - oldest + 1    # 2019 - 1968 + 1 = 52
SEASONS = ['Spring', 'Summer', 'Fall', 'Winter']
all_yrs = np.empty((0, YR_RANGE))   # shape = (0,52), ind0 = 1968
all_seasons = np.empty((0, len(SEASONS)))   # shape = (0, 4)
premiered = more_reduced_df['premiered'].astype(str)
for i in range(len(more_reduced_df.index)): # 0 to 2854
    premiered_year = np.zeros((YR_RANGE,))
    premiered_season = np.zeros((len(SEASONS),))
    splits = premiered.iloc[i].split()
    if(len(splits) != 1):
        premiered_year[int(splits[1])-oldest] = 1
        premiered_season[SEASONS.index(splits[0])] = 1
    all_yrs = np.append(all_yrs, premiered_year.reshape(1, -1),axis=0)
    all_seasons = np.append(all_seasons, premiered_season.reshape(1, -1),axis=0)
print(all_yrs.shape)
print(all_seasons.shape)

(2855, 52)
(2855, 4)


In [107]:
for i in range(all_yrs.shape[1]):
    more_reduced_df.insert(len(more_reduced_df.columns), "premiered_{}".format(i+oldest), all_yrs[:,i])
for i in range(all_seasons.shape[1]):
    more_reduced_df.insert(len(more_reduced_df.columns), "premiered_{}".format(SEASONS[i]), all_seasons[:,i])
print(more_reduced_df.shape)
more_reduced_df.head()

(2855, 465)


Unnamed: 0,animeID,episodes,premiered,score,scored_by,rank,popularity,members,favorites,genre_Action,genre_Adventure,genre_Comedy,genre_Dementia,genre_Drama,genre_Ecchi,genre_Fantasy,genre_Game,genre_Harem,genre_Historical,genre_Horror,genre_Kids,genre_Magic,genre_Military,genre_Music,genre_Mystery,genre_Psychological,genre_Romance,genre_Sci-Fi,genre_Slice of Life,genre_Sports,"studio_""Brains Base""",studio_A-1 Pictures,studio_AIC,studio_Arms,studio_Bones,studio_Diomedea,studio_Doga Kobo,studio_Gainax,studio_Gonzo,studio_J.C.Staff,studio_Kyoto Animation,studio_MAPPA,studio_Madhouse,studio_Manglobe,studio_Nippon Animation,studio_OLM,studio_P.A. Works,studio_Production I.G,studio_Production Reed,studio_Satelight,studio_Shaft,studio_Shin-Ei Animation,studio_Silver Link.,studio_Studio Deen,studio_Studio Gallop,studio_Studio Gokumi,studio_Studio Pierrot,studio_Sunrise,studio_TMS Entertainment,studio_TNK,studio_Tatsunoko Production,studio_Toei Animation,studio_Xebec,studio_Zexcs,studio_feel.,studio_ufotable,source_Game,source_Light novel,source_Manga,source_Novel,source_Original,source_Unknown,source_Visual novel,producer_Aniplex,producer_Bandai Visual,producer_DAX Production,producer_Dentsu,producer_Frontier Works,producer_Fuji TV,producer_Genco,producer_Geneon Universal Entertainment,producer_Kadokawa Shoten,producer_Lantis,producer_Media Factory,producer_Movic,producer_NHK,producer_Nihon Ad Systems,producer_Pony Canyon,producer_Production I.G,producer_Shochiku,producer_Shogakukan Productions,producer_Sotsu,producer_Starchild Records,producer_TBS,producer_TV Asahi,producer_TV Tokyo,producer_VAP,producer_Victor Entertainment,rating_G - All Ages,rating_PG - Children,rating_PG-13 - Teens 13 or older,rating_R - 17+ (violence & profanity),rating_R+ - Mild Nudity,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,synopsis_embedded_1,synopsis_embedded_2,synopsis_embedded_3,synopsis_embedded_4,synopsis_embedded_5,synopsis_embedded_6,synopsis_embedded_7,synopsis_embedded_8,synopsis_embedded_9,synopsis_embedded_10,synopsis_embedded_11,synopsis_embedded_12,synopsis_embedded_13,synopsis_embedded_14,synopsis_embedded_15,synopsis_embedded_16,synopsis_embedded_17,synopsis_embedded_18,synopsis_embedded_19,synopsis_embedded_20,synopsis_embedded_21,synopsis_embedded_22,synopsis_embedded_23,synopsis_embedded_24,synopsis_embedded_25,synopsis_embedded_26,synopsis_embedded_27,synopsis_embedded_28,synopsis_embedded_29,synopsis_embedded_30,synopsis_embedded_31,synopsis_embedded_32,synopsis_embedded_33,synopsis_embedded_34,synopsis_embedded_35,synopsis_embedded_36,synopsis_embedded_37,synopsis_embedded_38,synopsis_embedded_39,synopsis_embedded_40,synopsis_embedded_41,synopsis_embedded_42,synopsis_embedded_43,synopsis_embedded_44,synopsis_embedded_45,synopsis_embedded_46,synopsis_embedded_47,synopsis_embedded_48,synopsis_embedded_49,synopsis_embedded_50,synopsis_embedded_51,synopsis_embedded_52,synopsis_embedded_53,synopsis_embedded_54,synopsis_embedded_55,synopsis_embedded_56,synopsis_embedded_57,synopsis_embedded_58,synopsis_embedded_59,synopsis_embedded_60,synopsis_embedded_61,synopsis_embedded_62,synopsis_embedded_63,synopsis_embedded_64,synopsis_embedded_65,synopsis_embedded_66,synopsis_embedded_67,synopsis_embedded_68,synopsis_embedded_69,synopsis_embedded_70,synopsis_embedded_71,synopsis_embedded_72,synopsis_embedded_73,synopsis_embedded_74,synopsis_embedded_75,synopsis_embedded_76,synopsis_embedded_77,synopsis_embedded_78,synopsis_embedded_79,synopsis_embedded_80,synopsis_embedded_81,synopsis_embedded_82,synopsis_embedded_83,synopsis_embedded_84,synopsis_embedded_85,synopsis_embedded_86,synopsis_embedded_87,synopsis_embedded_88,synopsis_embedded_89,synopsis_embedded_90,synopsis_embedded_91,synopsis_embedded_92,synopsis_embedded_93,synopsis_embedded_94,synopsis_embedded_95,synopsis_embedded_96,synopsis_embedded_97,synopsis_embedded_98,synopsis_embedded_99,synopsis_embedded_100,synopsis_embedded_101,synopsis_embedded_102,synopsis_embedded_103,synopsis_embedded_104,synopsis_embedded_105,synopsis_embedded_106,synopsis_embedded_107,synopsis_embedded_108,synopsis_embedded_109,synopsis_embedded_110,synopsis_embedded_111,synopsis_embedded_112,synopsis_embedded_113,synopsis_embedded_114,synopsis_embedded_115,synopsis_embedded_116,synopsis_embedded_117,synopsis_embedded_118,synopsis_embedded_119,synopsis_embedded_120,synopsis_embedded_121,synopsis_embedded_122,synopsis_embedded_123,synopsis_embedded_124,synopsis_embedded_125,synopsis_embedded_126,synopsis_embedded_127,synopsis_embedded_128,synopsis_embedded_129,synopsis_embedded_130,synopsis_embedded_131,synopsis_embedded_132,synopsis_embedded_133,synopsis_embedded_134,synopsis_embedded_135,synopsis_embedded_136,synopsis_embedded_137,synopsis_embedded_138,synopsis_embedded_139,synopsis_embedded_140,synopsis_embedded_141,synopsis_embedded_142,synopsis_embedded_143,synopsis_embedded_144,synopsis_embedded_145,synopsis_embedded_146,synopsis_embedded_147,synopsis_embedded_148,synopsis_embedded_149,synopsis_embedded_150,synopsis_embedded_151,synopsis_embedded_152,synopsis_embedded_153,synopsis_embedded_154,synopsis_embedded_155,synopsis_embedded_156,synopsis_embedded_157,synopsis_embedded_158,synopsis_embedded_159,synopsis_embedded_160,synopsis_embedded_161,synopsis_embedded_162,synopsis_embedded_163,synopsis_embedded_164,synopsis_embedded_165,synopsis_embedded_166,synopsis_embedded_167,synopsis_embedded_168,synopsis_embedded_169,synopsis_embedded_170,synopsis_embedded_171,synopsis_embedded_172,synopsis_embedded_173,synopsis_embedded_174,synopsis_embedded_175,synopsis_embedded_176,synopsis_embedded_177,synopsis_embedded_178,synopsis_embedded_179,synopsis_embedded_180,synopsis_embedded_181,synopsis_embedded_182,synopsis_embedded_183,synopsis_embedded_184,synopsis_embedded_185,synopsis_embedded_186,synopsis_embedded_187,synopsis_embedded_188,synopsis_embedded_189,synopsis_embedded_190,synopsis_embedded_191,synopsis_embedded_192,synopsis_embedded_193,synopsis_embedded_194,synopsis_embedded_195,synopsis_embedded_196,synopsis_embedded_197,synopsis_embedded_198,synopsis_embedded_199,synopsis_embedded_200,synopsis_embedded_201,synopsis_embedded_202,synopsis_embedded_203,synopsis_embedded_204,synopsis_embedded_205,synopsis_embedded_206,synopsis_embedded_207,synopsis_embedded_208,synopsis_embedded_209,synopsis_embedded_210,synopsis_embedded_211,synopsis_embedded_212,synopsis_embedded_213,synopsis_embedded_214,synopsis_embedded_215,synopsis_embedded_216,synopsis_embedded_217,synopsis_embedded_218,synopsis_embedded_219,synopsis_embedded_220,synopsis_embedded_221,synopsis_embedded_222,synopsis_embedded_223,synopsis_embedded_224,synopsis_embedded_225,synopsis_embedded_226,synopsis_embedded_227,synopsis_embedded_228,synopsis_embedded_229,synopsis_embedded_230,synopsis_embedded_231,synopsis_embedded_232,synopsis_embedded_233,synopsis_embedded_234,synopsis_embedded_235,synopsis_embedded_236,synopsis_embedded_237,synopsis_embedded_238,synopsis_embedded_239,synopsis_embedded_240,synopsis_embedded_241,synopsis_embedded_242,synopsis_embedded_243,synopsis_embedded_244,synopsis_embedded_245,synopsis_embedded_246,synopsis_embedded_247,synopsis_embedded_248,synopsis_embedded_249,synopsis_embedded_250,synopsis_embedded_251,synopsis_embedded_252,synopsis_embedded_253,synopsis_embedded_254,synopsis_embedded_255,synopsis_embedded_256,synopsis_embedded_257,synopsis_embedded_258,synopsis_embedded_259,synopsis_embedded_260,synopsis_embedded_261,synopsis_embedded_262,synopsis_embedded_263,synopsis_embedded_264,synopsis_embedded_265,synopsis_embedded_266,synopsis_embedded_267,synopsis_embedded_268,synopsis_embedded_269,synopsis_embedded_270,synopsis_embedded_271,synopsis_embedded_272,synopsis_embedded_273,synopsis_embedded_274,synopsis_embedded_275,synopsis_embedded_276,synopsis_embedded_277,synopsis_embedded_278,synopsis_embedded_279,synopsis_embedded_280,synopsis_embedded_281,synopsis_embedded_282,synopsis_embedded_283,synopsis_embedded_284,synopsis_embedded_285,synopsis_embedded_286,synopsis_embedded_287,synopsis_embedded_288,synopsis_embedded_289,synopsis_embedded_290,synopsis_embedded_291,synopsis_embedded_292,synopsis_embedded_293,synopsis_embedded_294,synopsis_embedded_295,synopsis_embedded_296,synopsis_embedded_297,synopsis_embedded_298,synopsis_embedded_299,synopsis_embedded_300,premiered_1968,premiered_1969,premiered_1970,premiered_1971,premiered_1972,premiered_1973,premiered_1974,premiered_1975,premiered_1976,premiered_1977,premiered_1978,premiered_1979,premiered_1980,premiered_1981,premiered_1982,premiered_1983,premiered_1984,premiered_1985,premiered_1986,premiered_1987,premiered_1988,premiered_1989,premiered_1990,premiered_1991,premiered_1992,premiered_1993,premiered_1994,premiered_1995,premiered_1996,premiered_1997,premiered_1998,premiered_1999,premiered_2000,premiered_2001,premiered_2002,premiered_2003,premiered_2004,premiered_2005,premiered_2006,premiered_2007,premiered_2008,premiered_2009,premiered_2010,premiered_2011,premiered_2012,premiered_2013,premiered_2014,premiered_2015,premiered_2016,premiered_2017,premiered_2018,premiered_2019,premiered_Spring,premiered_Summer,premiered_Fall,premiered_Winter
0,1,26.0,Spring 1998,8.81,405664,26,39,795733,43460,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.029809,0.042396,0.022913,0.040259,-0.039149,-0.011959,0.016059,-0.064689,0.050221,0.045491,0.012603,-0.046489,-0.007768,0.011972,-0.054675,0.034183,0.008571,0.073747,-0.006751,0.002686,0.001647,0.037568,-0.011122,-0.006467,0.034074,-0.00513,-0.067323,0.057732,0.03045,-0.02217,-0.002778,-0.025179,-0.017223,0.027981,0.00732,-0.016938,0.013608,-0.023168,0.032683,0.038371,0.065416,-0.013103,0.03174,0.029739,0.01164,-0.045497,-0.009923,0.005677,0.060461,0.029093,-0.022794,0.022537,-0.005312,-0.041699,-0.006712,0.044132,-0.008567,-0.077645,0.002706,-0.033053,-0.029164,0.064243,-0.054654,-0.049431,-0.03452,-0.013828,-0.004685,0.052337,-0.035267,0.0586,0.041502,-0.026901,0.022177,0.002181,-0.070948,-0.03016,0.046594,0.037892,0.00576,0.05166,0.030118,-0.051224,0.014102,-0.040939,-0.000171,-0.049184,-0.062894,0.059879,0.020875,0.035167,0.009019,-0.015971,-0.048447,-0.048976,-0.000485,0.011175,0.030076,0.025286,0.000305,-0.032328,0.017055,-0.033233,-0.002771,-0.001819,-0.021702,-0.0222,0.005008,-0.025583,-0.012564,-0.034225,-0.014823,0.017698,0.013105,0.008766,0.030944,0.016009,0.006116,-0.004369,0.063656,0.03222,-0.077808,-0.008884,-0.035245,-0.016034,-0.026342,-0.010016,-0.043281,-0.005266,0.00396,0.039512,-0.000114,-0.042252,-0.050982,0.000868,-0.025522,0.005533,-0.003248,-0.011425,-0.017867,0.058968,0.047962,-0.035847,-0.022813,0.030239,0.020939,0.045603,-0.00866,0.006972,-0.026107,0.018222,0.041626,-0.014078,-0.041253,0.043558,-0.007807,-0.03033,-0.008229,-0.045358,0.008506,-0.04455,-0.016403,0.032078,0.029107,0.019541,-0.000979,-0.043553,0.059217,-0.021323,-0.021795,0.012713,-0.041548,-0.025159,0.007053,-0.046681,0.011992,-0.021492,-0.020409,-0.039861,-0.039493,-0.021282,-0.043401,-0.054954,-0.003071,-0.025961,-0.019901,-0.013816,0.008249,0.013152,0.050161,0.018735,0.011302,0.025814,0.038649,0.001601,-0.062521,0.034121,-0.018548,-0.008048,-0.045877,-0.045716,0.052964,0.023042,-0.02705,0.005686,0.011264,-0.020137,-0.028312,-0.012747,-0.025026,-0.028384,-0.050707,0.013471,0.00906,0.029816,-0.052157,0.039922,0.047676,0.010645,-0.088924,-0.022553,0.007775,0.013793,0.011874,0.001426,0.015962,0.004444,0.018199,0.020031,0.036024,0.004275,0.013852,-0.041363,0.030135,0.037383,0.043992,-0.01189,0.009296,-0.059358,0.028553,-0.004396,0.01155,-0.016681,0.011991,-0.057401,0.022944,0.002535,0.043138,0.015089,-0.00354,-0.02121,-0.000755,0.009067,0.021951,0.076348,0.009252,-0.041426,0.03537,-0.032637,-0.036216,-0.024516,0.013985,-0.006748,-0.043264,0.035988,0.042097,0.086749,-0.04612,-0.018097,-0.04304,0.026702,0.001524,0.026744,0.006809,0.047653,0.052771,-0.031369,-0.047697,-0.054508,-0.019475,0.00538,-0.007196,0.013082,0.017379,0.02973,0.044012,-0.020394,-0.071408,-0.000607,0.015265,0.017696,-0.041449,0.015911,-0.072653,0.01074,-0.032635,-0.015616,0.003619,-0.010637,0.043498,-0.00721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,5,1.0,0,8.41,120243,164,449,197791,776,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.034286,0.033996,0.007923,0.038971,-0.032601,-0.008406,0.014557,-0.058504,0.061666,0.059972,-0.005531,-0.069254,-0.009848,0.035814,-0.059442,0.031862,0.008733,0.053755,-0.005296,-0.013566,-0.016636,0.010033,-0.008637,0.007356,0.033484,-0.020868,-0.0584,0.063953,0.027596,0.005782,0.00043,-0.014363,-0.008073,-0.016039,-0.002752,-0.00802,0.005582,-0.012561,0.040396,0.035001,0.051261,-0.025975,0.035493,-0.020143,-0.016543,-0.04018,-0.034073,0.02998,0.042406,0.005518,0.001988,0.014691,0.008662,-0.025833,-0.010682,0.020041,-0.007578,-0.043492,0.018496,-0.048332,-0.018077,0.055186,-0.027436,-0.063601,-0.023318,-0.020217,0.013749,0.057316,-0.015806,0.037613,0.013163,-0.005739,0.068124,0.009256,-0.082062,-0.030137,0.044616,0.042589,-0.003027,0.055808,0.018468,-0.046793,0.020286,0.003879,-0.020348,-0.060697,-0.071962,0.049216,-0.003338,0.022231,0.048474,0.005185,-0.032714,-0.061541,-0.009559,-0.017448,0.007921,-0.005968,-0.001945,0.01134,0.010297,-0.053358,0.001377,-0.008766,-0.010449,-0.026977,-0.015875,-0.033238,0.024455,-0.043913,0.000204,-0.004407,0.001314,-0.015963,0.038036,0.015312,0.021343,0.005879,0.04642,0.048484,-0.078182,0.005844,-0.04027,0.004293,-0.02106,-0.007908,-0.023996,-0.057414,0.000126,0.046519,-0.007095,-0.037806,-0.049617,-0.024828,-0.004732,-0.033098,0.010501,-0.010643,-0.031625,0.034885,0.026372,-0.043046,0.009983,0.021871,0.013261,0.002401,-0.016962,-0.030833,-0.059017,-0.002722,0.046054,-0.004384,-0.034703,0.03347,-0.006124,-0.008277,-0.026149,-0.05044,-0.015401,-0.023633,-0.014713,0.048229,0.0123,0.014094,0.00016,-0.041214,0.029195,-0.014758,0.012803,-0.000423,-0.059128,-0.032127,-0.02514,-0.065613,-0.001999,-0.04045,0.012925,-0.037661,-0.018058,-0.011506,-0.040262,-0.039131,-0.016425,0.002404,-0.02114,-0.01366,-0.000422,0.032882,0.060541,0.043737,0.019885,-0.010006,0.020332,-0.001026,-0.064583,0.021543,-0.030945,-0.002331,-0.030033,-0.059975,0.054624,0.013319,-0.023197,-0.003349,-0.001056,0.000562,-0.031588,0.011888,0.022546,-0.029294,-0.020384,0.018045,-0.013425,0.036303,-0.044788,0.02021,0.044186,-0.008491,-0.061528,0.020645,0.001539,0.011159,0.012887,0.003158,0.034777,-0.023623,0.033818,0.018494,0.043964,-0.0059,0.037656,-0.034594,0.027668,0.024776,0.007987,-0.024402,0.011711,-0.028553,0.046212,0.021569,-0.005814,0.00927,0.003437,-0.058497,0.017021,-0.015073,0.047122,0.010372,0.008882,-0.01488,0.033415,-0.007068,0.032605,0.033175,0.01949,-0.026248,0.035107,0.000343,-0.030352,-0.024482,-0.002295,0.000352,-0.030998,0.03665,0.032162,0.082847,-0.02951,-6.9e-05,-0.036153,0.017187,0.022112,0.040818,0.015384,0.016423,0.038067,-0.034941,-0.04781,-0.046722,0.000735,0.007427,-0.009773,-0.008749,0.006419,0.036121,0.020225,-0.000781,-0.050707,0.017542,0.038131,-0.001431,-0.038774,0.008558,-0.053541,0.013464,-0.019009,0.013802,0.005209,-0.00686,0.036505,-0.017165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,6,26.0,Spring 1998,8.3,212537,255,146,408548,10432,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0.046075,0.051554,0.021494,0.042679,-0.033989,0.011614,0.016296,-0.056546,0.050516,0.041485,-0.011288,-0.066922,-0.007412,0.036506,-0.071361,0.035155,0.037431,0.073922,-0.014399,-0.016527,-0.009324,0.024258,0.003378,0.013267,0.048147,-0.026136,-0.056828,0.036333,0.028268,-0.005679,-0.006641,-0.017382,-0.03217,0.021557,-0.001583,0.000194,0.029126,-0.011996,0.046084,0.040868,0.077538,-0.046362,0.051705,-0.010982,-0.010465,-0.033262,-0.011877,-0.00061,0.064822,0.014219,-0.018036,0.041798,-0.014228,-0.027071,-0.003972,0.02302,-0.014469,-0.056487,0.007204,-0.032974,-0.002466,0.073331,-0.022479,-0.04996,-0.025193,-0.010937,-0.002071,0.044017,-0.032198,0.045328,0.034324,0.011872,0.03682,0.00782,-0.074346,-0.060799,0.032817,0.0566,0.02572,0.065892,0.016496,-0.039125,0.009787,-0.037298,-0.017261,-0.04606,-0.061661,0.07545,-0.002407,0.018012,0.022199,0.008195,-0.02463,-0.050168,-0.0231,-0.018181,0.031423,0.026293,0.005692,0.001506,-0.01677,-0.019027,0.019068,-0.000546,-0.008867,-0.038206,-0.007084,-0.018574,0.025321,-0.040857,-0.021163,-0.008961,0.006049,0.007165,0.034344,0.00619,-0.003011,-0.019504,0.049238,0.049304,-0.078634,0.018836,-0.031849,0.012006,-0.036743,-0.040874,-0.019633,-0.022892,0.016259,0.012799,-0.024395,-0.051546,-0.052428,-0.012789,-0.037025,-0.030559,-0.015095,-0.001077,-0.023634,0.026297,0.049129,-0.014973,0.00316,0.030085,0.031603,0.019252,-0.023041,-0.027789,-0.036158,0.01199,0.046321,0.020768,-0.042878,0.034118,-0.025504,-0.012411,-0.03065,-0.050731,0.003357,-0.034415,-0.004984,0.038504,0.033941,0.008554,-0.024119,-0.048377,0.028926,-0.017718,0.014117,-0.006992,-0.040932,-0.031195,-0.01844,-0.057449,-0.006374,-0.019075,0.025331,-0.046829,-0.000778,-0.011974,-0.057296,-0.036934,0.002215,0.007896,-0.026588,-0.005585,-0.013742,0.021777,0.068491,0.03676,0.049298,0.028468,0.038089,0.019071,-0.050208,0.001869,-0.032115,0.001188,-0.050244,-0.063678,0.027357,0.013809,-0.010756,-0.010191,0.020405,-0.004609,-0.014511,-0.002724,0.007486,-0.023171,-0.034837,0.015229,0.013848,0.014829,-0.078058,0.013982,0.018455,0.007793,-0.085684,-0.01429,0.00323,0.01507,0.010628,-0.004694,0.050373,-0.010192,0.03277,0.029111,0.03597,-0.006674,0.032799,-0.031484,0.028996,0.028729,0.036776,-0.015768,-0.003968,-0.022568,0.039227,-0.008952,0.010346,0.001793,0.001189,-0.061668,0.015997,0.013199,0.042566,0.021276,-0.000617,-0.012438,0.013163,0.003257,0.023913,0.048648,0.02407,-0.036067,0.022092,-0.018226,-0.038161,-0.029356,-0.016261,0.009063,-0.046335,0.046327,0.049616,0.080244,-0.028453,0.02287,-0.029462,0.000303,0.014516,0.047043,0.02484,0.023857,0.010588,-0.037943,-0.021131,-0.025452,-0.008461,0.015443,0.011314,-0.036393,0.017949,0.053921,0.018386,-0.000554,-0.073014,-0.001892,0.016432,0.015031,-0.049164,0.001321,-0.070184,0.002559,-0.05597,-0.004276,0.010292,-0.036921,0.017824,0.005914,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
22,7,26.0,Summer 2002,7.33,32837,2371,1171,79397,537,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0.040084,0.043519,0.011331,0.013324,-0.049597,0.013523,-0.007891,-0.050229,0.015264,0.034034,0.007409,-0.047748,-0.013092,0.016576,-0.050811,0.035068,0.015911,0.051699,0.00418,-0.007916,0.004467,0.015594,-0.019425,-0.017235,0.013845,-0.012678,-0.064591,0.038377,0.003203,-0.008733,0.001605,0.000176,-0.025672,0.005307,0.022348,0.008782,-0.006471,-0.031222,0.043191,0.036688,0.071859,-0.018241,0.049503,-0.000172,0.010562,-0.028092,-0.003908,0.011069,0.00617,0.021578,-0.050432,0.031656,0.013686,-0.003417,-0.043547,0.030178,-0.018181,-0.038867,0.019866,-0.039338,0.005569,0.0609,-0.056638,-0.06827,-0.040912,-0.010647,0.005331,0.04417,-0.018075,0.039898,0.035091,0.014663,0.04517,0.019552,-0.023127,-0.050683,0.0525,0.051285,0.007713,0.068368,0.033391,-0.031139,0.009146,-0.009842,-0.013967,-0.055956,-0.070713,0.014257,0.023108,0.037685,0.02007,0.000528,-0.04622,-0.060177,0.014468,-0.030158,0.030222,-0.009351,-0.001452,-0.007641,-0.000649,-0.043326,0.005714,0.019813,0.008822,-0.021916,-0.038958,-0.021583,0.023121,-0.012859,-0.017479,0.023516,-0.026572,0.008006,0.027996,0.022325,0.030625,-0.002895,0.089061,0.010793,-0.061058,-0.000739,-0.016678,0.004759,-0.022246,-0.008839,-0.013216,-0.004543,-0.008449,0.020789,-0.008573,-0.05909,-0.058241,-0.036238,-0.025937,-0.018035,-0.00175,-0.048649,0.008116,0.012063,0.048729,-0.030933,0.037315,0.005854,0.057903,0.036704,-0.028571,-0.014908,-0.026466,-0.001457,0.031291,0.029841,-0.049659,0.046499,-0.003366,-0.032992,-0.030007,-0.041595,-0.001108,-0.017381,0.017134,0.028097,0.002674,-0.002948,0.005731,-0.043066,0.019577,-0.014403,0.002002,0.006388,-0.073542,-0.012233,-0.003693,-0.06872,-0.010161,-0.036937,0.031861,-0.040947,-0.004414,-0.011904,-0.035708,-0.059962,0.021938,-0.027605,0.000439,-0.003035,0.023976,-0.005226,0.06366,0.051885,-0.00708,-0.030403,0.035594,-0.011832,-0.02218,0.020135,0.015628,-0.036521,-0.058518,-0.089885,0.025523,-0.01144,-0.007256,-0.001377,-0.025598,-0.02816,-0.006038,-0.008614,0.018424,-0.035472,-0.055351,0.003703,-0.00856,0.026524,-0.063033,0.023271,0.028832,-0.013214,-0.054398,-0.013727,0.002572,0.010427,0.01055,-0.015907,0.022653,-0.019953,0.006097,0.011575,0.027497,-0.009629,0.02892,-0.026982,0.002148,0.033218,0.025723,-0.012843,-0.002086,-0.02892,0.019965,-0.007975,-0.001668,-0.033926,0.013474,-0.02342,0.006162,-0.006549,0.011702,0.031006,0.011067,-0.024196,0.002276,0.019851,0.042411,0.024429,0.020638,0.006351,0.032375,-0.024066,-0.001797,-0.027247,-0.005463,0.012498,-0.038402,0.038218,0.022269,0.083581,-0.021,0.016294,-0.023244,0.020538,0.038338,0.030338,0.02599,0.051838,0.062437,-0.042235,-0.040669,-0.05852,-0.030266,0.00729,-0.00091,-0.012541,0.014318,0.052671,0.010704,0.006528,-0.060641,-0.027508,0.013195,0.003946,-0.030247,-0.008901,-0.079197,3.1e-05,-0.024609,-0.008391,0.026849,-0.003479,0.048176,-0.00202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
28,8,52.0,Fall 2004,7.03,4894,3544,3704,11708,14,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0.033509,0.073026,0.032067,0.033866,-0.035534,-0.023383,0.004171,-0.052128,0.036317,0.038405,0.016212,-0.049269,-0.018687,0.029037,-0.059199,0.026409,0.03487,0.035396,-0.019874,-0.012742,-0.011865,0.022725,-0.021582,0.005387,0.040823,-0.016987,-0.052997,0.037102,0.014931,-0.002932,-0.007679,0.002017,-0.019605,0.008567,0.021359,-5.7e-05,-0.006793,-0.02099,0.040467,0.028692,0.049575,-0.013557,0.054332,-0.012396,0.006611,-0.022289,-0.027113,0.008252,0.038436,0.017671,-0.034137,0.036846,0.006656,-0.01432,0.004698,0.007955,-0.023433,-0.04209,-0.010677,-0.015413,-0.022483,0.056229,-0.047642,-0.049667,-0.027916,-0.019393,0.027552,0.071266,-0.015817,0.049222,0.028127,-0.005339,0.039752,0.014397,-0.046257,-0.055838,0.044878,0.054782,0.015369,0.086399,0.029277,-0.061364,0.030693,-0.000921,-0.015918,-0.022378,-0.084741,0.054159,-0.010346,0.015542,0.030269,0.001458,-0.025678,-0.065957,-0.017067,-0.013187,0.019158,0.015513,-0.009476,-0.005244,0.00691,-0.031384,-0.000513,-0.002002,-0.015753,-0.041126,-0.010517,-0.039384,-0.002956,-0.037395,-0.003931,0.024675,-0.013818,0.001268,0.031092,0.027338,-0.000107,-0.001816,0.03866,0.027115,-0.073232,-0.014135,-0.043308,-0.011086,-0.018441,-0.016184,-0.009338,-0.041386,0.005259,0.048489,0.016264,-0.065162,-0.068298,-0.017547,-0.036227,-0.037886,-0.003537,-0.018665,-0.035893,0.02881,0.018681,-0.038282,0.004194,-0.005613,0.011336,0.024751,-0.012344,-0.043308,-0.03074,0.011236,0.040221,0.003207,-0.051683,0.036712,0.016157,0.019987,-0.022006,-0.056219,-0.010696,-0.038547,-0.003561,0.03321,0.023879,0.003132,0.001547,-0.052919,0.004402,0.011646,0.016467,-0.018126,-0.070273,-0.040453,-0.031468,-0.082942,-0.003322,-0.008578,0.023592,-0.018896,-0.003234,-0.020846,-0.023816,-0.043606,0.012595,0.009152,-0.036216,0.000923,0.00013,0.011515,0.090346,0.017398,0.013981,0.038001,0.030625,-0.009913,-0.054036,0.012278,0.008175,0.011279,-0.051389,-0.073863,0.037429,0.011746,-0.003337,-0.006695,-0.007426,0.002193,-0.008486,-0.001289,0.005537,-0.028401,-0.022333,0.01798,-0.001913,0.030877,-0.045051,0.014192,0.038983,-0.018247,-0.076112,0.005695,-0.003393,0.001793,0.012,-0.020973,0.063947,-0.0235,0.017504,0.023846,0.018791,0.005042,0.026277,-0.040921,0.028756,0.057596,0.018601,-0.019259,0.003439,-0.032663,0.021317,-0.007353,0.018049,0.004357,-0.001511,-0.05295,0.014557,0.005914,0.045339,0.020491,0.01163,0.011417,0.026786,-0.005696,0.044519,0.056356,0.019246,-0.022986,0.027986,-0.019878,-0.03283,-0.027301,-0.001957,0.000672,-0.025853,0.049954,0.030327,0.085031,-0.04761,0.040933,-0.038543,0.025517,0.036078,0.032642,0.017922,0.042792,0.049234,-0.037459,-0.043644,-0.058459,-0.013252,-0.010419,0.003661,-0.000534,0.033982,0.031032,0.017119,0.00261,-0.059511,0.015335,0.002775,-0.015049,-0.043467,-0.002912,-0.069303,0.010295,-0.042858,-0.009359,-0.01692,-0.021534,0.047166,-0.012552,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [108]:
# remove premiered column (no longer needed)
more_reduced_df = more_reduced_df.drop(['premiered'], axis = 1)
more_reduced_df.head()

# write back to csv
more_reduced_df.to_csv(r'data/model_training_data.csv')