In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
from pylab import rcParams

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

# keras imports - comment them out or do `pip install keras`
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.embeddings import Embedding

# gensim for pretrained word2vec model
import gensim

# for synopsis clean up
import string

# list of stopwords used by MySQL in MyISAM
stop_words = ["a's" , "able" , "about" , "above" , "according" , "accordingly" , "across" , "actually" , "after" , "afterwards" , "again" , "against" , "ain't" , "all" , "allow" , "allows" , "almost" , "alone" , "along" , "already" , "also" , "although" , "always" , "am" , "among" , "amongst" , "an" , "and" , "another" , "any" , "anybody" , "anyhow" , "anyone" , "anything" , "anyway" , "anyways" , "anywhere" , "apart" , "appear" , "appreciate" , "appropriate" , "are" , "aren't" , "around" , "as" , "aside" , "ask" , "asking" , "associated" , "at" , "available" , "away" , "awfully" , "be" , "became" , "because" , "become" , "becomes" , "becoming" , "been" , "before" , "beforehand" , "behind" , "being" , "believe" , "below" , "beside" , "besides" , "best" , "better" , "between" , "beyond" , "both" , "brief" , "but" , "by" , "c'mon" , "c's" , "came" , "can" , "can't" , "cannot" , "cant" , "cause" , "causes" , "certain" , "certainly" , "changes" , "clearly" , "co" , "com" , "come" , "comes" , "concerning" , "consequently" , "consider" , "considering" , "contain" , "containing" , "contains" , "corresponding" , "could" , "couldn't" , "course" , "currently" , "definitely" , "described" , "despite" , "did" , "didn't" , "different" , "do" , "does" , "doesn't" , "doing" , "don't" , "done" , "down" , "downwards" , "during" , "each" , "edu" , "eg" , "eight" , "either" , "else" , "elsewhere" , "enough" , "entirely" , "especially" , "et" , "etc" , "even" , "ever" , "every" , "everybody" , "everyone" , "everything" , "everywhere" , "ex" , "exactly" , "example" , "except" , "far" , "few" , "fifth" , "first" , "five" , "followed" , "following" , "follows" , "for" , "former" , "formerly" , "forth" , "four" , "from" , "further" , "furthermore" , "get" , "gets" , "getting" , "given" , "gives" , "go" , "goes" , "going" , "gone" , "got" , "gotten" , "greetings" , "had" , "hadn't" , "happens" , "hardly" , "has" , "hasn't" , "have" , "haven't" , "having" , "he" , "he's" , "hello" , "help" , "hence" , "her" , "here" , "here's" , "hereafter" , "hereby" , "herein" , "hereupon" , "hers" , "herself" , "hi" , "him" , "himself" , "his" , "hither" , "hopefully" , "how" , "howbeit" , "however" , "i'd" , "i'll" , "i'm" , "i've" , "ie" , "if" , "ignored" , "immediate" , "in" , "inasmuch" , "inc" , "indeed" , "indicate" , "indicated" , "indicates" , "inner" , "insofar" , "instead" , "into" , "inward" , "is" , "isn't" , "it" , "it'd" , "it'll" , "it's" , "its" , "itself" , "just" , "keep" , "keeps" , "kept" , "know" , "known" , "knows" , "last" , "lately" , "later" , "latter" , "latterly" , "least" , "less" , "lest" , "let" , "let's" , "like" , "liked" , "likely" , "little" , "look" , "looking" , "looks" , "ltd" , "mainly" , "many" , "may" , "maybe" , "me" , "mean" , "meanwhile" , "merely" , "might" , "more" , "moreover" , "most" , "mostly" , "much" , "must" , "my" , "myself" , "name" , "namely" , "nd" , "near" , "nearly" , "necessary" , "need" , "needs" , "neither" , "never" , "nevertheless" , "new" , "next" , "nine" , "no" , "nobody" , "non" , "none" , "noone" , "nor" , "normally" , "not" , "nothing" , "novel" , "now" , "nowhere" , "obviously" , "of" , "off" , "often" , "oh" , "ok" , "okay" , "old" , "on" , "once" , "one" , "ones" , "only" , "onto" , "or" , "other" , "others" , "otherwise" , "ought" , "our" , "ours" , "ourselves" , "out" , "outside" , "over" , "overall" , "own" , "particular" , "particularly" , "per" , "perhaps" , "placed" , "please" , "plus" , "possible" , "presumably" , "probably" , "provides" , "que" , "quite" , "qv" , "rather" , "rd" , "re" , "really" , "reasonably" , "regarding" , "regardless" , "regards" , "relatively" , "respectively" , "right" , "said" , "same" , "saw" , "say" , "saying" , "says" , "second" , "secondly" , "see" , "seeing" , "seem" , "seemed" , "seeming" , "seems" , "seen" , "self" , "selves" , "sensible" , "sent" , "serious" , "seriously" , "seven" , "several" , "shall" , "she" , "should" , "shouldn't" , "since" , "six" , "so" , "some" , "somebody" , "somehow" , "someone" , "something" , "sometime" , "sometimes" , "somewhat" , "somewhere" , "soon" , "sorry" , "specified" , "specify" , "specifying" , "still" , "sub" , "such" , "sup" , "sure" , "t's" , "take" , "taken" , "tell" , "tends" , "th" , "than" , "thank" , "thanks" , "thanx" , "that" , "that's" , "thats" , "the" , "their" , "theirs" , "them" , "themselves" , "then" , "thence" , "there" , "there's" , "thereafter" , "thereby" , "therefore" , "therein" , "theres" , "thereupon" , "these" , "they" , "they'd" , "they'll" , "they're" , "they've" , "think" , "third" , "this" , "thorough" , "thoroughly" , "those" , "though" , "three" , "through" , "throughout" , "thru" , "thus" , "to" , "together" , "too" , "took" , "toward" , "towards" , "tried" , "tries" , "truly" , "try" , "trying" , "twice" , "two" , "un" , "under" , "unfortunately" , "unless" , "unlikely" , "until" , "unto" , "up" , "upon" , "us" , "use" , "used" , "useful" , "uses" , "using" , "usually" , "value" , "various" , "very" , "via" , "viz" , "vs" , "want" , "wants" , "was" , "wasn't" , "way" , "we" , "we'd" , "we'll" , "we're" , "we've" , "welcome" , "well" , "went" , "were" , "weren't" , "what" , "what's" , "whatever" , "when" , "whence" , "whenever" , "where" , "where's" , "whereafter" , "whereas" , "whereby" , "wherein" , "whereupon" , "wherever" , "whether" , "which" , "while" , "whither" , "who" , "who's" , "whoever" , "whole" , "whom" , "whose" , "why" , "will" , "willing" , "wish" , "with" , "within" , "without" , "won't" , "wonder" , "would" , "wouldn't" , "yes" , "yet" , "you" , "you'd" , "you'll" , "you're" , "you've" , "your" , "yours" , "yourself" , "yourselves" , "zero"]

rcParams['figure.figsize'] = 10, 6
display(HTML("<style>.container { width:95% !important; }</style>"))

Using TensorFlow backend.


In [3]:
df = pd.read_csv("data/tidy_anime.csv")
df.shape

(77911, 28)

In [4]:
desired_cols = ['animeID', 'title_english', 'type', 'source', 'producers', 'genre', 'studio',
               'episodes', 'premiered', 'rating', 'score', 'scored_by', 'rank', 'popularity',
               'members', 'favorites', 'synopsis']
truncated_df = df[desired_cols]

In [5]:
desired_cols = ['animeID', 'title_english', 'type', 'source', 'producers', 'genre', 'studio',
               'episodes', 'premiered', 'rating', 'score', 'scored_by', 'rank', 'popularity',
               'members', 'favorites', 'synopsis']
truncated_df = df[desired_cols]
truncated_df.head()

Unnamed: 0,animeID,title_english,type,source,producers,genre,studio,episodes,premiered,rating,score,scored_by,rank,popularity,members,favorites,synopsis
0,1,Cowboy Bebop,TV,Original,Bandai Visual,Action,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
1,1,Cowboy Bebop,TV,Original,Bandai Visual,Adventure,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
2,1,Cowboy Bebop,TV,Original,Bandai Visual,Comedy,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
3,1,Cowboy Bebop,TV,Original,Bandai Visual,Drama,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."
4,1,Cowboy Bebop,TV,Original,Bandai Visual,Sci-Fi,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized sever..."


In [62]:
#change NaN to 0 in Premiered column

truncated_df.loc[:,'premiered'] = truncated_df.loc[:,'premiered'].fillna(0)

In [63]:
# filter out bad titles. Only want titles that have an english name

orig_len = len(truncated_df)
filtered_df = truncated_df[truncated_df['title_english'].notnull()]
new_len = len(filtered_df)
print ("removed {} bad anime after filtering for english titled anime only".format(orig_len - new_len))

# drop NaN rows
filtered_df.dropna(inplace=True)
print ("removed {} bad anime after dropping NaN rows".format(new_len - len(filtered_df)))


removed 30430 bad anime after filtering for english titled anime only
removed 8678 bad anime after dropping NaN rows


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [64]:
# currently the anime is duplicated, one row per genre per studio. We need to flatten all to one row
# also need to do this for type, source, producers, rating

all_ids = set(filtered_df['animeID'].unique()) # 1.8K anime IDs
print ("{} unique anime".format(len(all_ids)))

id_genre_mapping = {}
for each_id in all_ids:
    genre_list = list(filtered_df[truncated_df['animeID'] == each_id]['genre'])
    id_genre_mapping[each_id] = genre_list
    
id_studio_mapping = {}
for each_id in all_ids:
    id_studio_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['studio'])
    
id_source_mapping = {}
for each_id in all_ids:
    id_source_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['source'])
    
id_producers_mapping = {}
for each_id in all_ids:
    id_producers_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['producers'])
    
id_rating_mapping = {}
for each_id in all_ids:
    id_rating_mapping[each_id] = list(filtered_df[truncated_df['animeID'] == each_id]['rating'])

2855 unique anime


  if __name__ == '__main__':
  


In [65]:
# get distinct df, remove duplicates
reduced_df = filtered_df.groupby('animeID').head(1)

In [66]:
# will add 40 columns to the data
all_genres = set([item for sublist in id_genre_mapping.values() for item in sublist])
len(all_genres) 

40

In [67]:
anime_IDs = reduced_df.animeID.tolist()
genres_new = []
studios_new = []
sources_new = []
producers_new = []
ratings_new = []
for each_id in anime_IDs:
    genres_new.append(id_genre_mapping[each_id])
    studios_new.append(id_studio_mapping[each_id])
    sources_new.append(id_source_mapping[each_id])
    producers_new.append(id_producers_mapping[each_id])
    ratings_new.append(id_rating_mapping[each_id])
reduced_df.head()

Unnamed: 0,animeID,title_english,type,source,producers,genre,studio,episodes,premiered,rating,score,scored_by,rank,popularity,members,favorites,synopsis
0,1,Cowboy Bebop,TV,Original,Bandai Visual,Action,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as ""Cowboys."" The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member's dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic and an homage to the smooth and improvised music it is named after. [Written by MAL Rewrite]"
6,5,Cowboy Bebop: The Movie,Movie,Original,Sunrise,Action,Bones,1.0,0,R - 17+ (violence & profanity),8.41,120243,164,449,197791,776,"Another day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this routine is interrupted when Faye, who is chasing a fairly worthless target on Mars, witnesses an oil tanker suddenly explode, causing mass hysteria. As casualties mount due to a strange disease spreading through the smoke from the blast, a whopping three hundred million woolong price is placed on the head of the supposed perpetrator. With lives at stake and a solution to their money problems in sight, the Bebop crew springs into action. Spike, Jet, Faye, and Edward, followed closely by Ein, split up to pursue different leads across Alba City. Through their individual investigations, they discover a cover-up scheme involving a pharmaceutical company, revealing a plot that reaches much further than the ragtag team of bounty hunters could have realized. [Written by MAL Rewrite]"
16,6,Trigun,TV,Manga,Victor Entertainment,Action,Madhouse,26.0,Spring 1998,PG-13 - Teens 13 or older,8.3,212537,255,146,408548,10432,"Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merciless villain who lays waste to all those that oppose him and flattens entire cities for fun, garnering him the title ""The Humanoid Typhoon."" He leaves a trail of death and destruction wherever he goes, and anyone can count themselves dead if they so much as make eye contact—or so the rumors say. In actuality, Vash is a huge softie who claims to have never taken a life and avoids violence at all costs. With his crazy doughnut obsession and buffoonish attitude in tow, Vash traverses the wasteland of the planet Gunsmoke, all the while followed by two insurance agents, Meryl Stryfe and Milly Thompson, who attempt to minimize his impact on the public. But soon, their misadventures evolve into life-or-death situations as a group of legendary assassins are summoned to bring about suffering to the trio. Vash's agonizing past will be unraveled and his morality and principles pushed to the breaking point. [Written by MAL Rewrite]"
22,7,Witch Hunter Robin,TV,Original,Bandai Visual,Action,Sunrise,26.0,Summer 2002,PG-13 - Teens 13 or older,7.33,32837,2371,1171,79397,537,"Witches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-year-old craft user, arrives from Italy to Japan to work for an organization named STN Japan Division (STN-J) as a replacement for one of STN-J's witch hunters who was recently killed. Unlike other divisions of STN, STN-J tries to capture the witches alive in order to learn why and how they became witches in the first place. (Source: ANN)"
28,8,Beet the Vandel Buster,TV,Manga,TV Tokyo,Adventure,Toei Animation,52.0,Fall 2004,PG - Children,7.03,4894,3544,3704,11708,14,"It is the dark century and the people are suffering under the rule of the devil, Vandel, who is able to manipulate monsters. The Vandel Busters are a group of people who hunt these devils, and among them, the Zenon Squad is known to be the strongest busters on the continent. A young boy, Beet, dreams of joining the Zenon Squad. However, one day, as a result of Beet's fault, the Zenon squad was defeated by the devil, Beltose. The five dying busters sacrificed their life power into their five weapons, Saiga. After giving their weapons to Beet, they passed away. Years have passed since then and the young Vandel Buster, Beet, begins his adventure to carry out the Zenon Squad's will to put an end to the dark century."


In [68]:
mlb = MultiLabelBinarizer()
encoded_genres = mlb.fit_transform(genres_new)
encoded_studios = mlb.fit_transform(studios_new)
encoded_sources = mlb.fit_transform(sources_new)
encoded_producers = mlb.fit_transform(producers_new)
encoded_ratings = mlb.fit_transform(ratings_new)

In [69]:
genre_columns_added = encoded_genres.shape[1]

for col_idx in range(genre_columns_added):
    reduced_df.insert(len(reduced_df.columns), "genre_{}".format(col_idx+1), encoded_genres[:, col_idx])

for col_idx in range(encoded_studios.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "studio_{}".format(col_idx+1), encoded_studios[:, col_idx])
    
for col_idx in range(encoded_sources.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "source_{}".format(col_idx+1), encoded_sources[:, col_idx])
    
for col_idx in range(encoded_producers.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "producers_{}".format(col_idx+1), encoded_producers[:, col_idx])
    
for col_idx in range(encoded_ratings.shape[1]):
    reduced_df.insert(len(reduced_df.columns), "rating_{}".format(col_idx+1), encoded_ratings[:, col_idx])

In [70]:
reduced_df.head()

Unnamed: 0,animeID,title_english,type,source,producers,genre,studio,episodes,premiered,rating,score,scored_by,rank,popularity,members,favorites,synopsis,genre_1,genre_2,genre_3,genre_4,genre_5,genre_6,genre_7,genre_8,genre_9,genre_10,genre_11,genre_12,genre_13,genre_14,genre_15,genre_16,genre_17,genre_18,genre_19,genre_20,genre_21,genre_22,genre_23,genre_24,genre_25,genre_26,genre_27,genre_28,genre_29,genre_30,genre_31,genre_32,genre_33,genre_34,genre_35,genre_36,genre_37,genre_38,genre_39,genre_40,studio_1,studio_2,studio_3,studio_4,studio_5,studio_6,studio_7,studio_8,studio_9,studio_10,studio_11,studio_12,studio_13,studio_14,studio_15,studio_16,studio_17,studio_18,studio_19,studio_20,studio_21,studio_22,studio_23,studio_24,studio_25,studio_26,studio_27,studio_28,studio_29,studio_30,studio_31,studio_32,studio_33,studio_34,studio_35,studio_36,studio_37,studio_38,studio_39,studio_40,studio_41,studio_42,studio_43,studio_44,studio_45,studio_46,studio_47,studio_48,studio_49,studio_50,studio_51,studio_52,studio_53,studio_54,studio_55,studio_56,studio_57,studio_58,studio_59,studio_60,studio_61,studio_62,studio_63,studio_64,studio_65,studio_66,studio_67,studio_68,studio_69,studio_70,studio_71,studio_72,studio_73,studio_74,studio_75,studio_76,studio_77,studio_78,studio_79,studio_80,studio_81,studio_82,studio_83,studio_84,studio_85,studio_86,studio_87,studio_88,studio_89,studio_90,studio_91,studio_92,studio_93,studio_94,studio_95,studio_96,studio_97,studio_98,studio_99,studio_100,studio_101,studio_102,studio_103,studio_104,studio_105,studio_106,studio_107,studio_108,studio_109,studio_110,studio_111,studio_112,studio_113,studio_114,studio_115,studio_116,studio_117,studio_118,studio_119,studio_120,studio_121,studio_122,studio_123,studio_124,studio_125,studio_126,studio_127,studio_128,studio_129,studio_130,studio_131,studio_132,studio_133,studio_134,studio_135,studio_136,studio_137,studio_138,studio_139,studio_140,studio_141,studio_142,studio_143,studio_144,studio_145,studio_146,studio_147,studio_148,studio_149,studio_150,studio_151,studio_152,studio_153,studio_154,studio_155,studio_156,studio_157,studio_158,studio_159,studio_160,studio_161,studio_162,studio_163,studio_164,studio_165,studio_166,studio_167,studio_168,studio_169,studio_170,studio_171,studio_172,studio_173,studio_174,studio_175,studio_176,studio_177,studio_178,studio_179,studio_180,studio_181,studio_182,studio_183,studio_184,studio_185,studio_186,studio_187,studio_188,studio_189,studio_190,studio_191,studio_192,studio_193,studio_194,studio_195,studio_196,studio_197,studio_198,studio_199,studio_200,studio_201,studio_202,studio_203,studio_204,studio_205,studio_206,studio_207,studio_208,studio_209,studio_210,studio_211,studio_212,studio_213,studio_214,studio_215,studio_216,studio_217,studio_218,studio_219,studio_220,studio_221,studio_222,studio_223,studio_224,studio_225,studio_226,studio_227,studio_228,studio_229,studio_230,studio_231,studio_232,studio_233,studio_234,studio_235,studio_236,studio_237,studio_238,studio_239,studio_240,studio_241,studio_242,studio_243,studio_244,studio_245,studio_246,studio_247,studio_248,studio_249,studio_250,studio_251,studio_252,studio_253,studio_254,studio_255,studio_256,studio_257,studio_258,studio_259,studio_260,studio_261,studio_262,studio_263,studio_264,studio_265,studio_266,studio_267,studio_268,studio_269,studio_270,studio_271,studio_272,studio_273,studio_274,studio_275,studio_276,studio_277,studio_278,studio_279,studio_280,studio_281,studio_282,studio_283,studio_284,studio_285,studio_286,studio_287,studio_288,studio_289,studio_290,studio_291,studio_292,source_1,source_2,source_3,source_4,source_5,source_6,source_7,source_8,source_9,source_10,source_11,source_12,source_13,source_14,source_15,producers_1,producers_2,producers_3,producers_4,producers_5,producers_6,producers_7,producers_8,producers_9,producers_10,producers_11,producers_12,producers_13,producers_14,producers_15,producers_16,producers_17,producers_18,producers_19,producers_20,producers_21,producers_22,producers_23,producers_24,producers_25,producers_26,producers_27,producers_28,producers_29,producers_30,producers_31,producers_32,producers_33,producers_34,producers_35,producers_36,producers_37,producers_38,producers_39,producers_40,producers_41,producers_42,producers_43,producers_44,producers_45,producers_46,producers_47,producers_48,producers_49,producers_50,producers_51,producers_52,producers_53,producers_54,producers_55,producers_56,producers_57,producers_58,producers_59,producers_60,producers_61,producers_62,producers_63,producers_64,producers_65,producers_66,producers_67,producers_68,producers_69,producers_70,producers_71,producers_72,producers_73,producers_74,producers_75,producers_76,producers_77,producers_78,producers_79,producers_80,producers_81,producers_82,producers_83,producers_84,producers_85,producers_86,producers_87,producers_88,producers_89,producers_90,producers_91,producers_92,producers_93,producers_94,producers_95,producers_96,producers_97,producers_98,producers_99,producers_100,producers_101,producers_102,producers_103,producers_104,producers_105,producers_106,producers_107,producers_108,producers_109,producers_110,producers_111,producers_112,producers_113,producers_114,producers_115,producers_116,producers_117,producers_118,producers_119,producers_120,producers_121,producers_122,producers_123,producers_124,producers_125,producers_126,producers_127,producers_128,producers_129,producers_130,producers_131,producers_132,producers_133,producers_134,producers_135,producers_136,producers_137,producers_138,producers_139,producers_140,producers_141,producers_142,producers_143,producers_144,producers_145,producers_146,producers_147,producers_148,producers_149,producers_150,producers_151,producers_152,producers_153,producers_154,producers_155,producers_156,producers_157,producers_158,producers_159,producers_160,producers_161,producers_162,producers_163,producers_164,producers_165,producers_166,producers_167,producers_168,producers_169,producers_170,producers_171,producers_172,producers_173,producers_174,producers_175,producers_176,producers_177,producers_178,producers_179,producers_180,producers_181,producers_182,producers_183,producers_184,producers_185,producers_186,producers_187,producers_188,producers_189,producers_190,producers_191,producers_192,producers_193,producers_194,producers_195,producers_196,producers_197,producers_198,producers_199,producers_200,producers_201,producers_202,producers_203,producers_204,producers_205,producers_206,producers_207,producers_208,producers_209,producers_210,producers_211,producers_212,producers_213,producers_214,producers_215,producers_216,producers_217,producers_218,producers_219,producers_220,producers_221,producers_222,producers_223,producers_224,producers_225,producers_226,producers_227,producers_228,producers_229,producers_230,producers_231,producers_232,producers_233,producers_234,producers_235,producers_236,producers_237,producers_238,producers_239,producers_240,producers_241,producers_242,producers_243,producers_244,producers_245,producers_246,producers_247,producers_248,producers_249,producers_250,producers_251,producers_252,producers_253,producers_254,producers_255,producers_256,producers_257,producers_258,producers_259,producers_260,producers_261,producers_262,producers_263,producers_264,producers_265,producers_266,producers_267,producers_268,producers_269,producers_270,producers_271,producers_272,producers_273,producers_274,producers_275,producers_276,producers_277,producers_278,producers_279,producers_280,producers_281,producers_282,producers_283,producers_284,producers_285,producers_286,producers_287,producers_288,producers_289,producers_290,producers_291,producers_292,producers_293,producers_294,producers_295,producers_296,producers_297,producers_298,producers_299,producers_300,producers_301,producers_302,producers_303,producers_304,producers_305,producers_306,producers_307,producers_308,producers_309,producers_310,producers_311,producers_312,producers_313,producers_314,producers_315,producers_316,producers_317,producers_318,producers_319,producers_320,producers_321,producers_322,producers_323,producers_324,producers_325,producers_326,producers_327,producers_328,producers_329,producers_330,producers_331,producers_332,producers_333,producers_334,producers_335,producers_336,producers_337,producers_338,producers_339,producers_340,producers_341,producers_342,producers_343,producers_344,producers_345,producers_346,producers_347,producers_348,producers_349,producers_350,producers_351,producers_352,producers_353,producers_354,producers_355,producers_356,producers_357,producers_358,producers_359,producers_360,producers_361,producers_362,producers_363,producers_364,producers_365,producers_366,producers_367,producers_368,producers_369,producers_370,producers_371,producers_372,producers_373,producers_374,producers_375,producers_376,producers_377,producers_378,producers_379,producers_380,producers_381,producers_382,producers_383,producers_384,producers_385,producers_386,producers_387,producers_388,producers_389,producers_390,producers_391,producers_392,producers_393,producers_394,producers_395,producers_396,producers_397,producers_398,producers_399,producers_400,producers_401,producers_402,producers_403,producers_404,producers_405,producers_406,producers_407,producers_408,producers_409,producers_410,producers_411,producers_412,producers_413,producers_414,producers_415,producers_416,producers_417,producers_418,producers_419,producers_420,producers_421,producers_422,producers_423,producers_424,producers_425,producers_426,producers_427,producers_428,producers_429,producers_430,producers_431,producers_432,producers_433,producers_434,producers_435,producers_436,producers_437,producers_438,producers_439,producers_440,producers_441,producers_442,producers_443,producers_444,producers_445,producers_446,producers_447,producers_448,producers_449,producers_450,producers_451,producers_452,producers_453,producers_454,producers_455,producers_456,producers_457,producers_458,producers_459,producers_460,producers_461,producers_462,producers_463,producers_464,producers_465,producers_466,producers_467,producers_468,producers_469,producers_470,producers_471,producers_472,producers_473,producers_474,producers_475,producers_476,producers_477,producers_478,producers_479,producers_480,producers_481,producers_482,producers_483,producers_484,producers_485,producers_486,producers_487,producers_488,producers_489,producers_490,producers_491,producers_492,producers_493,producers_494,producers_495,producers_496,producers_497,producers_498,producers_499,producers_500,producers_501,producers_502,producers_503,producers_504,producers_505,producers_506,producers_507,producers_508,producers_509,producers_510,producers_511,producers_512,producers_513,producers_514,producers_515,producers_516,producers_517,producers_518,producers_519,producers_520,producers_521,producers_522,producers_523,producers_524,producers_525,producers_526,producers_527,producers_528,producers_529,producers_530,producers_531,producers_532,producers_533,producers_534,producers_535,producers_536,producers_537,producers_538,producers_539,producers_540,producers_541,producers_542,producers_543,producers_544,producers_545,producers_546,producers_547,producers_548,producers_549,producers_550,producers_551,producers_552,producers_553,producers_554,producers_555,producers_556,producers_557,producers_558,producers_559,producers_560,producers_561,producers_562,producers_563,producers_564,producers_565,producers_566,producers_567,producers_568,producers_569,producers_570,producers_571,producers_572,producers_573,producers_574,producers_575,producers_576,producers_577,producers_578,producers_579,producers_580,producers_581,producers_582,producers_583,producers_584,producers_585,producers_586,producers_587,producers_588,producers_589,producers_590,producers_591,producers_592,producers_593,producers_594,producers_595,producers_596,producers_597,producers_598,producers_599,producers_600,producers_601,producers_602,producers_603,producers_604,producers_605,producers_606,producers_607,producers_608,producers_609,producers_610,producers_611,producers_612,producers_613,producers_614,producers_615,producers_616,producers_617,producers_618,producers_619,producers_620,producers_621,producers_622,producers_623,producers_624,producers_625,producers_626,producers_627,producers_628,producers_629,producers_630,producers_631,producers_632,producers_633,producers_634,producers_635,producers_636,producers_637,producers_638,producers_639,producers_640,producers_641,producers_642,producers_643,producers_644,producers_645,producers_646,producers_647,producers_648,producers_649,producers_650,producers_651,producers_652,producers_653,producers_654,producers_655,producers_656,producers_657,producers_658,producers_659,producers_660,producers_661,producers_662,producers_663,producers_664,producers_665,producers_666,producers_667,producers_668,producers_669,producers_670,producers_671,producers_672,producers_673,producers_674,producers_675,producers_676,producers_677,producers_678,producers_679,producers_680,producers_681,producers_682,producers_683,producers_684,producers_685,producers_686,producers_687,producers_688,producers_689,producers_690,producers_691,producers_692,producers_693,producers_694,producers_695,producers_696,producers_697,producers_698,producers_699,producers_700,producers_701,producers_702,producers_703,producers_704,producers_705,producers_706,producers_707,producers_708,producers_709,producers_710,producers_711,producers_712,producers_713,producers_714,producers_715,producers_716,producers_717,producers_718,producers_719,producers_720,producers_721,producers_722,producers_723,producers_724,producers_725,producers_726,producers_727,producers_728,producers_729,producers_730,producers_731,producers_732,rating_1,rating_2,rating_3,rating_4,rating_5,rating_6
0,1,Cowboy Bebop,TV,Original,Bandai Visual,Action,Sunrise,26.0,Spring 1998,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460,"In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as ""Cowboys."" The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member's dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic and an homage to the smooth and improvised music it is named after. [Written by MAL Rewrite]",1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
6,5,Cowboy Bebop: The Movie,Movie,Original,Sunrise,Action,Bones,1.0,0,R - 17+ (violence & profanity),8.41,120243,164,449,197791,776,"Another day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this routine is interrupted when Faye, who is chasing a fairly worthless target on Mars, witnesses an oil tanker suddenly explode, causing mass hysteria. As casualties mount due to a strange disease spreading through the smoke from the blast, a whopping three hundred million woolong price is placed on the head of the supposed perpetrator. With lives at stake and a solution to their money problems in sight, the Bebop crew springs into action. Spike, Jet, Faye, and Edward, followed closely by Ein, split up to pursue different leads across Alba City. Through their individual investigations, they discover a cover-up scheme involving a pharmaceutical company, revealing a plot that reaches much further than the ragtag team of bounty hunters could have realized. [Written by MAL Rewrite]",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
16,6,Trigun,TV,Manga,Victor Entertainment,Action,Madhouse,26.0,Spring 1998,PG-13 - Teens 13 or older,8.3,212537,255,146,408548,10432,"Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merciless villain who lays waste to all those that oppose him and flattens entire cities for fun, garnering him the title ""The Humanoid Typhoon."" He leaves a trail of death and destruction wherever he goes, and anyone can count themselves dead if they so much as make eye contact—or so the rumors say. In actuality, Vash is a huge softie who claims to have never taken a life and avoids violence at all costs. With his crazy doughnut obsession and buffoonish attitude in tow, Vash traverses the wasteland of the planet Gunsmoke, all the while followed by two insurance agents, Meryl Stryfe and Milly Thompson, who attempt to minimize his impact on the public. But soon, their misadventures evolve into life-or-death situations as a group of legendary assassins are summoned to bring about suffering to the trio. Vash's agonizing past will be unraveled and his morality and principles pushed to the breaking point. [Written by MAL Rewrite]",1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
22,7,Witch Hunter Robin,TV,Original,Bandai Visual,Action,Sunrise,26.0,Summer 2002,PG-13 - Teens 13 or older,7.33,32837,2371,1171,79397,537,"Witches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-year-old craft user, arrives from Italy to Japan to work for an organization named STN Japan Division (STN-J) as a replacement for one of STN-J's witch hunters who was recently killed. Unlike other divisions of STN, STN-J tries to capture the witches alive in order to learn why and how they became witches in the first place. (Source: ANN)",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
28,8,Beet the Vandel Buster,TV,Manga,TV Tokyo,Adventure,Toei Animation,52.0,Fall 2004,PG - Children,7.03,4894,3544,3704,11708,14,"It is the dark century and the people are suffering under the rule of the devil, Vandel, who is able to manipulate monsters. The Vandel Busters are a group of people who hunt these devils, and among them, the Zenon Squad is known to be the strongest busters on the continent. A young boy, Beet, dreams of joining the Zenon Squad. However, one day, as a result of Beet's fault, the Zenon squad was defeated by the devil, Beltose. The five dying busters sacrificed their life power into their five weapons, Saiga. After giving their weapons to Beet, they passed away. Years have passed since then and the young Vandel Buster, Beet, begins his adventure to carry out the Zenon Squad's will to put an end to the dark century.",0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [76]:
#### Decreasing Count of Entries for each column value

#set options to see full dataframe
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

reduced_df.groupby('genre').count().sort_values(by=["animeID"], ascending=False)['animeID']
    #Keep top 15? Add Horror, Historical?

#reduced_df.groupby('studio').count().sort_values(by=["animeID"], ascending=False)['animeID']
    #Keep studios with 20+ records?

#reduced_df.groupby('source').count().sort_values(by=["animeID"], ascending=False)['animeID']
    #Keep all 15?

#reduced_df.groupby('producers').count().sort_values(by=["animeID"], ascending=False)['animeID']
    #Keep Producers with 30+ records?

#reduced_df.groupby('rating').count().sort_values(by=["animeID"], ascending=False)['animeID']
    #Keep all except 'None'?

genre
Action           1122
Comedy           520 
Adventure        287 
Slice of Life    179 
Drama            136 
Sci-Fi           111 
Mystery          69  
Music            59  
Fantasy          54  
Game             43  
Harem            42  
Military         25  
Ecchi            22  
Kids             21  
Romance          21  
Sports           21  
Magic            16  
Dementia         16  
Historical       16  
Psychological    13  
Horror           12  
Supernatural     9   
Mecha            8   
Demons           8   
Space            6   
School           5   
Cars             3   
Parody           2   
Martial Arts     2   
Seinen           2   
Police           1   
Samurai          1   
Shounen          1   
Josei            1   
Thriller         1   
Name: animeID, dtype: int64

# Encoding textual data - tokenization approach
* May want to look into cleaning all the synopsis first, ex: remove (), lower case, etc (disregarding this for now)

In [28]:
synopsis_list = reduced_df['synopsis'].tolist()
# synopsis_list

In [64]:
VOCAB_SIZE = None
MAX_SEQ_LEN = 0

# find vocab_size
all_words = {}

# DEBUG
debug = False
init = 0
count = 100

for synopsis in synopsis_list:

    # =========================
    if debug:
        if init == count - 1:
            print("Original Synopsis:")
            print(synopsis + "\n")
    # =========================
    
    # deletes synopsis credits e.g. [Written by MAL Rewrite], (Source: ANN)
    if synopsis[-1] == ")":
        idx = synopsis.rfind("(")
        synopsis = synopsis[:idx]
    elif synopsis[-1] == "]":
        idx = synopsis.rfind("[")
        synopsis = synopsis[:idx]
    
    # deletes punctuation
    punctuation = string.punctuation
    for c in punctuation:
        synopsis = synopsis.replace(c, "")
    
    synopsis = synopsis.lower()
    
    # =========================
    if debug:
        if init == count - 1:
            print("Lower Case Synopsis Without Punctuation:")
            print(synopsis + "\n")
    # =========================

    word_list = synopsis.split(" ")
    
    
    # =========================
    if debug:
        if init == count - 1:
            print("Original Word List:")
            print(str(word_list) + "\n")
    # =========================
    
    # remove stop words
    word_list = [x for x in word_list if x not in stop_words]
    
    
    # =========================
    if debug:
        if init == count - 1:
            print("Word List Without Stop Words:")
            print(str(word_list) + "\n")
        init += 1
    # =========================
    
    # find max seq len
    if len(word_list) > MAX_SEQ_LEN:
        MAX_SEQ_LEN = len(word_list)
        sent = word_list

    for ea_word in word_list:
        
        if ea_word in all_words:
            all_words[ea_word] += 1
        else:
            all_words[ea_word] = 1
VOCAB_SIZE = len(all_words.keys())
print ('vocab_size = ', VOCAB_SIZE)
print ('max_seq_len = ', MAX_SEQ_LEN)
# print (MAX_SEQ_LEN, sent)

vocab_size =  21259
max_seq_len =  290


In [63]:
encoded_synopsis = [one_hot(x, VOCAB_SIZE) for x in synopsis_list]
padded_synopsis = pad_sequences(encoded_synopsis, maxlen=MAX_SEQ_LEN, padding='pre')
padded_synopsis.shape

(1867, 290)

In [61]:
# load pretrained google word2vec model
model = gensim.models.KeyedVectors.load_word2vec_format('./data/GoogleNews-vectors-negative300.bin.gz', binary=True)

FileNotFoundError: [Errno 2] No such file or directory: './data/GoogleNews-vectors-negative300.bin.gz'

In [None]:
# get averaged word embedding
EMBED_DIM = 300

all_synopsis_vectors = np.empty((0, EMBED_DIM))
print (all_synopsis_vectors.shape)
for each_synopsis in synopsis_list:
    synopsis_words = each_synopsis.split(" ")
    word_count = len(synopsis_words)
    synopsis_sum_vector = np.zeros((EMBED_DIM,))
    for each_word in synopsis_words:
        try:
            synopsis_sum_vector += model[each_word]
        except:
            # word not in pretrained vocab
            pass
    synopsis_avg_vector = (synopsis_sum_vector / word_count).reshape(1, -1)
    all_synopsis_vectors = np.append(all_synopsis_vectors, synopsis_avg_vector, axis=0)
all_synopsis_vectors.shape

In [None]:
synopsis_columns_added = all_synopsis_vectors.shape[1]
for col_idx in range(synopsis_columns_added):
    reduced_df.insert(len(reduced_df.columns), "synopsis_embedded_{}".format(col_idx+1), all_synopsis_vectors[:, col_idx])
reduced_df.head()