In [32]:
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import nltk
from IPython.display import display
pd.options.display.max_columns = None
from nltk import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [2]:
# Import the datasets to work with.
df = pd.read_csv('wiki_movie_plots_deduped.csv')
df_genre = pd.read_csv("title.basics.tsv",delimiter="\t")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Lets review the shapes of the dataframes.
display(df.shape)
display(df_genre.shape) # This one is much longer 

(34886, 8)

(5463542, 9)

In [4]:
# Lets review a small sample of the dataframe to see what we have. 
display(df.head(2))
display(df_genre.head(2))

Unnamed: 0,Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Wiki Page,Plot
0,1901,Kansas Saloon Smashers,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Kansas_Saloon_Sm...,"A bartender is working at a saloon, serving dr..."
1,1901,Love by the Light of the Moon,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Love_by_the_Ligh...,"The moon, painted with a smiling face hangs ov..."


Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"


In [5]:
# From the df_genre dataframe i just need the title of the movie and the genres column, so lets just work with that.
df_genre_used = df_genre[["primaryTitle","genres"]]
df_genre_used = df_genre_used.rename(columns={"primaryTitle":"Title"}) # I

In [6]:
# Lets merge the dfs to add the genres to our original df
df_joined = df.merge(df_genre_used,on="Title").iloc[:20000]

In [7]:
# The original df had 34.886 rows only, the merged one has 261.399 rows, this mean that there are repeated movies.
display("shape of joined df: {}".format(df_joined.shape))

# Lets see how many unique movies we had (should be  <= 34.886 because the joined do a inner join)
display("Unique movies: {}".format(len(df_joined["Title"].unique().tolist())))

# Example of a repeated movie (42 times): 
jack = len(df_genre_used[df_genre_used["Title"]=="Jack and the Beanstalk"])
display("For example,the movie Jack and the Beanstalk appears {} times".format(jack))

'shape of joined df: (20000, 9)'

'Unique movies: 1033'

'For example,the movie Jack and the Beanstalk appears 42 times'

In [8]:
# Indeed, there are movies that repeat many times that we must clean. Lets just keep the columns that i need.
df_joined_c = df_joined.loc[:,("Release Year","Title","Plot","genres")]
# Lets drop duplicate rows, There were almost 95 thousand repeated rows (261399 - 167621).
df_joined_c.drop_duplicates(inplace=True)
display(df_joined_c.shape)
df_joined_c.head(3)

(11216, 4)

Unnamed: 0,Release Year,Title,Plot,genres
0,1901,Kansas Saloon Smashers,"A bartender is working at a saloon, serving dr...","Comedy,Short"
1,1901,Love by the Light of the Moon,"The moon, painted with a smiling face hangs ov...","Comedy,Fantasy,Romance"
2,1901,The Martyred Presidents,"The film, just over a minute long, is composed...",Short


In [9]:
# Lets create a function to select the most repeated genres for a Title.
def get_best_genres(movie):
    """The function transform an array of genres to a tuple of a maximum of the 3 most repeated
    genres for a given Title"""    
    genres = df_joined_c[df_joined_c["Title"]== movie]["genres"].tolist() # List of genres
    list_genres = (",".join(genres)).split(",")
    return pd.Series(list_genres).value_counts().index[:3].tolist()

get_best_genres("The Martyred Presidents")   

['Short']

In [10]:
#df_processed = (pd.DataFrame([{"Release_Year":np.nan,"Title":np.nan,"Plot":np.nan,"Genres":np.nan}])).dropna()
empty = []
def genres(movie):    
    genres = df_joined_c[df_joined_c["Title"]== movie]["genres"].tolist() # List of genres
    rel_year = df_joined_c[df_joined_c["Title"]== movie]["Release Year"].iloc[0] # real year 
    title = df_joined_c[df_joined_c["Title"]== movie]["Title"].iloc[0] # title
    plot = df_joined_c[df_joined_c["Title"]== movie]["Plot"].iloc[0] # plot
    list_genres = (",".join(genres)).split(",") # genres in list 
    genres_proc = pd.Series(list_genres).value_counts().index[:3].tolist() # Top 3 most repeated genres
    inf = {"Release_Year":rel_year,"Title":title,"Plot":plot,"Genres":genres_proc} # Final row to add
    return empty.append(inf)


In [11]:
%%time
unique_titles = df_joined_c["Title"].unique().tolist()
for title in unique_titles:
    genres(title)

CPU times: user 12.6 s, sys: 99.5 ms, total: 12.7 s
Wall time: 16.1 s


In [37]:
movies = pd.DataFrame(empty)
movies.head()

Unnamed: 0,Genres,Plot,Release_Year,Title
0,"[Comedy, Short]","A bartender is working at a saloon, serving dr...",1901,Kansas Saloon Smashers
1,"[Comedy, Romance, Fantasy]","The moon, painted with a smiling face hangs ov...",1901,Love by the Light of the Moon
2,[Short],"The film, just over a minute long, is composed...",1901,The Martyred Presidents
3,"[Comedy, Short]",Lasting just 61 seconds and consisting of two ...,1901,"Terrible Teddy, the Grizzly King"
4,"[Family, Animation, Comedy]",The earliest known adaptation of the classic f...,1902,Jack and the Beanstalk


In [38]:
#movies["Genres"].str.split(',',expand=True)
movies["Genres_processed"]= movies["Genres"].apply(lambda x: ",".join(map(str,x)))

In [39]:
movies[["gen1","gen2","gen3"]] = movies["Genres_processed"].str.split(",",expand=True)
a = list(set(movies["gen1"].tolist()))
b = list(set(movies["gen2"].tolist()))
c = list(set(movies["gen3"].tolist()))
display("Amount of genres: {}".format(len(list(set(a+b+c)))))
genre_list= list(set(a+b+c))

'Amount of genres: 29'

In [40]:
for genre in genre_list:
    movies[genre] = movies["Genres"].apply(lambda x: 1 if genre in x else 0)

In [41]:
movies_cleaned = movies.drop(["Genres","Genres_processed","gen1","gen2","gen3","\\N",np.nan],axis=1)

In [42]:
movies_cleaned["Release_Year"] = movies_cleaned["Release_Year"].astype("category")

In [43]:
def tokenize(s):
    return word_tokenize(s)

def stem_and_lemmatize(l):
    ps = PorterStemmer()
    stemmed = [ps.stem(word).lower() for word in l]
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word) for word in stemmed]

def remove_stopwords(l):
    stopWords = set(stopwords.words('english'))
    morewords = ["``","''","'s",] 
    filtered = [word for word in l if word not in stopWords and word not in morewords and len(word)>1]
    return filtered

In [44]:
movies_cleaned["Plot"] = movies_cleaned["Plot"].apply(tokenize)

In [45]:
movies_cleaned["Plot"] = movies_cleaned["Plot"].apply(stem_and_lemmatize)

In [46]:
movies_cleaned["Plot"] = movies_cleaned["Plot"].apply(remove_stopwords)

In [47]:
words = movies_cleaned["Plot"].tolist()
all_words = [word2 for word in words for word2 in word]
all_words2 = nltk.FreqDist(all_words)
most_common_words = [w[0] for w in all_words2.most_common(1000)]

In [50]:
for word in most_common_words:
    movies_cleaned[word] = movies_cleaned["Plot"].apply(lambda x: 1 if word in x else 0)

In [54]:
movies_cleaned.head(2)

Unnamed: 0,Plot,Release_Year,Title,Fantasy,Drama,Animation,Crime,Western,Romance,Short,Comedy,Musical,Documentary,Music,Action,Biography,Talk-Show,Reality-TV,Film-Noir,Sport,News,Adventure,Family,Thriller,Adult,History,Horror,War,Mystery,Sci-Fi,hi,ha,love,marri,take,find,get,one,man,leav,return,father,becom,tell,film,back,howev,woman,two,young,see,tri,girl,fall,home,mari,go,make,wa,wife,come,play,new,time,day,goe,kill,friend,later,thi,away,give,arriv,meet,daughter,befor,john,onli,decid,doe,work,end,show,live,hous,mother,husband,life,child,discov,son,famili,money,help,escap,final,attempt,dure,anoth,also,ask,night,first,men,brother,jim,follow,order,polic,save,turn,run,plan,marriag,want,eventu,way,die,use,manag,room,realiz,next,name,year,meanwhil,set,boy,soon,forc,learn,death,fight,murder,joe,town,wealthi,mrs.,place,rescu,believ,offic,break,agre,parti,refus,offer,reveal,ship,train,war,american,begin,becaus,call,win,visit,citi,stori,sister,jack,old,know,say,send,tom,captain,even,coupl,fire,head,prison,littl,happi,describ,princ,onc,start,harold,think,york,paul,appear,still,jimmi,three,job,upon,look,taken,open,stop,car,hand,buster,lead,engag,shot,ladi,king,german,magazin,scene,left,danc,togeth,well,charli,keaton,hide,without,keep,wed,arrest,attack,ann,caus,n't,perform,bring,join,long,steal,sent,confess,inform,enter,white,gang,bill,world,richard,interest,much,de,put,island,pas,bob,convinc,around,dead,uncl,hope,like,letter,along,receiv,though,veri,instead,dr.,success,robert,georg,real,fortun,soldier,state,gun,chase,sever,apart,involv,attract,helen,walk,door,accept,doctor,alic,throw,harri,despit,dress,reunit,dorothi,shoot,face,wait,compani,chang,must,flee,rival,part,travel,would,land,last,local,captur,street,pay,small,lord,william,david,feel,french,found,promis,kiss,thing,behind,beauti,lose,act,rich,billi,tramp,countri,spend,allow,jame,free,move,secret,stay,propos,louis,kitti,boat,busi,water,peopl,steve,front,abl,immedi,search,alon,babi,invit,includ,crime,let,never,made,toni,plot,lost,affair,former,peter,aunt,ride,hors,rais,wound,continu,remain,lover,announc,actual,salli,carri,reach,miss,race,desert,von,accident,need,knock,pari,claim,good,hear,struggl,happen,care,sing,betti,charl,result,store,owner,la,pretend,line,brown,reject,great,note,dream,although,notic,unit,confront,hold,persuad,gener,drive,kidnap,releas,arm,seek,explain,attent,hire,threaten,british,crook,heart,spot,sweetheart,arrang,villag,catch,armi,court,stage,fatti,nativ,caught,black,buy,virginia,sinc,group,close,given,poor,drunk,karl,drink,lee,guest,right,battl,mani,hotel,prove,count,dinner,school,juan,dan,station,hall,cloth,bank,encount,safe,bodi,insist,adopt,grow,mr.,parent,trap,morn,truth,america,star,guard,edward,outsid,unabl,chanc,everyon,warn,commit,suicid,true,beg,henri,recogn,estat,peggi,ani,dog,robberi,sneak,present,window,game,romanc,troubl,attend,demand,prepar,lie,fear,suspect,angela,eddi,dick,insid,second,may,member,cabin,princess,gangster,jealou,girlfriend,employ,west,policeman,read,crew,neighbor,whose,scheme,reveng,jail,admir,detect,fallen,big,sell,assum,grant,board,indian,gypsi,sheriff,stand,larri,divorc,joan,enough,famou,brought,talk,declar,accus,protect,pursu,gold,diana,public,jerri,charg,seen,determin,afterward,assist,happili,molli,late,enlist,aboard,fail,bos,johnni,best,side,mysteri,franc,intend,past,sign,oper,rush,abandon,write,wish,partner,person,charact,relationship,inherit,hospit,affect,everyth,pictur,hit,bed,rob,romant,wit,colleg,ball,newspap,mission,complet,command,engin,passeng,leader,introduc,watch,farm,shock,frank,ident,south,beeri,nanci,pull,eye,quickli,recov,crimin,fli,judg,danger,moment,trip,orphan,build,noth,servant,secretli,word,advanc,shop,larg,pick,alreadi,england,depart,team,suitor,boyfriend,news,clear,near,jesu,sentenc,barbara,manner,music,tommi,track,short,fact,aid,themselv,lock,fellow,someon,report,bar,better,toward,impress,prevent,chaplin,quit,lawyer,michael,known,stolen,paper,desper,provid,sail,ranch,deal,polli,older,sea,minist,ward,margaret,mansion,societi,colonel,pose,admit,roger,pirat,dori,christin,theater,told,plead,social,reconcil,gray,younger,millionair,san,ben,overhear,suggest,duke,simon,maggi,four,avoid,across,among,surviv,effort,check,surpris,enemi,jane,spi,widow,high,seri,sir,serv,differ,nearbi,maid,flirt,titl,accompani,storm,ensu,lieuten,point,song,baron,mine,mistak,sylvia,park,sit,sleep,wall,cross,chief,club,flower,gone,fiancé,dancer,artist,mabel,due,support,injur,london,mont,realli,actor,drop,cut,led,jump,dolli,vow,thief,church,trick,honor,futur,exchang,disguis,sergeant,jewel,innoc,posit,deliv,financi,ruth,terri,major,hank,paint,photograph,tear,hunt,camp,execut,speak,develop,gain,yacht,privat,butler,ruin,month,book,obtain,van,agent,wild,entir,onto,respons,situat,peac,red,seem,union,power,beau,action,almost,kid,initi,investig,bennett,lloyd,maria,professor,al,singer,judi,climb,someth,queen,bandit,emerg,bedroom,burn,pair,civil,charm,pretti,produc,intent,air,origin,rose,god,elsi,convict,joy,custom,tree,hidden,disappear,belong,crash,gambl,wear,earlier,separ,telephon,nurs,case,broken,audienc,mind,thrown,share,five,inn,max,fred,ami,pilot,circu,phantom,river,elop,angri,rest,wander,settl,done,tin,challeng,choos,idea,delight,career,forgiv,sheik,destroy,aristocrat,socialit,edith,duel,faith,arthur,robin,sam,willi,aircraft,annabel,hang,embrac,eat,tie,form,restaur,condit,loui,crowd,band,could,rather,acquaint,clerk,leg,movi,tabl,replac,nora,seduc,spike,stone,francisco,scandal,jean,foster,roy,foot,restor,wrong,approach,drag,lawrenc,earli,smith,field,thu,practic,anger,week,fals,shortli,treat,law,grandfath,switch,mark,walter,defend,longer,anyth,oliv,food,alli,africa,kelli,nightclub,jr.,ronald,pollyanna,raoul,mcteagu,base,card,popular,eve,milli,cover,ill,role,comfort,le,blame,ring,secretari,ted,trial,light,thiev,ralph,cattl,stan,davidson,amo,morgan,chick,safeti,celebr,escort,mean,cheat,effect,held,desir,deni,martha,blow,class,marion,necklac,far,ned,everi,upset,elmer,figur,bad,blackmail,berti,servic,date,experi,cast,number,moran,buddi,kent,rupert,norma,victim,respect,strike,fish,nephew,sure,hour,unawar,unfortun,posse,expo,letti,suffer,fake,femal,floor,coach,cours,lewi,beat,accid,california,grace,teach,poe,sword,marcia,reason,hugh,fairbank,tire,favor,event,earl,europ,leo,mickey,stella,ethel,convent
0,"[bartend, work, saloon, serv, drink, custom, f...",1901,Kansas Saloon Smashers,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"[moon, paint, smile, face, hang, park, night, ...",1901,Love by the Light of the Moon,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [59]:
X = movies_cleaned.iloc[:,30:]
Y = movies_cleaned.iloc[:,3:30]



Unnamed: 0,Fantasy,Drama,Animation,Crime,Western,Romance,Short,Comedy,Musical,Documentary,Music,Action,Biography,Talk-Show,Reality-TV,Film-Noir,Sport,News,Adventure,Family,Thriller,Adult,History,Horror,War,Mystery,Sci-Fi
0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
