In [None]:
import xml.dom.minidom
import re
import nltk
from wordcloud import WordCloud
import matplotlib.pylab as plt
import pandas as pd
import json
import os
import pickle

In [2]:
def tokenExtractor(file):
    data = open(file, encoding="utf-8").read()

    doc = xml.dom.minidom.parse(file)

    movieText = ""

    for item in doc.getElementsByTagName("s"):
        for child in item.childNodes:
            if child.nodeName == "#text" and len(re.findall("\w", child.nodeValue)) > 1:
                movieText += child.nodeValue

    movieText = re.sub("\n\s+", " ", movieText)
    movieText = re.sub("\n", "", movieText)

    pattern = r'''(?x)(?:[A-Z]\.)+ | \w+(?:-\w+)*  | '''
    tokens = nltk.regexp_tokenize(movieText, pattern)

    #Remove non alphanumericbetic characters
    tokens = [w for w in tokens if re.search(r'\w', w)]

    #Remove stopwords
    stopwords = nltk.corpus.stopwords.words('english')
    tokens = [w for w in tokens if w.lower() not in stopwords]

    #Remove numbers 
    tokens = [w for w in tokens if not re.search(r'\d', w)]

    #Lower case
    tokens = [t.lower() for t in tokens]
    tokens = [item for item in tokens if item.isalpha()]
    
    return tokens

In [3]:
def tf_calculator(l):
    tf = {}
    
    for item in l:
        if item in tf:
            tf[item] += 1
        else:
            tf[item] = 1
    
    return tf

In [4]:
def has_genre(genre, movie_data):
    genres_data = movie_data["genres"].replace("'", '"')
    genres_json = json.loads(genres_data)
    for g_object in genres_json:
        if g_object["name"] == genre:
            return True
    return False

In [15]:
def loadData():
    metadata_file = "movies_metadata.csv"
    genre_set = {""}

    metadata_data = pd.read_csv(metadata_file, encoding ='utf-8')
    for genre_list in metadata_data["genres"]:
        genre_list = genre_list.replace("'", '"')
        genre_json = json.loads(genre_list)
        for genre in genre_json:
            genre_set.add(genre["name"])
    
    return genre_set, metadata_data

In [45]:
def generateTokensFor(choosen_genre, metadata_data):
    tokens = []

    for index, movie in metadata_data.iterrows():
        if has_genre(choosen_genre, movie):
            release_date = movie["release_date"].split("-")[0]
            movie_id = str(movie["imdb_id"])

            if len(movie_id) > 1:
                while movie_id[0] == 't' or movie_id[0] == '0' :
                    movie_id = movie_id[1:]

            movieDir = "/Subs/en/{}/{}".format(release_date, movie_id)

            try:
                files = os.listdir(movieDir)
                movie_file = movieDir + '/' + files[0]

                tokens = tokens + tokenExtractor(movie_file)
            except:
                print("Cannot find file " + movieDir)
                
    return tokens

In [46]:
def getWordcloud(tokens):
    tokens_tf = tf_calculator(tokens)

    idf = dict()

    for word in set(tokens):
        idf[word] = 1

    tokens_tfidf = dict()

    for word in tokens_tf:
        tokens_tfidf[word] = tokens_tf[word] * idf[word]
        
    wordcloud = None
    
    if len(tokens_tfidf) > 0:
        wordcloud = WordCloud(width=1000, height=1000, background_color="#EEEEEE").generate_from_frequencies(tokens_tfidf)

    return wordcloud

In [None]:
genres, data = loadData()
wordclouds = {}

index = 0

for genre in genres:
    index = index + 1
    print("Loading genre {}, {}/{}".format(genre, index, len(genres)))
    
    try:
        wordclouds[genre] = getWordcloud(generateTokensFor(genre, data))
    except:
        print("Error while trying to load genre {}".format(genre))

Loading genre , 1/33
Loading genre Fantasy, 2/33
Cannot find file /Subs/en/1995/114241
Cannot find file /Subs/en/1995/114663
Cannot find file /Subs/en/1995/113234
Cannot find file /Subs/en/1994/106379
Cannot find file /Subs/en/1996/115509
Cannot find file /Subs/en/1995/114658
Cannot find file /Subs/en/1946/38300
Cannot find file /Subs/en/1995/114303
Cannot find file /Subs/en/1998/120491
Cannot find file /Subs/en/1999/143924
Cannot find file /Subs/en/1999/164108
Cannot find file /Subs/en/1989/96978
Cannot find file /Subs/en/1975/78412
Cannot find file /Subs/en/1999/118882
Cannot find file /Subs/en/1951/43899
Cannot find file /Subs/en/1987/95525
Cannot find file /Subs/en/1957/50292
Cannot find file /Subs/en/1987/93092
Cannot find file /Subs/en/1987/93418
Cannot find file /Subs/en/1987/93476
Cannot find file /Subs/en/2001/234570
Cannot find file /Subs/en/1999/162023
Cannot find file /Subs/en/1968/62776
Cannot find file /Subs/en/1989/97236
Cannot find file /Subs/en/2001/228687
Cannot find 

Cannot find file /Subs/en/1991/104418
Cannot find file /Subs/en/1986/90961
Cannot find file /Subs/en/2013/2552498
Cannot find file /Subs/en/1987/93276
Cannot find file /Subs/en/1975/73298
Cannot find file /Subs/en/1974/73804
Cannot find file /Subs/en/1973/69700
Cannot find file /Subs/en/2002/210294
Cannot find file /Subs/en/1978/78500
Cannot find file /Subs/en/2006/463826
Cannot find file /Subs/en/2000/267657
Cannot find file /Subs/en/1996/115820
Cannot find file /Subs/en/2011/1734203
Cannot find file /Subs/en/1967/61433
Cannot find file /Subs/en/1999/165396
Cannot find file /Subs/en/2007/977663
Cannot find file /Subs/en/2020/1630029
Cannot find file /Subs/en/2017/974015
Cannot find file /Subs/en/2005/424228
Cannot find file /Subs/en/1968/63240
Cannot find file /Subs/en/1935/26972
Cannot find file /Subs/en/1971/68613
Cannot find file /Subs/en/1917/8443
Cannot find file /Subs/en/2013/2379386
Cannot find file /Subs/en/2009/1311082
Cannot find file /Subs/en/1982/249567
Cannot find file /S

Cannot find file /Subs/en/1987/123150
Cannot find file /Subs/en/1997/286648
Cannot find file /Subs/en/2011/1829747
Cannot find file /Subs/en/2014/3215822
Cannot find file /Subs/en/2007/449671
Cannot find file /Subs/en/1992/242599
Cannot find file /Subs/en/1997/119870
Cannot find file /Subs/en/2012/2374486
Cannot find file /Subs/en/2000/295232
Cannot find file /Subs/en/2011/2072214
Cannot find file /Subs/en/2011/2134092
Cannot find file /Subs/en/2008/929441
Cannot find file /Subs/en/1989/185481
Cannot find file /Subs/en/1982/86972
Cannot find file /Subs/en/2015/2188860
Cannot find file /Subs/en/1969/211281
Cannot find file /Subs/en/1995/111244
Cannot find file /Subs/en/2016/3982118
Cannot find file /Subs/en/2013/1764614
Cannot find file /Subs/en/2016/3666024
Cannot find file /Subs/en/2014/3009714
Cannot find file /Subs/en/1903/420
Cannot find file /Subs/en/1941/33406
Cannot find file /Subs/en/1949/43410
Cannot find file /Subs/en/1997/159914
Cannot find file /Subs/en/1977/165362
Cannot f

Cannot find file /Subs/en/1993/109823
Cannot find file /Subs/en/1998/119494
Cannot find file /Subs/en/1927/18578
Cannot find file /Subs/en/1998/188996
Cannot find file /Subs/en/1966/55024
Cannot find file /Subs/en/1945/38120
Cannot find file /Subs/en/1981/81059
Cannot find file /Subs/en/1982/83591
Cannot find file /Subs/en/1982/84351
Cannot find file /Subs/en/1983/84538
Cannot find file /Subs/en/1997/120001
Cannot find file /Subs/en/1947/38279
Cannot find file /Subs/en/1915/4972
Cannot find file /Subs/en/1956/49652
Cannot find file /Subs/en/1950/43041
Cannot find file /Subs/en/1997/119561
Cannot find file /Subs/en/1993/104812
Cannot find file /Subs/en/2004/430745
Cannot find file /Subs/en/1925/15624
Cannot find file /Subs/en/1938/30744
Cannot find file /Subs/en/1941/34449
Cannot find file /Subs/en/1944/35959
Cannot find file /Subs/en/1955/48434
Cannot find file /Subs/en/2005/316824
Cannot find file /Subs/en/1944/37323
Cannot find file /Subs/en/1943/36431
Cannot find file /Subs/en/1943/

Cannot find file /Subs/en/1952/44621
Cannot find file /Subs/en/1952/44085
Cannot find file /Subs/en/2007/1198220
Cannot find file /Subs/en/1958/51349
Cannot find file /Subs/en/2002/313608
Cannot find file /Subs/en/1959/52606
Cannot find file /Subs/en/1973/171726
Cannot find file /Subs/en/2014/3204734
Cannot find file /Subs/en/2002/288324
Cannot find file /Subs/en/2015/2942196
Cannot find file /Subs/en/1981/159510
Cannot find file /Subs/en/1999/162897
Cannot find file /Subs/en/1954/46532
Cannot find file /Subs/en/1957/50299
Cannot find file /Subs/en/1989/96744
Cannot find file /Subs/en/1956/49302
Cannot find file /Subs/en/1967/62445
Cannot find file /Subs/en/1971/65894
Cannot find file /Subs/en/1944/37034
Cannot find file /Subs/en/1975/150282
Cannot find file /Subs/en/1976/74161
Cannot find file /Subs/en/1962/56072
Cannot find file /Subs/en/1944/139491
Cannot find file /Subs/en/1945/38243
Cannot find file /Subs/en/1982/84104
Cannot find file /Subs/en/1954/46671
Cannot find file /Subs/en

Cannot find file /Subs/en/1962/56579
Cannot find file /Subs/en/1983/85267
Cannot find file /Subs/en/2002/370754
Cannot find file /Subs/en/1967/61695
Cannot find file /Subs/en/2006/383353
Cannot find file /Subs/en/1979/79859
Cannot find file /Subs/en/1968/62908
Cannot find file /Subs/en/2006/800022
Cannot find file /Subs/en/1974/72195
Cannot find file /Subs/en/2001/283337
Cannot find file /Subs/en/2005/411269
Cannot find file /Subs/en/1989/94918
Cannot find file /Subs/en/1974/68595
Cannot find file /Subs/en/2000/219224
Cannot find file /Subs/en/1997/119700
Cannot find file /Subs/en/1916/6333
Cannot find file /Subs/en/1998/150500
Cannot find file /Subs/en/2008/363240
Cannot find file /Subs/en/2006/478207
Cannot find file /Subs/en/1959/52880
Cannot find file /Subs/en/1936/27623
Cannot find file /Subs/en/1925/16123
Cannot find file /Subs/en/1938/30138
Cannot find file /Subs/en/1975/73260
Cannot find file /Subs/en/2008/972558
Cannot find file /Subs/en/1959/52609
Cannot find file /Subs/en/19

Cannot find file /Subs/en/1940/32404
Cannot find file /Subs/en/1972/68649
Cannot find file /Subs/en/2005/337656
Cannot find file /Subs/en/2006/411267
Cannot find file /Subs/en/1946/38577
Cannot find file /Subs/en/1939/31500
Cannot find file /Subs/en/1943/36154
Cannot find file /Subs/en/2001/283337
Cannot find file /Subs/en/1970/65398
Cannot find file /Subs/en/2008/490076
Cannot find file /Subs/en/1989/99601
Cannot find file /Subs/en/2008/1180333
Cannot find file /Subs/en/2008/792986
Cannot find file /Subs/en/2008/1180333
Cannot find file /Subs/en/1935/26047
Cannot find file /Subs/en/1995/111464
Cannot find file /Subs/en/1936/27697
Cannot find file /Subs/en/1983/85448
Cannot find file /Subs/en/1997/120393
Cannot find file /Subs/en/1970/129134
Cannot find file /Subs/en/2008/1139319
Cannot find file /Subs/en/2009/1226681
Cannot find file /Subs/en/1991/99397
Cannot find file /Subs/en/2001/270645
Cannot find file /Subs/en/1972/67490
Cannot find file /Subs/en/2010/838247
Cannot find file /Su

Cannot find file /Subs/en/1999/197230
Cannot find file /Subs/en/1996/122494
Cannot find file /Subs/en/2009/1239462
Cannot find file /Subs/en/2007/479008
Cannot find file /Subs/en/2012/2391009
Cannot find file /Subs/en/1990/100912
Cannot find file /Subs/en/2011/1787092
Cannot find file /Subs/en/2011/986361
Cannot find file /Subs/en/1987/92723
Cannot find file /Subs/en/1996/117419
Cannot find file /Subs/en/1940/32408
Cannot find file /Subs/en/1942/34841
Cannot find file /Subs/en/1944/37566
Cannot find file /Subs/en/1947/39376
Cannot find file /Subs/en/1959/33865
Cannot find file /Subs/en/1964/57912
Cannot find file /Subs/en/1957/50534
Cannot find file /Subs/en/1952/45028
Cannot find file /Subs/en/1952/44958
Cannot find file /Subs/en/1961/55227
Cannot find file /Subs/en/1962/56185
Cannot find file /Subs/en/1949/41169
Cannot find file /Subs/en/1971/67813
Cannot find file /Subs/en/1970/122998
Cannot find file /Subs/en/2001/299040
Cannot find file /Subs/en/1992/167046
Cannot find file /Subs/

Cannot find file /Subs/en/1914/3740
Cannot find file /Subs/en/2001/257497
Cannot find file /Subs/en/1970/65491
Cannot find file /Subs/en/2009/1358383
Cannot find file /Subs/en/1934/25607
Cannot find file /Subs/en/1955/48602
Cannot find file /Subs/en/1960/54403
Cannot find file /Subs/en/1937/28739
Cannot find file /Subs/en/2010/1562847
Cannot find file /Subs/en/1996/117442
Cannot find file /Subs/en/1952/45039
Cannot find file /Subs/en/1936/27690
Cannot find file /Subs/en/1960/54243
Cannot find file /Subs/en/1945/36989
Cannot find file /Subs/en/2005/465326
Cannot find file /Subs/en/1936/27902
Cannot find file /Subs/en/1914/4181
Cannot find file /Subs/en/1943/35659
Cannot find file /Subs/en/1994/110747
Cannot find file /Subs/en/1934/25272
Cannot find file /Subs/en/1945/37849
Cannot find file /Subs/en/1938/30265
Cannot find file /Subs/en/1938/29950
Cannot find file /Subs/en/2006/1084683
Cannot find file /Subs/en/1967/60482
Cannot find file /Subs/en/2010/1709695
Cannot find file /Subs/en/20

Cannot find file /Subs/en/1980/81112
Cannot find file /Subs/en/1981/81178
Cannot find file /Subs/en/1980/81186
Cannot find file /Subs/en/1985/90196
Cannot find file /Subs/en/1975/73043
Cannot find file /Subs/en/1964/58007
Cannot find file /Subs/en/1972/68837
Cannot find file /Subs/en/1957/51134
Cannot find file /Subs/en/1943/37219
Cannot find file /Subs/en/1982/84133
Cannot find file /Subs/en/1940/32390
Cannot find file /Subs/en/2002/272730
Cannot find file /Subs/en/2002/285487
Cannot find file /Subs/en/1981/82362
Cannot find file /Subs/en/1976/75334
Cannot find file /Subs/en/1983/82213
Cannot find file /Subs/en/1990/121262
Cannot find file /Subs/en/1999/223251
Cannot find file /Subs/en/1982/82696
Cannot find file /Subs/en/1981/82815
Cannot find file /Subs/en/2001/213802
Cannot find file /Subs/en/1981/83033
Cannot find file /Subs/en/1981/83133
Cannot find file /Subs/en/1981/83178
Cannot find file /Subs/en/1982/83542
Cannot find file /Subs/en/1962/55894
Cannot find file /Subs/en/1984/86

Cannot find file /Subs/en/1973/68187
Cannot find file /Subs/en/1999/159797
Cannot find file /Subs/en/1921/12255
Cannot find file /Subs/en/1974/71186
Cannot find file /Subs/en/1984/86546
Cannot find file /Subs/en/1924/14586
Cannot find file /Subs/en/2010/1148200
Cannot find file /Subs/en/1976/74815
Cannot find file /Subs/en/1980/80842
Cannot find file /Subs/en/1971/67972
Cannot find file /Subs/en/1972/69738
Cannot find file /Subs/en/2010/1277936
Cannot find file /Subs/en/1962/56279
Cannot find file /Subs/en/2007/478126
Cannot find file /Subs/en/2008/1220706
Cannot find file /Subs/en/2009/1075749
Cannot find file /Subs/en/2008/893532
Cannot find file /Subs/en/1981/82243
Cannot find file /Subs/en/2008/933876
Cannot find file /Subs/en/2007/826817
Cannot find file /Subs/en/2010/918575
Cannot find file /Subs/en/1981/82352
Cannot find file /Subs/en/2009/1278480
Cannot find file /Subs/en/2003/364527
Cannot find file /Subs/en/1985/90362
Cannot find file /Subs/en/2011/1013860
Cannot find file /S

Cannot find file /Subs/en/1988/95256
Cannot find file /Subs/en/1998/127919
Cannot find file /Subs/en/2011/2063008
Cannot find file /Subs/en/1975/71212
Cannot find file /Subs/en/2006/488962
Cannot find file /Subs/en/2009/1072754
Cannot find file /Subs/en/2010/1650516
Cannot find file /Subs/en/1973/69795
Cannot find file /Subs/en/2004/390450
Cannot find file /Subs/en/2009/1063056
Cannot find file /Subs/en/2012/1815776
Cannot find file /Subs/en/1968/64393
Cannot find file /Subs/en/2010/1153546
Cannot find file /Subs/en/1991/102007
Cannot find file /Subs/en/2001/267440
Cannot find file /Subs/en/1974/72136
Cannot find file /Subs/en/1966/60580
Cannot find file /Subs/en/1958/51020
Cannot find file /Subs/en/2011/1296899
Cannot find file /Subs/en/2007/2637720
Cannot find file /Subs/en/2013/1817276
Cannot find file /Subs/en/2012/1707392
Cannot find file /Subs/en/2014/3087752
Cannot find file /Subs/en/1965/58954
Cannot find file /Subs/en/2014/2309961
Cannot find file /Subs/en/2012/2402539
Cannot 

Cannot find file /Subs/en/1987/95924
Cannot find file /Subs/en/2010/1425253
Cannot find file /Subs/en/1982/81203
Cannot find file /Subs/en/1987/92696
Cannot find file /Subs/en/2008/1157658
Cannot find file /Subs/en/2015/2231646
Cannot find file /Subs/en/2015/2123170
Cannot find file /Subs/en/1973/70599
Cannot find file /Subs/en/1977/76112
Cannot find file /Subs/en/1971/67236
Cannot find file /Subs/en/2007/961206
Cannot find file /Subs/en/1974/72710
Cannot find file /Subs/en/1973/69732
Cannot find file /Subs/en/1958/50717
Cannot find file /Subs/en/1971/66623
Cannot find file /Subs/en/1975/73847
Cannot find file /Subs/en/2005/449092
Cannot find file /Subs/en/2012/2385074
Cannot find file /Subs/en/2015/1991031
Cannot find file /Subs/en/2012/2393817
Cannot find file /Subs/en/1966/60831
Cannot find file /Subs/en/2011/1400515
Cannot find file /Subs/en/1976/74593
Cannot find file /Subs/en/1972/68367
Cannot find file /Subs/en/1977/76068
Cannot find file /Subs/en/1970/65595
Cannot find file /Su

Cannot find file /Subs/en/2013/1727885
Cannot find file /Subs/en/2013/3072668
Cannot find file /Subs/en/2007/478265
Cannot find file /Subs/en/2006/493409
Cannot find file /Subs/en/2006/810444
Cannot find file /Subs/en/2008/1151911
Cannot find file /Subs/en/1986/91338
Cannot find file /Subs/en/2012/1954780
Cannot find file /Subs/en/2007/455537
Cannot find file /Subs/en/2015/2488220
Cannot find file /Subs/en/2015/3772918
Cannot find file /Subs/en/1972/68231
Cannot find file /Subs/en/1973/69375
Cannot find file /Subs/en/1995/114196
Cannot find file /Subs/en/1989/96785
Cannot find file /Subs/en/1980/80379
Cannot find file /Subs/en/1980/78936
Cannot find file /Subs/en/2016/4009278
Cannot find file /Subs/en/2014/2689354
Cannot find file /Subs/en/1994/111082
Cannot find file /Subs/en/1990/103268
Cannot find file /Subs/en/2009/1413527
Cannot find file /Subs/en/2008/902952
Cannot find file /Subs/en/2011/1640218
Cannot find file /Subs/en/2008/1297943
Cannot find file /Subs/en/2011/1870527
Cannot

Cannot find file /Subs/en/1996/110374
Cannot find file /Subs/en/1991/102855
Cannot find file /Subs/en/1995/112712
Cannot find file /Subs/en/1996/113122
Cannot find file /Subs/en/1995/113448
Cannot find file /Subs/en/1995/117517
Cannot find file /Subs/en/1996/113947
Cannot find file /Subs/en/1995/109028
Cannot find file /Subs/en/1996/118026
Cannot find file /Subs/en/1995/117117
Cannot find file /Subs/en/1993/111787
Cannot find file /Subs/en/1996/115591
Cannot find file /Subs/en/1996/116269
Cannot find file /Subs/en/1995/110061
Cannot find file /Subs/en/1994/108181
Cannot find file /Subs/en/1995/114307
Cannot find file /Subs/en/1994/109491
Cannot find file /Subs/en/1995/111613
Cannot find file /Subs/en/1995/112604
Cannot find file /Subs/en/1996/117784
Cannot find file /Subs/en/2002/333373
Cannot find file /Subs/en/1995/112586
Cannot find file /Subs/en/1943/31612
Cannot find file /Subs/en/1995/113188
Cannot find file /Subs/en/1996/116833
Cannot find file /Subs/en/1996/116594
Cannot find f

Cannot find file /Subs/en/1998/169333
Cannot find file /Subs/en/1998/169156
Cannot find file /Subs/en/1922/13662
Cannot find file /Subs/en/1976/75406
Cannot find file /Subs/en/1937/29606
Cannot find file /Subs/en/1996/117131
Cannot find file /Subs/en/2000/141399
Cannot find file /Subs/en/2000/143344
Cannot find file /Subs/en/1971/67433
Cannot find file /Subs/en/1999/159373
Cannot find file /Subs/en/2000/217107
Cannot find file /Subs/en/1999/118882
Cannot find file /Subs/en/1975/73902
Cannot find file /Subs/en/2000/158583
Cannot find file /Subs/en/1999/181618
Cannot find file /Subs/en/1966/55024
Cannot find file /Subs/en/1967/61801
Cannot find file /Subs/en/1998/171135
Cannot find file /Subs/en/1947/39748
Cannot find file /Subs/en/1997/117664
Cannot find file /Subs/en/1985/90219
Cannot find file /Subs/en/1999/201538
Cannot find file /Subs/en/1998/168475
Cannot find file /Subs/en/1991/101615
Cannot find file /Subs/en/1986/91313
Cannot find file /Subs/en/1989/98251
Cannot find file /Subs/

Cannot find file /Subs/en/1998/138563
Cannot find file /Subs/en/1997/141986
Cannot find file /Subs/en/1997/119165
Cannot find file /Subs/en/1998/139468
Cannot find file /Subs/en/1997/116481
Cannot find file /Subs/en/1998/175550
Cannot find file /Subs/en/1998/188996
Cannot find file /Subs/en/1999/168950
Cannot find file /Subs/en/1982/84156
Cannot find file /Subs/en/1999/184510
Cannot find file /Subs/en/1997/119614
Cannot find file /Subs/en/1999/181833
Cannot find file /Subs/en/1999/150574
Cannot find file /Subs/en/1999/176093
Cannot find file /Subs/en/1998/187712
Cannot find file /Subs/en/1975/72742
Cannot find file /Subs/en/1971/67527
Cannot find file /Subs/en/1998/144969
Cannot find file /Subs/en/1957/50558
Cannot find file /Subs/en/2000/208261
Cannot find file /Subs/en/1998/183065
Cannot find file /Subs/en/1999/192069
Cannot find file /Subs/en/1998/144801
Cannot find file /Subs/en/1930/20594
Cannot find file /Subs/en/2000/221023
Cannot find file /Subs/en/1999/218043
Cannot find file 

Cannot find file /Subs/en/2007/912599
Cannot find file /Subs/en/2006/468442
Cannot find file /Subs/en/2007/1002540
Cannot find file /Subs/en/2008/995061
Cannot find file /Subs/en/2007/912590
Cannot find file /Subs/en/2008/963208
Cannot find file /Subs/en/2005/892425
Cannot find file /Subs/en/2006/844462
Cannot find file /Subs/en/2006/478090
Cannot find file /Subs/en/2008/1047007
Cannot find file /Subs/en/2003/387682
Cannot find file /Subs/en/2006/479916
Cannot find file /Subs/en/2006/841119
Cannot find file /Subs/en/2007/939681
Cannot find file /Subs/en/1974/233809
Cannot find file /Subs/en/2008/1037033
Cannot find file /Subs/en/2007/479547
Cannot find file /Subs/en/2006/997088
Cannot find file /Subs/en/2008/929235
Cannot find file /Subs/en/2003/378244
Cannot find file /Subs/en/2008/1204298
Cannot find file /Subs/en/2007/913958
Cannot find file /Subs/en/2007/1045889
Cannot find file /Subs/en/2007/1086340
Cannot find file /Subs/en/2008/1157620
Cannot find file /Subs/en/2006/841084
Canno

In [None]:
pickle_file = open("wordclouds.backup", "wb")

pickle.dump(wordclouds, pickle_file)

pickle_file.close()

Loading error list
Science Fiction
Mystery
Animation
Foreign
History
Horror
Drama
Crime
Action
Filmworks
