# MovieLens dataset report
## Prepare

In [1]:
from movielens_analysis import Movies, Links, Ratings, Tags, Statistics

In [2]:
# List the working directory and the dataset folder so the reader can see
# which input files are available before they are loaded.
%ls
%ls ml-latest-small

[1m[36m__pycache__[m[m/            movielens_analysis.py   test.txt
[1m[36mml-latest-small[m[m/        movielens_report.ipynb
README.txt       micro_links.csv  ratings.csv
[31mlinks.csv[m[m*       movies.csv       tags.csv


In [3]:
# Every input CSV lives in the same dataset directory; change DATA_DIR to
# point the whole report at another copy of the dataset.
DATA_DIR = 'ml-latest-small'

MOVIES_CSV = f'{DATA_DIR}/movies.csv'
LINKS_CSV = f'{DATA_DIR}/links.csv'
RATINGS_CSV = f'{DATA_DIR}/ratings.csv'
TAGS_CSV = f'{DATA_DIR}/tags.csv'

## Movies analysis

### Distribution by release year

In [4]:
# Build the Movies object from the movies CSV; the cells in this section
# call its methods.
movies = Movies(MOVIES_CSV)

In [5]:
# Time movies.dist_by_release().
%timeit movies.dist_by_release()

44.4 ms ± 1.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
dist_by_release = movies.dist_by_release()

# Print "year : count" pairs as a tab-separated grid, five pairs per line.
# Counting from 1 lets the position double as "number of pairs printed so
# far", so a line break is emitted after every fifth pair.
for position, (year, count) in enumerate(dist_by_release.items(), start=1):
    print(f'{year} : {count}', end='\t')
    if position % 5 == 0:
        print()

# Terminate the (possibly partial) last row and leave one blank line.
print('\n')

2002 : 311	2006 : 295	2001 : 294	2007 : 284	2000 : 283	
2009 : 282	2003 : 279	2004 : 279	2014 : 278	1996 : 276	
2015 : 274	2005 : 273	2008 : 269	1999 : 263	1997 : 260	
1995 : 259	1998 : 258	2011 : 254	2010 : 247	2013 : 239	
1994 : 237	2012 : 233	2016 : 218	1993 : 198	1992 : 167	
1988 : 165	1987 : 153	1990 : 147	1991 : 147	2017 : 147	
1989 : 142	1986 : 139	1985 : 126	1984 : 101	1981 : 92	
1980 : 89	1982 : 87	1983 : 83	1979 : 69	1977 : 63	
1973 : 59	1978 : 59	1965 : 47	1971 : 47	1974 : 45	
1976 : 44	1964 : 43	1967 : 42	1968 : 42	1975 : 42	
1966 : 42	2018 : 41	1962 : 40	1972 : 39	1963 : 39	
1959 : 37	1960 : 37	1955 : 36	1969 : 35	1961 : 34	
1970 : 33	1957 : 33	1958 : 31	1953 : 30	1956 : 30	
1940 : 25	1949 : 25	1954 : 23	1942 : 23	1939 : 23	
1946 : 23	1951 : 22	1950 : 21	1947 : 20	1948 : 20	
1941 : 18	1936 : 18	1945 : 17	1937 : 16	1952 : 16	
1944 : 16	1938 : 15	1931 : 14	1935 : 13	Null : 13	
1933 : 12	1934 : 11	1943 : 10	1932 : 9	1927 : 7	
1930 : 5	1926 : 5	1924 : 5	1929 : 4	1928 : 4	
1925

### Distribution of genres

In [7]:
# Time movies.dist_by_genres().
%timeit movies.dist_by_genres()

40.1 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# Reuse the Movies instance created at the top of this section: the timing
# cell just above already calls dist_by_genres() on it, so re-reading
# MOVIES_CSV here only repeated the load.
for genre, movies_count in movies.dist_by_genres().items():
    print(f'{genre} : {movies_count}')

print()

Drama : 4361
Comedy : 3756
Thriller : 1894
Action : 1828
Romance : 1596
Adventure : 1263
Crime : 1199
Sci-Fi : 980
Horror : 978
Fantasy : 779
Children : 664
Animation : 611
Mystery : 573
Documentary : 440
War : 382
Musical : 334
Western : 167
IMAX : 158
Film-Noir : 87
(no genres listed) : 34



### Top 30 films by number of genres

In [9]:
# Time movies.most_genres(30).
%timeit movies.most_genres(30)

34.5 ms ± 732 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
# Reuse the existing Movies instance (the timing cell above already calls
# most_genres() on it) instead of re-reading MOVIES_CSV.
# Two-column table: film title padded to 70 characters, then genre count.
print(f'{"Film":<70}Genres number')
for title, genres_count in movies.most_genres(30).items():
    print(f'{title:<70}{genres_count}')

print()

Film                                                                  Ganres number
Rubber (2010)                                                         10
Patlabor: The Movie (Kidô keisatsu patorebâ: The Movie) (1989)        8
Mulan (1998)                                                          7
Who Framed Roger Rabbit? (1988)                                       7
Osmosis Jones (2001)                                                  7
Interstate 60 (2002)                                                  7
Robots (2005)                                                         7
Pulse (2006)                                                          7
Aqua Teen Hunger Force Colon Movie Film for Theaters (2007)           7
Enchanted (2007)                                                      7
Aelita: The Queen of Mars (Aelita) (1924)                             7
Inception (2010)                                                      7
Tangled (2010)                                     

## Links analysis

### Get IMDb information

In [11]:
# Build the Links object from the links CSV and the Movies object; the
# cells in this section call its methods.
links = Links(LINKS_CSV, movies)

In [12]:
# Time links.get_imdb() for five movie ids and four fields (the same
# arguments the next cell uses to display the result).
%timeit links.get_imdb([1, 3, 5, 7, 15], ['Director', 'Budget', 'Gross worldwide', 'Runtime'])

168 µs ± 54.2 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
# Fields passed to links.get_imdb() for the listed movie ids.
fields = ['Director', 'Budget', 'Gross worldwide', 'Runtime']
imdb_info = links.get_imdb([1, 3, 5, 7, 15], fields)

# Header row first, then each returned row, all separated by double tabs.
# NOTE(review): the recorded output lists the ids as 1, 15, 3, 5, 7, which
# looks like string ordering of the movie ids - confirm in Links.get_imdb.
print('MovieId', *fields, sep='\t\t')
for row in imdb_info:
    print(*row, sep='\t\t')

MovieId		Director		Budget		Gross worldwide		Runtime
1		John Lasseter		$30,000,000 (estimated)		$394,436,586		1 hour 21 minutes
15		Renny Harlin		$98,000,000 (estimated)		$10,017,322		2 hours 4 minutes
3		Howard Deutch		$25,000,000 (estimated)		$71,518,503		1 hour 41 minutes
5		Charles Shyer		$30,000,000 (estimated)		$76,594,107		1 hour 46 minutes
7		Sydney Pollack		$58,000,000 (estimated)		$53,672,080		2 hours 7 minutes


### Top directors

In [14]:
# Time links.top_directors(20) with a single run of a single loop
# (-r 1 -n 1); the recorded timing for that one call is about 24 s.
%timeit -r 1 -n 1 links.top_directors(20)

24.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [15]:
top_directors = links.top_directors(20)

# Two-column table: director name padded to 20 characters, then film count.
print(f'{"Director":<20}Films count')
for director, films_count in top_directors.items():
    # A None key cannot be formatted with an alignment spec; show it as 'Null'.
    if director is None:
        director = 'Null'
    print(f'{director:<20}{films_count}')

Director            Films count
John Lasseter       1
Joe Johnston        1
Howard Deutch       1
Forest Whitaker     1
Charles Shyer       1
Michael Mann        1
Sydney Pollack      1
Peter Hewitt        1
Peter Hyams         1
Martin Campbell     1
Rob Reiner          1
Mel Brooks          1
Simon Wells         1
Oliver Stone        1
Renny Harlin        1
Martin Scorsese     1
Ang Lee             1
Null                1
Steve Oedekerk      1
Joseph Ruben        1


### Most expensive films

In [16]:
# Time links.most_expensive(20).
%timeit links.most_expensive(20)

132 µs ± 6.42 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [17]:
most_expensive = links.most_expensive(20)

# Two-column table: film title padded to 40 characters, then its budget.
# NOTE(review): in the recorded output "$4,000,000" sits between
# "$44,000,000" and "$35,000,000", which suggests budgets are ordered as
# strings rather than numbers - confirm in Links.most_expensive.
print(f'{"Film":<40}Budget')
for title, budget in most_expensive.items():
    print(f'{title:<40}{budget}')

Film                                    Budget
Cutthroat Island (1995)                 $98,000,000 (estimated)
Money Train (1995)                      $68,000,000 (estimated)
Jumanji (1995)                          $65,000,000 (estimated)
American President, The (1995)          $62,000,000 (estimated)
Heat (1995)                             $60,000,000 (estimated)
GoldenEye (1995)                        $60,000,000 (estimated)
Sabrina (1995)                          $58,000,000 (estimated)
Casino (1995)                           $52,000,000 (estimated)
Nixon (1995)                            $44,000,000 (estimated)
Four Rooms (1995)                       $4,000,000 (estimated)
Sudden Death (1995)                     $35,000,000 (estimated)
Toy Story (1995)                        $30,000,000 (estimated)
Father of the Bride Part II (1995)      $30,000,000 (estimated)
Dracula: Dead and Loving It (1995)      $30,000,000 (estimated)
Ace Ventura: When Nature Calls (1995)   $30,000,000 (estim

### Most profitable films

In [18]:
# Time links.most_profitable(20).
%timeit links.most_profitable(20)

281 µs ± 13.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [19]:
most_profitable = links.most_profitable(20)

# Two-column table: film title padded to 40 characters, then its profit.
# NOTE(review): the recorded output is not in descending order around the
# `nan` rows (e.g. 46594107.0, nan, 292194034.0); NaN values may be
# disturbing the sort - confirm in Links.most_profitable.
print(f'{"Film":<40}Profit')
for title, profit in most_profitable.items():
    print(f'{title:<40}{profit}')

Film                                    Profit
Toy Story (1995)                        364436586.0
Heat (1995)                             127436818.0
Father of the Bride Part II (1995)      46594107.0
Tom and Huck (1995)                     nan
GoldenEye (1995)                        292194034.0
Jumanji (1995)                          197821940.0
Balto (1995)                            nan
Ace Ventura: When Nature Calls (1995)   182385533.0
Sense and Sensibility (1995)            118582776.0
Waiting to Exhale (1995)                65452156.0
Casino (1995)                           64112375.0
Grumpier Old Men (1995)                 46518503.0
American President, The (1995)          45879496.0
Sudden Death (1995)                     29350171.0
Four Rooms (1995)                       257354.0
Sabrina (1995)                          -4327920.0
Dracula: Dead and Loving It (1995)      -19227856.0
Nixon (1995)                            -30318235.0
Money Train (1995)                      -32

### Longest films

In [20]:
# Time links.longest(20).
%timeit links.longest(20)

148 µs ± 7.74 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [21]:
longest = links.longest(20)

# Two-column table: film title padded to 40 characters, then its runtime.
print(f'{"Film":<40}Runtime')
for title, runtime in longest.items():
    print(f'{title:<40}{runtime}')

Film                                    Runtime
Nixon (1995)                            3 hours 12 minutes
Casino (1995)                           2 hours 58 minutes
Heat (1995)                             2 hours 50 minutes
Sense and Sensibility (1995)            2 hours 16 minutes
GoldenEye (1995)                        2 hours 10 minutes
Sabrina (1995)                          2 hours 7 minutes
Waiting to Exhale (1995)                2 hours 4 minutes
Cutthroat Island (1995)                 2 hours 4 minutes
American President, The (1995)          1 hour 54 minutes
Sudden Death (1995)                     1 hour 51 minutes
Money Train (1995)                      1 hour 50 minutes
Father of the Bride Part II (1995)      1 hour 46 minutes
Jumanji (1995)                          1 hour 44 minutes
Grumpier Old Men (1995)                 1 hour 41 minutes
Four Rooms (1995)                       1 hour 38 minutes
Tom and Huck (1995)                     1 hour 37 minutes
Ace Ventura: When N

### Top films by cost per minute

In [22]:
# Time links.top_cost_per_minute(20).
%timeit links.top_cost_per_minute(20)

274 µs ± 23 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [23]:
top_cost_per_minute = links.top_cost_per_minute(20)

# Two-column table: film title padded to 40 characters, then cost per minute.
# NOTE(review): the recorded output starts 625000.0, nan, nan, 790322.58,
# so the `nan` rows appear to break the descending order - confirm in
# Links.top_cost_per_minute.
print(f'{"Film":<40}Cost per minute')
for title, cost_per_minute in top_cost_per_minute.items():
    print(f'{title:<40}{cost_per_minute}')

Film                                    Cost per minute
Jumanji (1995)                          625000.0
Tom and Huck (1995)                     nan
Balto (1995)                            nan
Cutthroat Island (1995)                 790322.58
Money Train (1995)                      618181.82
American President, The (1995)          543859.65
GoldenEye (1995)                        461538.46
Sabrina (1995)                          456692.91
Toy Story (1995)                        370370.37
Heat (1995)                             352941.18
Dracula: Dead and Loving It (1995)      340909.09
Ace Ventura: When Nature Calls (1995)   333333.33
Sudden Death (1995)                     315315.32
Casino (1995)                           292134.83
Father of the Bride Part II (1995)      283018.87
Grumpier Old Men (1995)                 247524.75
Nixon (1995)                            229166.67
Waiting to Exhale (1995)                129032.26
Sense and Sensibility (1995)            117647.06
Four Ro

## Ratings.Movies analysis
### Distribution of ratings count by year

In [24]:
# Build the Ratings object from the ratings CSV, then a Ratings.Movies
# object from it and the Movies object; the cells in this section call the
# latter's methods.
ratings = Ratings(RATINGS_CSV)
movies_ratings = Ratings.Movies(ratings, movies)

In [25]:
# Time movies_ratings.dist_by_year().
%timeit movies_ratings.dist_by_year()

350 ms ± 16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
dist_by_year = movies_ratings.dist_by_year()

# Two-column table: year padded to 6 characters, then the ratings count.
print(f'{"Year":<6}Ratings count')
for year, ratings_count in dist_by_year.items():
    print(f'{year:<6}{ratings_count}')

Year  Ratings count
1996  6040
1997  1916
1998  507
1999  2439
2000  10061
2001  3922
2002  3478
2003  4014
2004  3279
2005  5813
2006  4059
2007  7114
2008  4351
2009  4158
2010  2300
2011  1690
2012  4657
2013  1664
2014  1439
2015  6616
2016  6702
2017  8199
2018  6418


### Distribution of ratings count by rating value

In [27]:
# Time movies_ratings.dist_by_rating().
%timeit movies_ratings.dist_by_rating()

260 ms ± 4.44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
dist_by_rating = movies_ratings.dist_by_rating()

# Two-column table: rating value padded to 15 characters, then how many
# ratings have that value.
print(f'{"Rating value":<15}Ratings count')
for rating_value, ratings_count in dist_by_rating.items():
    print(f'{rating_value:<15}{ratings_count}')

Rating value   Ratings count
0.5            1370
1.0            2811
1.5            1791
2.0            7551
2.5            5550
3.0            20047
3.5            13136
4.0            26818
4.5            8551
5.0            13211


### Top movies by rating (average)

In [29]:
# Time movies_ratings.top_by_ratings(30) with its default metric.
%timeit movies_ratings.top_by_ratings(30)

281 ms ± 6.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
top_by_ratings = movies_ratings.top_by_ratings(30)

# Two-column table: movie title padded to 75 characters, then its score
# under the default metric (labelled "Average rating" in the header).
print(f'{"Movie":<75}Average rating')
for title, average_rating in top_by_ratings.items():
    print(f'{title:<75}{average_rating}')

Movie                                                                      Average rating
The Jinx: The Life and Deaths of Robert Durst (2015)                       5.0
Galaxy of Terror (Quest) (1981)                                            5.0
Alien Contamination (1980)                                                 5.0
I'm the One That I Want (2000)                                             5.0
Lesson Faust (1994)                                                        5.0
Assignment, The (1997)                                                     5.0
Mephisto (1981)                                                            5.0
Black Mirror                                                               5.0
Dylan Moran: Monster (2004)                                                5.0
Bill Hicks: Revelations (1993)                                             5.0
My Sassy Girl (Yeopgijeogin geunyeo) (2001)                                5.0
Strictly Sexual (2008)                   

### Top movies by rating (median)

In [31]:
# Time movies_ratings.top_by_ratings(30) with Statistics.median as the metric.
%timeit movies_ratings.top_by_ratings(30, metric=Statistics.median)

296 ms ± 9.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
# Same ranking as the previous section, but scored with Statistics.median.
# Note: this rebinds `top_by_ratings`, replacing the earlier result.
top_by_ratings = movies_ratings.top_by_ratings(30, metric=Statistics.median)

# Two-column table: movie title padded to 75 characters, then its median.
print(f'{"Movie":<75}Median rating')
for title, median_rating in top_by_ratings.items():
    print(f'{title:<75}{median_rating}')

Movie                                                                      Median rating
The Jinx: The Life and Deaths of Robert Durst (2015)                       5.0
Galaxy of Terror (Quest) (1981)                                            5.0
Alien Contamination (1980)                                                 5.0
Troll 2 (1990)                                                             5.0
I'm the One That I Want (2000)                                             5.0
Chorus Line, A (1985)                                                      5.0
Guess Who's Coming to Dinner (1967)                                        5.0
Children of the Corn IV: The Gathering (1996)                              5.0
Band of Brothers (2001)                                                    5.0
Lesson Faust (1994)                                                        5.0
Assignment, The (1997)                                                     5.0
Mephisto (1981)                           

### Top controversial movies

In [33]:
# Time movies_ratings.top_controversial(30).
%timeit movies_ratings.top_controversial(30)

318 ms ± 9.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [34]:
top_controversial = movies_ratings.top_controversial(30)

# Two-column table: movie title padded to 75 characters, then the value
# labelled "Rating variance".
# NOTE(review): several titles shown here with 5.0 also appear in the
# top-by-average list with a mean of 5.0 (the highest rating value in this
# report); a movie whose mean is the maximum should have zero variance -
# confirm what Ratings.Movies.top_controversial actually computes.
print(f'{"Movie":<75}Rating variance')
for title, rating_variance in top_controversial.items():
    print(f'{title:<75}{rating_variance}')

Movie                                                                      Rating variance
Troll 2 (1990)                                                             5.42
Ivan's Childhood (a.k.a. My Name is Ivan) (Ivanovo detstvo) (1962)         5.03
The Jinx: The Life and Deaths of Robert Durst (2015)                       5.0
Galaxy of Terror (Quest) (1981)                                            5.0
Alien Contamination (1980)                                                 5.0
I'm the One That I Want (2000)                                             5.0
Assignment, The (1997)                                                     5.0
Mephisto (1981)                                                            5.0
Black Mirror                                                               5.0
Dylan Moran: Monster (2004)                                                5.0
Bill Hicks: Revelations (1993)                                             5.0
My Sassy Girl (Yeopgijeogin geunyeo) (

## Ratings.Users analysis
### Distribution of users by ratings count

In [35]:
# Build a Ratings.Users object from the Ratings and Movies objects; the
# cells in this section call its methods.
users_ratings = Ratings.Users(ratings, movies)

In [36]:
# Time users_ratings.dist_by_ratings_number().
%timeit users_ratings.dist_by_ratings_number()

252 ms ± 4.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [37]:
dist_by_ratings_number = users_ratings.dist_by_ratings_number()

# Two-column table: user id padded to 8 characters, then how many ratings
# that user has. Every entry is printed, so the output is long.
print(f'{"User":<8}Number of ratings')
for user_id, ratings_count in dist_by_ratings_number.items():
    print(f'{user_id:<8}{ratings_count}')

User    Number of ratings
53      20
147     20
189     20
194     20
207     20
257     20
278     20
320     20
406     20
431     20
442     20
569     20
576     20
595     20
26      21
37      21
49      21
87      21
157     21
245     21
281     21
293     21
324     21
364     21
439     21
507     21
547     21
549     21
598     21
60      22
118     22
120     22
127     22
138     22
192     22
214     22
407     22
433     22
467     22
478     22
494     22
531     22
544     22
35      23
145     23
163     23
251     23
299     23
329     23
394     23
397     23
423     23
485     23
545     23
568     23
574     23
92      24
175     24
180     24
231     24
289     24
508     24
518     24
55      25
173     25
206     25
228     25
258     25
333     25
360     25
392     25
529     25
25      26
81      26
150     26
158     26
172     26
208     26
218     26
355     26
459     26
515     26
516     26
519     26
548     26
205     27
250     27
296     27
461   

### Distribution of users by ratings values (average)

In [38]:
# Time users_ratings.dist_by_ratings_values() with its default metric.
%timeit users_ratings.dist_by_ratings_values()

237 ms ± 5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
dist_by_ratings_values = users_ratings.dist_by_ratings_values()

# Two-column table: user id padded to 8 characters, then the user's score
# under the default metric (labelled "Average rating value" in the header).
print(f'{"User":<8}Average rating value')
for user_id, average_rating in dist_by_ratings_values.items():
    print(f'{user_id:<8}{average_rating}')

User    Average rating value
442     1.27
139     2.14
508     2.15
153     2.22
567     2.25
311     2.34
298     2.36
517     2.39
308     2.43
3       2.44
22      2.57
255     2.57
571     2.57
297     2.6
19      2.61
294     2.61
287     2.62
293     2.62
36      2.63
333     2.64
428     2.64
599     2.64
307     2.67
535     2.67
160     2.71
245     2.71
149     2.72
431     2.73
365     2.75
386     2.75
217     2.76
81      2.77
50      2.78
481     2.81
478     2.82
55      2.84
368     2.84
448     2.85
214     2.86
230     2.86
329     2.87
207     2.88
510     2.9
181     2.94
338     2.94
342     2.94
461     2.94
394     2.96
600     2.99
133     3.0
163     3.0
316     3.0
28      3.02
489     3.02
54      3.03
94      3.04
132     3.04
47      3.05
314     3.05
395     3.05
416     3.07
76      3.08
384     3.09
427     3.1
576     3.1
127     3.11
262     3.11
259     3.12
552     3.12
608     3.13
146     3.14
324     3.14
396     3.14
487     3.14
288     3.15
78 

### Distribution of users by ratings values (median)

In [40]:
# Time users_ratings.dist_by_ratings_values() with Statistics.median as the metric.
%timeit users_ratings.dist_by_ratings_values(metric=Statistics.median)

242 ms ± 6.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
# Same distribution as the previous section, but scored with
# Statistics.median. Note: this rebinds `dist_by_ratings_values`,
# replacing the earlier result.
dist_by_ratings_values = users_ratings.dist_by_ratings_values(metric=Statistics.median)

# Two-column table: user id padded to 8 characters, then the user's median.
print(f'{"User":<8}Median of rating value')
for user_id, median_rating in dist_by_ratings_values.items():
    print(f'{user_id:<8}{median_rating}')

User    Median of rating value
3       0.5
442     1.0
139     2.0
153     2.0
255     2.0
293     2.0
329     2.0
508     2.0
567     2.0
571     2.0
36      2.5
287     2.5
298     2.5
307     2.5
308     2.5
517     2.5
599     2.5
311     2.75
431     2.75
478     2.75
6       3.0
8       3.0
9       3.0
14      3.0
19      3.0
22      3.0
26      3.0
28      3.0
38      3.0
44      3.0
47      3.0
50      3.0
54      3.0
55      3.0
78      3.0
81      3.0
94      3.0
102     3.0
109     3.0
117     3.0
120     3.0
121     3.0
126     3.0
132     3.0
133     3.0
134     3.0
136     3.0
145     3.0
146     3.0
149     3.0
150     3.0
157     3.0
160     3.0
163     3.0
165     3.0
170     3.0
173     3.0
174     3.0
181     3.0
214     3.0
217     3.0
222     3.0
230     3.0
232     3.0
242     3.0
245     3.0
262     3.0
265     3.0
268     3.0
270     3.0
271     3.0
283     3.0
288     3.0
294     3.0
297     3.0
314     3.0
315     3.0
316     3.0
321     3.0
323     3.0
324   

### Top users by variance of their ratings

In [42]:
# Time users_ratings.top_by_variance(30).
%timeit users_ratings.top_by_variance(30)

288 ms ± 9.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [43]:
top_by_variance = users_ratings.top_by_variance(30)

# Two-column table: key padded to 8 characters, then the variance value.
# NOTE(review): the recorded keys (70946, 32892, 131724, ...) are far larger
# than any user id shown elsewhere in this report, and the values (5.42,
# 5.03, 5.0, ...) match the "Top controversial movies" table; this may be
# grouping by movie id instead of user id - confirm in
# Ratings.Users.top_by_variance.
print(f'{"User":<8}Variance of ratings')
for user_id, ratings_variance in top_by_variance.items():
    print(f'{user_id:<8}{ratings_variance}')

User    Variance of ratings
70946   5.42
32892   5.03
131724  5.0
5746    5.0
6835    5.0
3851    5.0
1631    5.0
2075    5.0
176601  5.0
92494   5.0
102217  5.0
27523   5.0
67618   5.0
8804    5.0
26350   5.0
31522   5.0
1140    5.0
6402    5.0
8238    5.0
25887   5.0
34312   5.0
44851   5.0
47736   5.0
50999   5.0
53280   5.0
53355   5.0
53578   5.0
60737   5.0
69211   5.0
69469   5.0


## Tags analysis

### Most words

In [44]:
# Build the Tags object from the tags CSV; the cells in this section call
# its methods.
tags = Tags(TAGS_CSV)

In [45]:
# Time tags.most_words(30).
%timeit tags.most_words(30)

12.5 ms ± 306 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [46]:
# Store the result under its own name: the original reused
# `dist_by_release`, which silently replaced the movies release-year
# distribution computed earlier in the notebook.
most_words = tags.most_words(30)

# Two-column table: tag text padded to 90 characters, then its word count.
print(f'{"Tag":<90}Number of words')
for tag, words_count in most_words.items():
    print(f'{tag:<90}{words_count}')

Tag                                                                                       Number of words
Something for everyone in this one... saw it without and plan on seeing it with kids!     32
the catholic church is the most corrupt organization in history                           20
villain nonexistent or not needed for good story                                          16
It was melodramatic and kind of dumb                                                      14
06 Oscar Nominated Best Movie - Animation                                                 12
stop using useless characters for filler                                                  12
r:disturbing violent content including rape                                               12
Everything you want is here                                                               10
Oscar (Best Music - Original Score)                                                       10
based on a true story                                    

### Longest

In [47]:
# Time tags.longest(30).
%timeit tags.longest(30)

9.55 ms ± 477 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [48]:
# Use a distinct name: `longest` already holds the title -> runtime mapping
# from links.longest(), whereas this result is iterated as a plain
# sequence of tag strings.
longest_tags = tags.longest(30)

# One tag per line under a simple header.
print('Tag\n---------')
for tag in longest_tags:
    print(tag)

Tag
---------
Something for everyone in this one... saw it without and plan on seeing it with kids!
the catholic church is the most corrupt organization in history
villain nonexistent or not needed for good story
r:disturbing violent content including rape
06 Oscar Nominated Best Movie - Animation
stop using useless characters for filler
Academy award (Best Supporting Actress)
Oscar (Best Effects - Visual Effects)
audience intelligence underestimated
It was melodramatic and kind of dumb
r:sustained strong stylized violence
Oscar (Best Music - Original Score)
start of a beautiful friendship
Oscar (Best Supporting Actress)
assassin-in-training (scene)
Oscar (Best Cinematography)
Everything you want is here
political right versus left
avant-garde romantic comedy
r:disturbing violent images
representation of children
Not available from Netflix
Rita Hayworth can dance!
stupid is as stupid does
beautiful cinematography
r:strong bloody violence
setting:space/space ship
Guardians of the Galaxy

### Most words and longest

In [49]:
# Time tags.most_words_and_longest(30).
%timeit tags.most_words_and_longest(30)

21.9 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [50]:
most_words_and_longest = tags.most_words_and_longest(30)

# One tag per line under a simple header.
print('Tag\n---------')
for tag_text in most_words_and_longest:
    print(tag_text)

Tag
---------
Something for everyone in this one... saw it without and plan on seeing it with kids!
the catholic church is the most corrupt organization in history
villain nonexistent or not needed for good story
It was melodramatic and kind of dumb
06 Oscar Nominated Best Movie - Animation
stop using useless characters for filler
r:disturbing violent content including rape
Everything you want is here
Oscar (Best Music - Original Score)
based on a true story
based on a TV show
start of a beautiful friendship
Academy award (Best Supporting Actress)
a dingo ate my baby
stop looking at me swan
GIVE ME BACK MY SON!
stupid is as stupid does
r:sustained strong stylized violence
Oscar (Best Effects - Visual Effects)
heroine in tight suit
lord of the rings
Guardians of the Galaxy
Harley Quinn's ass
jay and silent bob
based on a book
assassin-in-training (scene)
end of the world
a clever chef rat
political right versus left
seen more than once
audience intelligence underestimated
Oscar (Best Su

### Most popular

In [51]:
# Time tags.most_popular(20).
%timeit tags.most_popular(20)

52.4 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [52]:
most_popular = tags.most_popular(20)

# Two-column table: tag text padded to 30 characters, then its usage count.
print(f'{"Tag":<30}Usage number')
for tag_text, usage_count in most_popular.items():
    print(f'{tag_text:<30}{usage_count}')

Tag                           Usage number
in netflix queue              131
disney                        23
shakespeare                   13
stephen king                  13
holocaust                     12
mafia                         11
leonardo dicaprio             10
india                         10
england                       10
australia                     10
vietnam                       10
world war ii                  9
christmas                     8
quentin tarantino             7
coen brothers                 7
animal movie                  7
will ferrell                  6
atmospheric                   6
magic                         6
astaire and rogers            6


### Tags containing a given word

In [53]:
# Word passed to tags.tags_with() in the cells below; change it to search
# for a different word.
word_for_tag = 'history'

In [54]:
# Time tags.tags_with() for the configured word.
%timeit tags.tags_with(word_for_tag)

9.37 ms ± 219 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [55]:
tags_with = tags.tags_with(word_for_tag)

# Header naming the searched word, then one matching tag per line.
print(f'Tags with {word_for_tag}\n---------')
for matching_tag in tags_with:
    print(matching_tag)

Tags with history
---------
film history
history
the catholic church is the most corrupt organization in history
