# Sort dataframes

In [17]:
import numpy as np
import pandas as pd
# Limit how many rows pandas displays, just to keep the outputs below short.
# The fully qualified option name is used on purpose: the shorthand 'max_rows'
# also matches 'styler.render.max_rows' in recent pandas versions, which makes
# set_option raise an OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_rows', 8)

## Load data

In [2]:
# Read the movie dataset from a CSV file (relative path) into a dataframe
films = pd.read_csv(filepath_or_buffer='data/movie.csv')

## Sorting

The `sort_values()` method lets us sort the rows of a dataframe by one or more columns.  
Imagine you want to sort films by IMDB score, starting from the highest-rated, and then by budget with the cheapest ones at the top. Just pass the list of these columns and the matching list of sort orders to the method.  
`sort_values()` has the following parameters:
* `by` - column name or list of column names
* `ascending` - boolean or list of them representing order of sorting: False - from highest to lowest and True - from lowest to highest
* `inplace` - whether to sort the dataframe in place instead of returning a new one
* `kind` - sorting algorithm, default is 'quicksort'; for a dataframe it is only applied when sorting by a single column
* `na_position` - 'first' or 'last' - where to put NA, default is 'last'

In [18]:
# Order films by IMDB score from highest to lowest; films sharing the same score
# are then ordered among themselves by budget from lowest to highest
sort_keys = ['imdb_score', 'budget']
films_by_score = films.sort_values(by=sort_keys, ascending=[False, True])
films_by_score.loc[:, ['movie_title', *sort_keys]]

Unnamed: 0,movie_title,imdb_score,budget
2725,Towering Inferno,9.5,
1920,The Shawshank Redemption,9.3,25000000.0
3402,The Godfather,9.2,6000000.0
4312,Kickboxer: Vengeance,9.1,17000000.0
...,...,...,...
2266,Superbabies: Baby Geniuses 2,1.9,20000000.0
2240,Disaster Movie,1.9,25000000.0
1126,Foodfight!,1.7,65000000.0
2789,Justin Bieber: Never Say Never,1.6,13000000.0


## Edge values selecting

There are two convenient methods for selecting the rows with the top or bottom values.  
Both `nlargest()` and `nsmallest()` have these parameters:
* `n` - number of rows to select
* `columns` - column name or list of column names by which the rows are ordered
* `keep` - which rows to keep when there are duplicate values - 'first' (default), 'last' or 'all'

In [28]:
# The 100 films with the largest budget, showing the columns from
# 'movie_title' through 'imdb_score'
(
    films
    .nlargest(n=100, columns='budget')
    .loc[:, 'movie_title':'imdb_score']
)

Unnamed: 0,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score
3787,Lady Vengeance,53508,907,Hye-jeong Kang,0.0,cake|christian|lesbian sex|oral sex|pregnant s...,http://www.imdb.com/title/tt0451094/?ref_=fn_t...,131.0,Korean,South Korea,R,4.200000e+09,2005.0,126.0,7.7
2955,Fateless,5603,11,Bálint Péntek,0.0,bus|death|gay slur|hatred|jewish,http://www.imdb.com/title/tt0367082/?ref_=fn_t...,45.0,Hungarian,Hungary,R,2.500000e+09,2005.0,2.0,7.1
2294,Princess Mononoke,221552,2710,Billy Crudup,0.0,anime|cult film|forest|princess|studio ghibli,http://www.imdb.com/title/tt0119698/?ref_=fn_t...,570.0,Japanese,Japan,PG-13,2.400000e+09,1997.0,851.0,8.4
2305,Steamboy,13727,991,Rosalind Ayres,1.0,19th century|ball|boy|inventor|steam,http://www.imdb.com/title/tt0348121/?ref_=fn_t...,79.0,Japanese,Japan,PG-13,2.127520e+09,2004.0,336.0,6.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,Captain America: The Winter Soldier,496749,36188,Hayley Atwell,1.0,conspiracy|heroism|megalomaniac|super soldier|...,http://www.imdb.com/title/tt1843866/?ref_=fn_t...,742.0,English,USA,PG-13,1.700000e+08,2014.0,11000.0,7.8
95,Guardians of the Galaxy,682155,32438,Djimon Hounsou,3.0,bounty hunter|outer space|raccoon|talking anim...,http://www.imdb.com/title/tt2015381/?ref_=fn_t...,1097.0,English,USA,PG-13,1.700000e+08,2014.0,14000.0,8.1
106,Alice Through the Looking Glass,21352,80806,Anne Hathaway,1.0,clock|dark fantasy|mad hatter|queen|sequel,http://www.imdb.com/title/tt2567026/?ref_=fn_t...,131.0,English,USA,PG,1.700000e+08,2016.0,25000.0,6.4
127,Thor: The Dark World,414070,59803,Anthony Hopkins,3.0,arrest|portal|thor|warrior|weapon,http://www.imdb.com/title/tt1981115/?ref_=fn_t...,532.0,English,USA,PG-13,1.700000e+08,2013.0,20000.0,7.1


In [37]:
# The 100 films with the smallest budget, ordered by budget; 'imdb_score' is only
# used to order films whose budgets are equal (lower score first)
lowest_budget = films.nsmallest(n=100, columns=['budget', 'imdb_score'])
lowest_budget.loc[:, 'movie_title':'imdb_score']

Unnamed: 0,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score
4684,Tarnation,5709,78,Renee Leblanc,3.0,answering machine|home movie|lithium|schizophr...,http://www.imdb.com/title/tt0390538/?ref_=fn_t...,114.0,English,USA,Unrated,218.0,2003.0,20.0,7.2
4915,My Date with Drew,4285,163,Jon Gunn,0.0,actress name in title|crush|date|four word tit...,http://www.imdb.com/title/tt0378407/?ref_=fn_t...,84.0,English,USA,PG,1100.0,2004.0,23.0,6.6
4913,A Plague So Pleasant,38,0,David Chandler,0.0,,http://www.imdb.com/title/tt2107644/?ref_=fn_t...,3.0,English,USA,,1400.0,2013.0,0.0,6.3
4909,The Mongol King,36,93,Sara Stepnicka,0.0,jewell|mongol|nostradamus|stepnicka|vallone,http://www.imdb.com/title/tt0430371/?ref_=fn_t...,1.0,English,USA,PG-13,3250.0,2005.0,44.0,7.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4846,"Peace, Propaganda & the Promised Land",496,103,Arik Ascherman,3.0,arab israeli conflict|israel|media|middle east...,http://www.imdb.com/title/tt0428959/?ref_=fn_t...,13.0,English,USA,,70000.0,2004.0,0.0,8.3
4815,A Charlie Brown Christmas,21826,139,Christopher Shea,0.0,christmas|christmas tree|commercialism|meaning...,http://www.imdb.com/title/tt0059026/?ref_=fn_t...,126.0,English,USA,TV-G,150000.0,1965.0,36.0,8.4
4801,Children of Heaven,27882,100,Mohammad Amir Naji,0.0,class|gardening|race|school|shoe,http://www.imdb.com/title/tt0118849/?ref_=fn_t...,130.0,Persian,Iran,PG,180000.0,1997.0,35.0,8.5
4804,Butterfly Girl,27,0,Emily Gorell,0.0,,http://www.imdb.com/title/tt2421956/?ref_=fn_t...,1.0,English,USA,,180000.0,2014.0,0.0,8.7


In [38]:
# Among the 100 highest-budget films, pick the 5 with the lowest IMDB score
(
    films
    .nlargest(n=100, columns='budget')
    .nsmallest(n=5, columns='imdb_score')
    .loc[:, ['movie_title', 'budget', 'imdb_score']]
)

Unnamed: 0,movie_title,budget,imdb_score
71,Wild Wild West,170000000.0,4.8
62,Jupiter Ascending,176000000.0,5.4
74,Evan Almighty,175000000.0,5.4
42,Green Lantern,200000000.0,5.6
37,Transformers: Age of Extinction,210000000.0,5.7
