# WAYS OF FILTERING A DATAFRAME

In [1]:
import numpy as np
import pandas as pd

%config IPCompleter.greedy = True
# Suppress scientific notation
np.set_printoptions(suppress=True)

# DIFFERENT WAYS OF SELECTING COLUMNS

### IMPORTS

In [2]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
movies = pd.read_csv('http://bit.ly/imdbratings')

In [3]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [4]:
use_cols = ['country', 'continent', 'total_litres_of_pure_alcohol']
drinks[use_cols].head()

Unnamed: 0,country,continent,total_litres_of_pure_alcohol
0,Afghanistan,Asia,0.0
1,Albania,Europe,4.9
2,Algeria,Africa,0.7
3,Andorra,Europe,12.4
4,Angola,Africa,5.9


### SELECT COLUMNS BY DATA TYPE

In [5]:
drinks.select_dtypes(include=['number', 'object']).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [6]:
drinks.select_dtypes(exclude='number').head()

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


### USING ILOC

In [7]:
start_row, end_row = 0, 4
start_col, end_col = 0, 2
drinks.iloc[start_row:end_row, start_col:end_col]

Unnamed: 0,country,beer_servings
0,Afghanistan,0
1,Albania,89
2,Algeria,25
3,Andorra,245


### USING LOC

In [8]:
drinks.loc[:,['country', 'wine_servings']].head()

Unnamed: 0,country,wine_servings
0,Afghanistan,0
1,Albania,54
2,Algeria,14
3,Andorra,312
4,Angola,45


# DIFFERENT WAYS OF FILTERING A DATAFRAME

In [59]:
movies = pd.read_csv('http://bit.ly/imdbratings')

In [25]:
movies.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


### FIND ALL MOVIES WHERE GENRE IS DRAMA

In [10]:
movies[movies['genre'] == 'Drama'].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
13,8.8,Forrest Gump,PG-13,Drama,142,"[u'Tom Hanks', u'Robin Wright', u'Gary Sinise']"
16,8.7,One Flew Over the Cuckoo's Nest,R,Drama,133,"[u'Jack Nicholson', u'Louise Fletcher', u'Mich..."
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."


### FIND MOVIES WHERE GENRE IS EITHER ACTION,  DRAMA OR WESTERN

#### THE HARD WAY

In [11]:
cond1 = movies['genre'] == 'Action'
cond2 = movies['genre'] == 'Drama'
cond3 = movies['genre'] == 'Western'
movies[cond1 | cond2 | cond3].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",NOT RATED,Western,161,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
11,8.8,Inception,PG-13,Action,148,"[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'..."


#### THE ELEGANT WAY

In [9]:
# get movies that belong to either of three categories
movies[movies['genre'].isin(['Action', 'Drama', 'Western'])].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",NOT RATED,Western,161,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
11,8.8,Inception,PG-13,Action,148,"[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'..."


### FILTER OUT MOVIES WHERE "GENRE" IS EQUAL TO  EITHER "ACTION", "DRAMA" OR "WESTERN"

In [46]:
# find movies that do not belong to either category
movies[~movies['genre'].isin(['Action', 'Drama', 'Western'])].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
7,8.9,The Lord of the Rings: The Return of the King,R,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."


### FIND DRAMAS, WITH A RATING OF ABOVE 8, AND A DURATION LONGER THAN 120 MINUTES

In [43]:
cond_1 = movies['genre'].isin(['Drama'])
cond_2 = movies['star_rating'] > 8.0
cond_3 = movies['duration'] > 120
movies[cond_1 & cond_2 & cond_3].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
13,8.8,Forrest Gump,R,Drama,142,"[u'Tom Hanks', u'Robin Wright', u'Gary Sinise']"
16,8.7,One Flew Over the Cuckoo's Nest,R,Drama,133,"[u'Jack Nicholson', u'Louise Fletcher', u'Mich..."
17,8.7,Seven Samurai,R,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
22,8.7,It's a Wonderful Life,R,Drama,130,"[u'James Stewart', u'Donna Reed', u'Lionel Bar..."


### FIND DRAMAS, WITH A RATING OF ABOVE 8, AND A DURATION LONGER THAN 200 MIN, OR SHORTER THAN 70 MIN

In [18]:
cond_1 = movies['genre'].isin(['Drama'])
cond_2 = movies['star_rating'] > 8.0
cond_3 = movies['duration'] > 200
cond_4 = movies['duration'] < 70
movies[cond_1 & cond2 & (cond_3 | cond_4)]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
157,8.2,Gone with the Wind,G,Drama,238,"[u'Clark Gable', u'Vivien Leigh', u'Thomas Mit..."
389,8.0,Freaks,UNRATED,Drama,64,"[u'Wallace Ford', u'Leila Hyams', u'Olga Bacla..."
476,7.8,Hamlet,PG-13,Drama,242,"[u'Kenneth Branagh', u'Julie Christie', u'Dere..."


### FIND THE MOVIES WITH THE TOP FIVE RATINGS

In [33]:
movies.nlargest(n=5, columns='star_rating', keep='all')

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",NOT RATED,Western,161,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."
8,8.9,Schindler's List,R,Biography,195,"[u'Liam Neeson', u'Ralph Fiennes', u'Ben Kings..."
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."


### FIND THE 5 SHORTEST MOVIES

In [35]:
movies.nsmallest(n=5, columns='duration', keep='all')

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
389,8.0,Freaks,UNRATED,Drama,64,"[u'Wallace Ford', u'Leila Hyams', u'Olga Bacla..."
338,8.0,Battleship Potemkin,UNRATED,History,66,"[u'Aleksandr Antonov', u'Vladimir Barsky', u'G..."
258,8.1,The Cabinet of Dr. Caligari,UNRATED,Crime,67,"[u'Werner Krauss', u'Conrad Veidt', u'Friedric..."
88,8.4,The Kid,NOT RATED,Comedy,68,"[u'Charles Chaplin', u'Edna Purviance', u'Jack..."
293,8.1,Duck Soup,PASSED,Comedy,68,"[u'Groucho Marx', u'Harpo Marx', u'Chico Marx']"


### FIND MOVIES WITH MORGAN FREEMAN

In [101]:
movies[movies['actors_list'].str.contains('Morgan Freeman')]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
24,8.7,Se7en,R,Drama,127,"[u'Morgan Freeman', u'Brad Pitt', u'Kevin Spac..."
119,8.3,Unforgiven,R,Western,131,"[u'Clint Eastwood', u'Gene Hackman', u'Morgan ..."
227,8.1,Million Dollar Baby,PG-13,Drama,132,"[u'Hilary Swank', u'Clint Eastwood', u'Morgan ..."
549,7.8,Lucky Number Slevin,R,Crime,110,"[u'Josh Hartnett', u'Ben Kingsley', u'Morgan F..."
621,7.7,Gone Baby Gone,R,Crime,114,"[u'Morgan Freeman', u'Ed Harris', u'Casey Affl..."
943,7.4,The Bucket List,PG-13,Adventure,97,"[u'Jack Nicholson', u'Morgan Freeman', u'Sean ..."
962,7.4,Driving Miss Daisy,PG,Comedy,99,"[u'Morgan Freeman', u'Jessica Tandy', u'Dan Ay..."


### SEE WHICH ACTORS/ACTRESSES OCCUR MOST OFTEN IN MOVIES WITH A STAR RATING ABOVE 8.0

In [4]:
from collections import Counter
pattern = '|'.join(['u', '\]', '\[', "'"])
movies['actors_list'] = movies['actors_list'].str.replace(pattern, '')

actors_l = []
for index, row in movies[movies['star_rating'] > 8.0].iterrows():
    actors = row['actors_list'].split(',')
    for actor in actors:
        actors_l.append(actor.strip())

Counter(actors_l).most_common(15)

[('Robert De Niro', 8),
 ('James Stewart', 8),
 ('Clint Eastwood', 6),
 ('Harrison Ford', 6),
 ('Cary Grant', 6),
 ('Al Pacino', 5),
 ('Brad Pitt', 5),
 ('Leonardo DiCaprio', 5),
 ('Tom Hanks', 5),
 ('Charles Chaplin', 5),
 ('William Holden', 5),
 ('Aamir Khan', 5),
 ('Pal Newman', 5),
 ('Morgan Freeman', 4),
 ('Marlon Brando', 4)]

### USING LOC - FILTER OUT MOVIES WHERE "GENRE" IS EQUAL TO  EITHER "ACTION", "DRAMA" OR "WESTERN"

In [36]:
movies.loc[~movies['genre'].isin(['Action', 'Drama', 'Western'])].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."


# QUERIES

In [55]:
movies.query('genre == "Crime" and star_rating > 9.0')

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."


### FIND MOVIES WITH RATING ABOVE MEAN, DURATION BELOW MEAN, AND SELECT THE HORROR GENRE

In [62]:
mean_rating = movies['star_rating'].mean()
mean_duration = movies['duration'].mean()
movies.query('star_rating > @mean_rating and duration <@mean_duration and genre == "Horror"')

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
39,8.6,Psycho,R,Horror,109,"[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi..."
57,8.5,Alien,R,Horror,117,"[u'Sigourney Weaver', u'Tom Skerritt', u'John ..."
161,8.2,Diabolique,UNRATED,Horror,116,"[u'Simone Signoret', u'V\xe9ra Clouzot', u'Pau..."
186,8.2,The Thing,R,Horror,109,"[u'Kurt Russell', u'Wilford Brimley', u'Keith ..."
330,8.0,Nosferatu,UNRATED,Horror,81,"[u'Max Schreck', u'Greta Schr\xf6der', u'Ruth ..."
351,8.0,Frankenstein,UNRATED,Horror,70,"[u'Colin Clive', u'Mae Clarke', u'Boris Karloff']"
353,8.0,Night of the Living Dead,UNRATED,Horror,96,"[u'Duane Jones', u""Judith O'Dea"", u'Karl Hardm..."
405,7.9,The Bride of Frankenstein,NOT RATED,Horror,75,"[u'Boris Karloff', u'Elsa Lanchester', u'Colin..."
