In [1]:
import pandas as pd
import numpy as np

ds = '../../datasets/pandas-cookbook/movie.csv'
movies = pd.read_csv(ds, delimiter=',')

#### 1. Select one column and return as series

In [14]:
movies['actor_1_name'].head(3)

0        CCH Pounder
1        Johnny Depp
2    Christoph Waltz
Name: actor_1_name, dtype: object

#### 2. Select one column and return as dataframe 

In [12]:
movies[['actor_1_name']].head(3)

Unnamed: 0,actor_1_name
0,CCH Pounder
1,Johnny Depp
2,Christoph Waltz


#### 3. Select several columns

In [11]:
cols = ['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name']
movies[cols].head(3)

Unnamed: 0,actor_1_name,actor_2_name,actor_3_name,director_name
0,CCH Pounder,Joel David Moore,Wes Studi,James Cameron
1,Johnny Depp,Orlando Bloom,Jack Davenport,Gore Verbinski
2,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Sam Mendes


#### 4. Select columns using the `select_dtype` method

- The use either `include` or `exclude` as the argument. You can select by one datatype or a list of datatypes.
- common dtypes include but are not limited to: 
    - `int` 
    - `number`
    - `object` 
    - `category` 
    - `datetime`
    - `datetime64`
    - `timedelta`

In [15]:
movies.select_dtypes(include=['float', 'int']).head(3)

Unnamed: 0,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_1_facebook_likes,gross,facenumber_in_poster,num_user_for_reviews,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio
0,723.0,178.0,0.0,855.0,1000.0,760505847.0,0.0,3054.0,237000000.0,2009.0,936.0,7.9,1.78
1,302.0,169.0,563.0,1000.0,40000.0,309404152.0,0.0,1238.0,300000000.0,2007.0,5000.0,7.1,2.35
2,602.0,148.0,0.0,161.0,11000.0,200074175.0,1.0,994.0,245000000.0,2015.0,393.0,6.8,2.35


#### 4. Select columns using the `filter` method

In [16]:
cols = ['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name']

# filter on exact column names
movies.filter(items=cols).head(n=2)

Unnamed: 0,actor_1_name,actor_2_name,actor_3_name,director_name
0,CCH Pounder,Joel David Moore,Wes Studi,James Cameron
1,Johnny Depp,Orlando Bloom,Jack Davenport,Gore Verbinski


In [18]:
# filter on column names that include 'facebook'
movies.filter(like='facebook').head(2)

Unnamed: 0,director_facebook_likes,actor_3_facebook_likes,actor_1_facebook_likes,cast_total_facebook_likes,actor_2_facebook_likes,movie_facebook_likes
0,0.0,855.0,1000.0,4834,936.0,33000
1,563.0,1000.0,40000.0,48350,5000.0,0


In [19]:
# filter on column names that include a digit
movies.filter(regex=r'\d').head(2)

Unnamed: 0,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,actor_1_name,actor_3_name,actor_2_facebook_likes
0,855.0,Joel David Moore,1000.0,CCH Pounder,Wes Studi,936.0
1,1000.0,Orlando Bloom,40000.0,Johnny Depp,Jack Davenport,5000.0
