In [44]:
import pandas as pd

## Other data extractions - .loc and .iloc methods

In [45]:
bond = pd.read_csv('db/jamesbond.csv')
bond

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


### .set_index()

Setting the index after the data has been loaded

Using any other data as index will allow it to have duplicates, do the indexes may not be unique

In [46]:
bond.set_index(keys='Film', # The new index
               inplace=True) #Setting to true will overwrite it to the dataframe

bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


### .reset_index()

Resetting the original indexes

In [47]:
bond.reset_index(inplace=True, drop=False) #the 'drop' parameter lets you decide if the current index should be dropped, 'False' is the default

bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


### .loc[ ] -> Retrieving rows by index label

Supports slicing opperations just as a python list would

In [48]:
bond.set_index(keys='Film', inplace=True)
bond.sort_index(inplace=True) # it is recommended to sort the values before retrieving any rows since there is an improvement in the time spent when looking through ordered data
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


    If there is only one correspondance, it will return a Series with the corresponding information for that row

In [49]:
bond.loc['Goldfinger']

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

    If there is more than one correspondance, it will return a data frame

In [50]:
bond.loc['Casino Royale'] 

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [51]:
bond.loc[['Live and Let Die','Die Another Day']] #multiple selections are made within brackets

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


    Creating a sliced selection

The difference between .loc[] and slicing a list is that the upper range is inclusive, whereas on the list it is exclusive (is not included on the extracted data)

In [52]:
bond.loc['Diamonds Are Forever':'Licence to Kill']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9


### Second arguments

In [53]:
bond.loc['Moonraker','Actor'] # looking for index 'Moonraker' on column 'Actor'

'Roger Moore'

In [54]:
bond.loc['Octopussy', ['Actor','Director']]

Actor       Roger Moore
Director      John Glen
Name: Octopussy, dtype: object

In [55]:
bond.loc[['Octopussy',"GoldenEye"], ['Actor','Director']]

Unnamed: 0_level_0,Actor,Director
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Octopussy,Roger Moore,John Glen
GoldenEye,Pierce Brosnan,Martin Campbell


In [56]:
bond.loc[['Octopussy',"GoldenEye"], 'Year':'Director'] # intervals may also be used on the arguments

Unnamed: 0_level_0,Year,Actor,Director
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopussy,1983,Roger Moore,John Glen
GoldenEye,1995,Pierce Brosnan,Martin Campbell


### .iloc[ ] -> Retrieving rows by index position

The upper range is exclusive

    The iloc works for both numeric or labeled indexes

In [57]:
bond.iloc[:5]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [58]:
bond.reset_index(inplace=True)

In [59]:
bond.iloc[:5] #does not include position 5

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
1,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
2,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
3,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
4,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [60]:
bond.iloc[[2,4]]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
2,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
4,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


### Second arguments

In [61]:
bond.iloc[11,2] # 11th row, 2nd column

'Roger Moore'

In [62]:
bond.iloc[[11,15],[1,3]]

Unnamed: 0,Year,Director
11,1973,Guy Hamilton
15,1969,Peter R. Hunt


In [63]:
bond.iloc[[11,15],1:4] # remembering that the upper range is exclusive

Unnamed: 0,Year,Actor,Director
11,1973,Roger Moore,Guy Hamilton
15,1969,George Lazenby,Peter R. Hunt


### Setting new values for a specific cell

In [64]:
bond.iloc[11,2] = 'EXAMPLE'

bond.iloc[11] 

Film                 Live and Let Die
Year                             1973
Actor                         EXAMPLE
Director                 Guy Hamilton
Box Office                      460.3
Budget                           30.8
Bond Actor Salary                 NaN
Name: 11, dtype: object

    Also works with .loc[] and for setting multiple values

In [65]:
bond.set_index(keys='Film', inplace=True)

bond.loc['Moonraker', ['Actor','Director','Box Office']] = ['Example1','Example2','0']
bond.loc['Moonraker']

Year                     1979
Actor                Example1
Director             Example2
Box Office                  0
Budget                   91.5
Bond Actor Salary         NaN
Name: Moonraker, dtype: object

### Setting multiple values in the DataFrame

    loc and iloc can also receive boolean or mathematical comparisons

In [66]:
#bond.loc[bond['Actor']=='Sean Connery'] = 'Sir Sean Connery' # this way it would fill the whole line and not only the 'Actor' column

bond.loc[bond['Actor']=='Sean Connery','Actor'] = 'Sir Sean Connery' # this way, it will fill only the 'Actor' column

bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2


### Renaming index labels or columns .rename()

    Renaming index labels

Using the mapper argument

In [67]:
bond.rename(mapper={'GoldenEye':'Golden Eye','Goldfinger':'Gold Finger'}, # acceps dictionaries | by default it looks to the '0' axis (row axis)
            inplace=True) # setting to true to change the df
bond.head(10)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Gold Finger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2


Using index argument

In [68]:
bond.rename(index={'Dr. No':'Doctor No'}, # also acceps dictionaries
            inplace=True) # setting to true to change the df
bond.head(10)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Doctor No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Gold Finger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2


    Renaming column Labels

Using the mapper argument

In [69]:
bond.rename(mapper={'Box Office':'Revenue'}, axis=1, # axis=1 indicates we're looking for these values on the columns
            inplace=True)

#OR

bond.rename(mapper={'Box Office':'Revenue'}, axis='columns', # axis='columns' indicates we're looking for these values on the columns, works the same as axis=1
            inplace=True)   

bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


Using the columns argument directly

In [70]:
bond.rename(columns={'Bond Actor Salary':'Actor Salary'},# no need to set the axis argument
            inplace=True)  

bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Revenue,Budget,Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


Overwriting the columns directly

In [71]:
bond.columns

Index(['Year', 'Actor', 'Director', 'Revenue', 'Budget', 'Actor Salary'], dtype='object')

In [72]:
bond.columns=['Year', 'Actor', 'Director', 'Box Office', 'Budget', 'Bond Salary']

bond.head(5)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


Overwriting based on a condition

In [73]:
bond.columns=[column_name.replace(" ","_") for column_name in bond.columns]

bond.columns

Index(['Year', 'Actor', 'Director', 'Box_Office', 'Budget', 'Bond_Salary'], dtype='object')

### Deleting rows or Columns

.drop() method

    Lines

In [74]:
bond.drop('A View to a Kill', inplace=True)

#if 2 or more indexes have the same label name, all will be removed
bond.drop(['Casino Royale','Diamonds Are Forever'], inplace=True) # for multiple lines, just add a list

bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Doctor No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,


    Columns

In [75]:
bond.drop('Bond_Salary', axis=1) # for columns the axis must be specified

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2
Doctor No,1962,Sir Sean Connery,Terence Young,448.8,7.0
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9
Gold Finger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7
Live and Let Die,1973,EXAMPLE,Guy Hamilton,460.3,30.8
Moonraker,1979,Example1,Example2,0.0,91.5
Never Say Never Again,1983,Sir Sean Connery,Irvin Kershner,380.0,86.0


.pop( method) -> permanet operation

    as it is permanet and returns the Series, we can store it on a variable

In [76]:
bond.pop('Budget') 

#budget = bond.pop('Budget') 

Film
Die Another Day                    154.2
Doctor No                            7.0
For Your Eyes Only                  60.2
From Russia with Love               12.6
Golden Eye                          76.9
Gold Finger                         18.6
Licence to Kill                     56.7
Live and Let Die                    30.8
Moonraker                           91.5
Never Say Never Again               86.0
Octopussy                           53.9
On Her Majesty's Secret Service     37.3
Quantum of Solace                  181.4
Skyfall                            170.2
Spectre                            206.3
The Living Daylights                68.8
The Man with the Golden Gun         27.7
The Spy Who Loved Me                45.1
The World Is Not Enough            158.3
Thunderball                         41.9
Tomorrow Never Dies                133.9
You Only Live Twice                 59.9
Name: Budget, dtype: float64

del keyword -> also a permanen operation

In [77]:
del bond['Director']

bond.head(3)

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Die Another Day,2002,Pierce Brosnan,465.4,17.9
Doctor No,1962,Sir Sean Connery,448.8,0.6
For Your Eyes Only,1981,Roger Moore,449.4,


### Creating Random samples

In [78]:
bond.sample() # it will extract a single random row by default

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
For Your Eyes Only,1981,Roger Moore,449.4,


In [79]:
bond.sample(axis=1) # or axis='column' will extract a random column

Unnamed: 0_level_0,Box_Office
Film,Unnamed: 1_level_1
Die Another Day,465.4
Doctor No,448.8
For Your Eyes Only,449.4
From Russia with Love,543.8
Golden Eye,518.5
Gold Finger,820.4
Licence to Kill,250.9
Live and Let Die,460.3
Moonraker,0.0
Never Say Never Again,380.0


In [80]:
bond.sample(n=5) # n = number of rows, the order is also random

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tomorrow Never Dies,1997,Pierce Brosnan,463.2,10.0
Golden Eye,1995,Pierce Brosnan,518.5,5.1
Octopussy,1983,Roger Moore,373.8,7.8
The Living Daylights,1987,Timothy Dalton,313.5,5.2
Doctor No,1962,Sir Sean Connery,448.8,0.6


In [81]:
bond.sample(frac=.25) # we may provide a fraction/percentage instead of the number of samples

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
You Only Live Twice,1967,Sir Sean Connery,514.2,4.4
Never Say Never Again,1983,Sir Sean Connery,380.0,
Golden Eye,1995,Pierce Brosnan,518.5,5.1
The Spy Who Loved Me,1977,Roger Moore,533.0,
Moonraker,1979,Example1,0.0,
Licence to Kill,1989,Timothy Dalton,250.9,7.9


### .nsmallest() and .nlargest() methods 

In [82]:
bond.nlargest(3, columns='Bond_Salary') #first argument is the rows number 'n=' we can used the declared argument or just the positional argument
# returns a dataframe

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Die Another Day,2002,Pierce Brosnan,465.4,17.9
Skyfall,2012,Daniel Craig,943.5,14.5
The World Is Not Enough,1999,Pierce Brosnan,439.5,13.5


In [83]:
bond['Bond_Salary'].nlargest(3) # returns a series

Film
Die Another Day            17.9
Skyfall                    14.5
The World Is Not Enough    13.5
Name: Bond_Salary, dtype: float64

In [84]:
bond.nsmallest(n=2, columns='Bond Salary') # returns a dataframe

KeyError: 'Bond Salary'

In [None]:
bond['Bond Salary'].nsmallest(2) # returns a series

Film
Doctor No                          0.6
On Her Majesty's Secret Service    0.6
Name: Bond Salary, dtype: float64

### Filtering with the .where() method

Accepts boolean expressions

In [None]:
bond.where(bond['Year']>1980)

Unnamed: 0_level_0,Year,Actor,Revenue,Bond Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Die Another Day,2002.0,Pierce Brosnan,465.4,17.9
Doctor No,,,,
For Your Eyes Only,1981.0,Roger Moore,449.4,
From Russia with Love,,,,
Golden Eye,1995.0,Pierce Brosnan,518.5,5.1
Gold Finger,,,,
Licence to Kill,1989.0,Timothy Dalton,250.9,7.9
Live and Let Die,,,,
Moonraker,,,,
Never Say Never Again,1983.0,Sir Sean Connery,380.0,


    Multiple arguments

In [None]:
mask1 = bond['Year'] >2010
mask2 = bond['Actor'] == 'Daniel Craig'

bond.where(mask1 & mask2)

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Die Another Day,,,,
Doctor No,,,,
For Your Eyes Only,,,,
From Russia with Love,,,,
Golden Eye,,,,
Gold Finger,,,,
Licence to Kill,,,,
Live and Let Die,,,,
Moonraker,,,,
Never Say Never Again,,,,


### .query() method

Accepts boolean expressions inside a string

In [None]:
bond.query('Actor == "Daniel Craig"') # it is necessary to use a combination of double and single quotes, the order does not matter

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Quantum of Solace,2008,Daniel Craig,514.2,8.1
Skyfall,2012,Daniel Craig,943.5,14.5
Spectre,2015,Daniel Craig,726.7,


In [None]:
bond.query('Actor == "Daniel Craig" and Year > 2010')

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Skyfall,2012,Daniel Craig,943.5,14.5
Spectre,2015,Daniel Craig,726.7,


In [None]:
bond.query("Actor in ['Roger Moore', 'Timothy Dalton']")

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
For Your Eyes Only,1981,Roger Moore,449.4,
Licence to Kill,1989,Timothy Dalton,250.9,7.9
Octopussy,1983,Roger Moore,373.8,7.8
The Living Daylights,1987,Timothy Dalton,313.5,5.2
The Man with the Golden Gun,1974,Roger Moore,334.0,
The Spy Who Loved Me,1977,Roger Moore,533.0,


In [None]:
bond.query("Actor not in ['Roger Moore', 'Timothy Dalton','Sir Sean Connery']")

Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Die Another Day,2002,Pierce Brosnan,465.4,17.9
Golden Eye,1995,Pierce Brosnan,518.5,5.1
Live and Let Die,1973,EXAMPLE,460.3,
Moonraker,1979,Example1,0.0,
On Her Majesty's Secret Service,1969,George Lazenby,291.5,0.6
Quantum of Solace,2008,Daniel Craig,514.2,8.1
Skyfall,2012,Daniel Craig,943.5,14.5
Spectre,2015,Daniel Craig,726.7,
The World Is Not Enough,1999,Pierce Brosnan,439.5,13.5
Tomorrow Never Dies,1997,Pierce Brosnan,463.2,10.0


### .apply() method

applies a function to a series or dataframe

In [None]:
def convert_values(number):
    return str(number)+" Millions"

columns = ['Box_Office', 'Bond_Salary']

for col in columns:
    bond[col] = bond[col].apply(convert_values)

bond.head(5)


Unnamed: 0_level_0,Year,Actor,Box_Office,Bond_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Die Another Day,2002,Pierce Brosnan,465.4 Millions,17.9 Millions
Doctor No,1962,Sir Sean Connery,448.8 Millions,0.6 Millions
For Your Eyes Only,1981,Roger Moore,449.4 Millions,nan Millions
From Russia with Love,1963,Sir Sean Connery,543.8 Millions,1.6 Millions
Golden Eye,1995,Pierce Brosnan,518.5 Millions,5.1 Millions


In [85]:
def ranking (row):
    revenue=row[2]

    if int(revenue) > 500:
        return 'Expensive'
    else:
        return 'Cheap'

bond.apply(ranking, axis='columns') # for every row we will travell through all columns

Film
Die Another Day                        Cheap
Doctor No                              Cheap
For Your Eyes Only                     Cheap
From Russia with Love              Expensive
Golden Eye                         Expensive
Gold Finger                        Expensive
Licence to Kill                        Cheap
Live and Let Die                       Cheap
Moonraker                              Cheap
Never Say Never Again                  Cheap
Octopussy                              Cheap
On Her Majesty's Secret Service        Cheap
Quantum of Solace                  Expensive
Skyfall                            Expensive
Spectre                            Expensive
The Living Daylights                   Cheap
The Man with the Golden Gun            Cheap
The Spy Who Loved Me               Expensive
The World Is Not Enough                Cheap
Thunderball                        Expensive
Tomorrow Never Dies                    Cheap
You Only Live Twice                Expensive
dtype