In [1]:
import pandas as pd

In [5]:
bond = pd.read_csv('datasets/jamesbond.csv')
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


# Section 5; Part 69
Using `set_index()` and `reset_index()`

In [7]:
# Reminder that read_csv has index_col parameter
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [19]:
# Import without index_col
bond = pd.read_csv('datasets/jamesbond.csv')
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [10]:
# Can set index after import
bond.set_index("Film", inplace=True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [13]:
# reset_index will move current index back into column position and create new numeric index
# drop parameter = Default False; If set to true, will NOT move index back to column
bond.reset_index(inplace=True)
bond.head()

Unnamed: 0,index,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [18]:
# Change index
# If use "set_index" multiple times without reseting, it will remove the current index
# Notice with this, that "Film" is gone
bond.set_index("Film", inplace=True)
bond.set_index("Year")
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [21]:
# To fix the above, need to use "reset_index" prior to setting new index
bond.set_index("Film", inplace=True)
bond.reset_index(inplace=True)
bond.set_index("Year", inplace=True)
bond.head()

Unnamed: 0_level_0,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2
1965,Thunderball,Sean Connery,Terence Young,848.1,41.9,4.7
1967,Casino Royale,David Niven,Ken Hughes,315.0,85.0,


# Section 5; Part 70
Retreive rows by index label with `.loc[]`

 - `.loc` called directly on dataframe
 - Does NOT use parameters, uses square brackets `[]`

In [22]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
# Sorting an index can improve extraction speed
bond.sort_index(inplace=True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [23]:
# Single value returns a series, with the columns headers becoming the index labels of the series
bond.loc["Goldfinger"]

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [24]:
# If label doesn't exist, error is thrown
bond.loc["Not exist"]

KeyError: 'the label [Not exist] is not in the [index]'

In [25]:
# If index label is available more than once, all rows are returned as a dataframe
# Reminder: indexes do NOT have to be unique
bond.loc['Casino Royale']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [26]:
# Extracting sequential values
# Extract everything between "Diamonds are forever" and "Moonraker"
#    This is inclusive
bond.loc["Diamonds Are Forever" : "Moonraker"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [27]:
# Extract everything after Golden Eye
bond.loc["GoldenEye":]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [28]:
# Extract every thing from begining to "Her Majesty's Secret Service"
bond.loc[:"Her Majesty's Secret Service"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [30]:
# Extract multiple nonsequential values
#   Extracts in the order provided to the `loc` method
bond.loc[["Octopussy", "Moonraker"]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [31]:
# Providing a non-existing key to the loc when using multiple values, a empty row is added with all NaN (no error)
bond.loc[["For Your Eyes Only", "Live and Let Die", "Gold Bond"]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
For Your Eyes Only,1981.0,Roger Moore,John Glen,449.4,60.2,
Live and Let Die,1973.0,Roger Moore,Guy Hamilton,460.3,30.8,
Gold Bond,,,,,,


In [32]:
# Can test if value is in the index
"Gold Bond" in bond.index

False

# Section 5; Part 71
Retreive rows by index position with `iloc[]`

 - Short for "index location"

In [2]:
bond = pd.read_csv('datasets/jamesbond.csv')

In [5]:
# In this example, `loc[]` would return the same thing
bond.iloc[15] 

Film                 A View to a Kill
Year                             1985
Actor                     Roger Moore
Director                    John Glen
Box Office                      275.2
Budget                           54.5
Bond Actor Salary                 9.1
Name: 15, dtype: object

In [6]:
# Specific indexes
bond.iloc[[15,20]]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
15,A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5


In [7]:
# Range of indexes
bond.iloc[:4]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [10]:
# Reimport and set index column
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [9]:
bond.loc["GoldenEye"]

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [11]:
# even though we have an index of Film (with strings), there is a numeric index behind the scenes
bond.iloc[0]  # Returns "A view to a kill"

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

# Section 5; Part 72
The catch-all `ix[]` method

 - Functions as a combination of `loc[]` and `iloc[]`
 - Captures ability to extract based on index label OR index position

In [15]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [13]:
# By index label
bond.ix["GoldenEye"]

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [14]:
bond.ix[0]

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

In [17]:
# When passing index positions, not index labels, the second number is not inclusive
bond.ix[10:15]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [16]:
# Provide invalid value in a list, returns NaN like in previous lessons but does NOT exist in dataframe
bond.ix[["Spectre", "Gold Bond"]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015.0,Daniel Craig,Sam Mendes,726.7,206.3,
Gold Bond,,,,,,


In [20]:
# Passing an invalid index position results in an error, unlike invalid index labels
bond.ix[[8,900]]

IndexError: indices are out-of-bounds

# Section 5, Part 73
Second arguments to `loc`, `iloc` and `ix`

 - Second argument is column name that we want

In [21]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [22]:
bond.ix["Moonraker", "Year"]

1979

In [24]:
# Can take a range of columns
bond.loc["GoldenEye", "Director":"Budget"]

Director      Martin Campbell
Box Office              518.5
Budget                   76.9
Name: GoldenEye, dtype: object

In [25]:
# Can take list of columns
bond.loc["GoldenEye", ["Actor", "Budget", "Year"]]

Actor     Pierce Brosnan
Budget              76.9
Year                1995
Name: GoldenEye, dtype: object

In [26]:
# Can use index positions (2 = Director)
bond.iloc[14, 2]

'John Glen'

In [27]:
# Can mix and match with `ix`
bond.ix[20, "Budget"]

27.699999999999999

# Section 5, Part 74
Set new values for specific cell or row

In [28]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [29]:
bond.ix["Dr. No"]

Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

In [30]:
# Use the ix method to set a single value
# Requires the second argument to specify the column
# Change actor in Dr. No to "Sir Sean Connery"
bond.ix["Dr. No", "Actor"] = "Sir Sean Connery"

In [31]:
bond.ix["Dr. No"]

Year                             1962
Actor                Sir Sean Connery
Director                Terence Young
Box Office                      448.8
Budget                              7
Bond Actor Salary                 0.6
Name: Dr. No, dtype: object

In [32]:
bond.ix["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]]

Box Office           448.8
Budget                   7
Bond Actor Salary      0.6
Name: Dr. No, dtype: object

In [34]:
# Update multiple values
bond.ix["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] = [448800000, 7000000, 600000]
bond.ix["Dr. No"]

Year                             1962
Actor                Sir Sean Connery
Director                Terence Young
Box Office                  4.488e+08
Budget                          7e+06
Bond Actor Salary              600000
Name: Dr. No, dtype: object

# Section 5; Part 75
Set multiple values in dataframe

In [3]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [5]:
# Change all occurances of Sean Connery in Actor column to "Sir Sean Connery"
# INCORRECT WAY!

# Get Series with Sean Connery
mask = bond['Actor'] == "Sean Connery"
# Reset to "Sir ..."  CREATES AN ERROR!
bond[mask]["Actor"] = "Sir Sean Connery"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Error occurs because `bond[mask]` gives a new **copy** of a slice of the dataframe, not the original dataframe. Values are being modified in the copy

In [7]:
# Correct way
# Use .ix and pass in a boolean series
bond.ix[mask]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [8]:
bond.ix[mask] = "Sir Sean Connery"

In [9]:
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315,85,
Diamonds Are Forever,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery,Sir Sean Connery


# Section 5; Part 76
Rename index labels or columns

 - `rename()` - Take a dictionary where each key is the existing field and value is the new value
 - Use the `columns` attribute - Requires using ALL columnes at once instead of dictionary method above

In [14]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [19]:
# Rename a few columns
bond.rename(columns={"Year": "Release Date", "Box Office": "Revenue"}, inplace=True)
bond.head()

Unnamed: 0_level_0,Release Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [20]:
# Rename an index
bond.rename(index={"Dr. No": "Doctor No", "GoldenEye": "Golden Eye"}, inplace=True)
bond

Unnamed: 0_level_0,Release Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Doctor No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [17]:
bond.ix["Doctor No"]

Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Doctor No, dtype: object

In [18]:
bond.columns

Index([u'Year', u'Actor', u'Director', u'Box Office', u'Budget',
       u'Bond Actor Salary'],
      dtype='object')

In [21]:
bond.columns = ["Year of Release", "Actor", "Director", "Gross", "Cost", "Salary"]
bond.head()

Unnamed: 0_level_0,Year of Release,Actor,Director,Gross,Cost,Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


# Section 5; Part 77
Delete Rows or Columns from a dataframe

 - `drop()` - Removes single or multiple values
 - `pop()` - Accepts single series and removes it; Returns the popped series (does not require `inplace`)
 - `del()` - Will take a column and delete it from dataframe (does not require `inplace`)

In [2]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [3]:
# drop - 
#   labels - takes single or list of values that when matched will be removed
# Returns new dataframe with rows removed  (takes an inplace=True)
bond.drop("A View to a Kill")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9


In [6]:
# Will remove all duplicates (Casino Royale)
bond.drop(["A View to a Kill", "Die Another Day", "Casino Royale"])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [8]:
# Remove Box Office Column
bond.drop("Box Office", axis='columns')

Unnamed: 0_level_0,Year,Actor,Director,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,Roger Moore,John Glen,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,18.6,3.2


In [9]:
actor = bond.pop("Actor")

In [10]:
# Actor series is now in variable
actor

Film
A View to a Kill                      Roger Moore
Casino Royale                        Daniel Craig
Casino Royale                         David Niven
Diamonds Are Forever                 Sean Connery
Die Another Day                    Pierce Brosnan
Dr. No                               Sean Connery
For Your Eyes Only                    Roger Moore
From Russia with Love                Sean Connery
GoldenEye                          Pierce Brosnan
Goldfinger                           Sean Connery
Licence to Kill                    Timothy Dalton
Live and Let Die                      Roger Moore
Moonraker                             Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
On Her Majesty's Secret Service    George Lazenby
Quantum of Solace                    Daniel Craig
Skyfall                              Daniel Craig
Spectre                              Daniel Craig
The Living Daylights               Timothy Da

In [11]:
# Actor is removed from dataframe
bond

Unnamed: 0_level_0,Year,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,John Glen,275.2,54.5,9.1
Casino Royale,2006,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,John Glen,449.4,60.2,
From Russia with Love,1963,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Guy Hamilton,820.4,18.6,3.2


In [12]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [13]:
# use del to remove Director column
del(bond['Director'])
bond.head()

Unnamed: 0_level_0,Year,Actor,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,Roger Moore,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,581.5,145.3,3.3
Casino Royale,1967,David Niven,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,465.4,154.2,17.9


# Section 5; Part 78
Create random sample with `sample()`

Each row is pulled at random, thus rows can be out of order from original dataframe

In [14]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [19]:
# Extract 1 random row
bond.sample()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,


In [20]:
# Extract 5 random rows
bond.sample(n=5)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8


In [21]:
# Extract a fraction of the dataframe
bond.sample(frac=0.25)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,


In [23]:
# Extract random columns
bond.sample(n=2, axis='columns')

Unnamed: 0_level_0,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
A View to a Kill,54.5,9.1
Casino Royale,145.3,3.3
Casino Royale,85.0,
Diamonds Are Forever,34.7,5.8
Die Another Day,154.2,17.9
Dr. No,7.0,0.6
For Your Eyes Only,60.2,
From Russia with Love,12.6,1.6
GoldenEye,76.9,5.1
Goldfinger,18.6,3.2


# Section 5; Part 79
`nsmallest()` and `nlargest()` methods

 - `nsmallest()` - Smallest values in specific column
 - `nlargest()` - Largest values in specific column

In [24]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [25]:
# Option 1 is using `sort_values`
bond.sort_values("Box Office", ascending=False).head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [28]:
# Option 2 is using nlargest, that does the above
# More efficient than sorting by values for large dataframes
bond.nlargest(3, columns = "Box Office")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [29]:
# Get smallest box office grosses
bond.nsmallest(2, "Box Office")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [30]:
# Get top largest bugests
bond.nlargest(3, "Budget")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [31]:
# Smallest salaries
# Notice this returns a NaN values
bond.nsmallest(6, "Bond Actor Salary")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [74]:
# Removing that NaN
# nsmallest/nlargest don't have an option to drop NaNs so we have to do it before we call the method
#   The dropna is a temporary operation (no inplace parameter) then we call nsmallest on the dataframe that
#   created without NaNs
bond.dropna(subset = ["Bond Actor Salary"]).nsmallest(6, "Bond Actor Salary")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [32]:
# Can be called on a series directly
bond["Box Office"].nlargest(8)

Film
Skyfall                  943.5
Thunderball              848.1
Goldfinger               820.4
Spectre                  726.7
Casino Royale            581.5
From Russia with Love    543.8
Moonraker                535.0
The Spy Who Loved Me     533.0
Name: Box Office, dtype: float64

# Section 5; Part 80
Filtering with the `.where()` method

Returns original dataframe, but rows that match the condition will have all values present, others will have NaN values

In [34]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [36]:
# Extract rows where actor is Sean Connery
mask = bond['Actor'] == "Sean Connery"
bond[mask]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [37]:
# Alternative using where()
bond.where(mask)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,,,,,,
Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [39]:
box_mask = bond["Box Office"] > 800
bond.where(box_mask)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [40]:
# Multiple conditions to where method
bond.where(mask & box_mask)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# Section 5; Part 81
The `query()` method

 - Argument will be a string
 - Column names must not have spaces

In [41]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [45]:
# Fix our column names to have no spaces
bond.columns = [column_name.replace(" ", "_") for column_name in bond.columns]
bond.columns

Index([u'Year', u'Actor', u'Director', u'Box_Office', u'Budget',
       u'Bond_Actor_Salary'],
      dtype='object')

In [47]:
# Extract rows where actor is Sean Connery
# Notice the entire argument is a string
# Notice the quotes in quotes for "Sean Connery"
bond.query('Actor == "Sean Connery"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [48]:
# Extract Director is Terence Young
bond.query("Director == 'Terence Young'")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [49]:
# Negation works
bond.query("Actor != 'Roger Moore'")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [50]:
# Box office is over 600
# Remember we changes spaces to _
bond.query("Box_Office > 600")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [51]:
# Multiple queries
# Actor is Roger Moore and Director is John Glen
#  Use "and" or "or" not symbols
bond.query("Actor == 'Roger Moore' and Director == 'John Glen'")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [52]:
# Can use `in` and `not in`
# All actors except Timothy Dalton and George Lazenby
bond.query("Actor not in ['Timothy Dalton', 'George Lazenby']")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# Section 5; Part 82
A review of the `apply()` method on single columns

 - Performs action to every value in a series

In [58]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [59]:
# Change Box Office, Budget and Salary to strings and add "millions"
# This will work on NaN values
def convert_to_string_and_add_millions(number):
    return str(number) + " MILLIONS!"

bond["Bond Actor Salary"].apply(convert_to_string_and_add_millions)

Film
A View to a Kill                    9.1 MILLIONS!
Casino Royale                       3.3 MILLIONS!
Casino Royale                       nan MILLIONS!
Diamonds Are Forever                5.8 MILLIONS!
Die Another Day                    17.9 MILLIONS!
Dr. No                              0.6 MILLIONS!
For Your Eyes Only                  nan MILLIONS!
From Russia with Love               1.6 MILLIONS!
GoldenEye                           5.1 MILLIONS!
Goldfinger                          3.2 MILLIONS!
Licence to Kill                     7.9 MILLIONS!
Live and Let Die                    nan MILLIONS!
Moonraker                           nan MILLIONS!
Never Say Never Again               nan MILLIONS!
Octopussy                           7.8 MILLIONS!
On Her Majesty's Secret Service     0.6 MILLIONS!
Quantum of Solace                   8.1 MILLIONS!
Skyfall                            14.5 MILLIONS!
Spectre                             nan MILLIONS!
The Living Daylights                5.2 MILLI

In [60]:
# Save values by reassigning back to series
#  Can iterate over a series of columns instead of listing them one at a time
columns = ['Box Office', 'Budget', 'Bond Actor Salary']
for c in columns:
    bond[c] = bond[c].apply(convert_to_string_and_add_millions)
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 MILLIONS!,54.5 MILLIONS!,9.1 MILLIONS!
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 MILLIONS!,145.3 MILLIONS!,3.3 MILLIONS!
Casino Royale,1967,David Niven,Ken Hughes,315.0 MILLIONS!,85.0 MILLIONS!,nan MILLIONS!
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5 MILLIONS!,34.7 MILLIONS!,5.8 MILLIONS!
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4 MILLIONS!,154.2 MILLIONS!,17.9 MILLIONS!
Dr. No,1962,Sean Connery,Terence Young,448.8 MILLIONS!,7.0 MILLIONS!,0.6 MILLIONS!
For Your Eyes Only,1981,Roger Moore,John Glen,449.4 MILLIONS!,60.2 MILLIONS!,nan MILLIONS!
From Russia with Love,1963,Sean Connery,Terence Young,543.8 MILLIONS!,12.6 MILLIONS!,1.6 MILLIONS!
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5 MILLIONS!,76.9 MILLIONS!,5.1 MILLIONS!
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4 MILLIONS!,18.6 MILLIONS!,3.2 MILLIONS!


# Section 5; Part 83
The `apply()` method with Row Values

In [61]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [62]:
# Create a system of rankings for each movie
def good_movie(row):
    """row will be a list of values for each row"""
    actor = row[1]
    budget = row[4]
    if actor == "Pierce Brosnan":
        return "The best"
    elif actor == "Roger Moore" and budget > 40:
        return "Enjoyable"
    else:
        return "I have no clue"
    
# Passing 'columns' to axis because we are moving across the columns to provide the list (seems counter intuitive)
bond.apply(good_movie, axis='columns')

Film
A View to a Kill                        Enjoyable
Casino Royale                      I have no clue
Casino Royale                      I have no clue
Diamonds Are Forever               I have no clue
Die Another Day                          The best
Dr. No                             I have no clue
For Your Eyes Only                      Enjoyable
From Russia with Love              I have no clue
GoldenEye                                The best
Goldfinger                         I have no clue
Licence to Kill                    I have no clue
Live and Let Die                   I have no clue
Moonraker                               Enjoyable
Never Say Never Again              I have no clue
Octopussy                               Enjoyable
On Her Majesty's Secret Service    I have no clue
Quantum of Solace                  I have no clue
Skyfall                            I have no clue
Spectre                            I have no clue
The Living Daylights               I have no 

In [63]:
# assign to a new column
bond['Rating'] = bond.apply(good_movie, axis='columns')
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary,Rating
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1,Enjoyable
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3,I have no clue
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,,I have no clue
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8,I have no clue
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9,The best
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6,I have no clue
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,,Enjoyable
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6,I have no clue
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1,The best
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2,I have no clue


# Section 5; Part 84
The `copy()` method

 - Creates a copy of an existing pandas object but stores it separately

In [64]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)

In [66]:
# Explaination of why we want to use copy
# Assign to a variable
directors = bond["Director"]
directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [68]:
# Change "John Glen" to "Mr. John Glen" - Gets warning but does change
directors['A View to a Kill'] = "Mr. John Glen"
directors.head(3)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Film
A View to a Kill      Mr. John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [69]:
# Dataframe is also modified, even though we acted on the directors variable
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,Mr. John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [70]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = "Film")
bond.sort_index(inplace=True)
# Solution to prevent over writing one another is to use a copy and utilize the copy
directors = bond["Director"].copy()

In [71]:
# Can now do the above without affecting the dataframe
directors['A View to a Kill'] = "Mr. John Glen"
directors.head(3)

Film
A View to a Kill      Mr. John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [72]:
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
