## Intro to the DataFrames III Module + Import Dataset

In [2]:
import pandas as pd

In [71]:
bond = pd.read_csv('jamesbond.csv')
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


## The .set_index() and .reset_index() Methods

In [31]:
## We can set an index for the dataframe when we read the file: pd.read_csv('jamesbond.csv', index_col='Film')
## .set_index() function is called on a dataframe
bond.set_index(keys = 'Film', inplace = True)
bond.head(3)
## keys parameter in .set_index() can be one string or a list of strings(multi-index)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [32]:
## .reset_index() function move the current index back into a column position(by default) and create the standard index
bond.reset_index().head(3)
bond.reset_index(inplace = True, drop = False)

In [29]:
bond.reset_index(drop = True).head(3)
## drop = True will drop the old index and create new standard number index

Unnamed: 0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [42]:
bond.set_index('Film', inplace = True)
bond.set_index('Year').head(3)   ## if we set_index to another column, the Year will replace the Film, and Film will be overwritten

Unnamed: 0_level_0,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962,Sean Connery,Terence Young,448.8,7.0,0.6
1963,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [49]:
## To solve this, we need to reset the index to be number and then set the index to be Year
bond.set_index('Film', inplace = True)
bond.reset_index(inplace = True)
bond.set_index('Year', inplace = True)
bond.head(3)

Unnamed: 0_level_0,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## Retrieve Rows by Index Label with .loc[]

In [72]:
bond = pd.read_csv('jamesbond.csv', index_col = 'Film')
bond.sort_index(inplace = True)   ## sorting the index for large dataframes can faster program and more efficient
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [57]:
## .loc[] is called directly on dataframe to extract rows, put the index into the bracket
bond.loc['GoldenEye']
bond.loc['Goldfinger']  ## put a single value extract a series

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [59]:
## cite the file(index) does not exist
# bond.loc['Sacred Bond']  ## Give an error

In [60]:
## Cite the index with more than one row in dataframe, it will extract all of them
bond.loc['Casino Royale']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [76]:
## Extract sequential rows, remind: that 'Moonraker' here is inclusive in the output unlike extract in the series/list/arrays
bond.loc['Diamonds Are Forever' : 'Moonraker'].head(3)
bond.loc['GoldenEye': ].head(3)   ## extract from the GoldenEye to the end
bond.loc[: "On Her Majesty's Secret Service"].tail(3)  ## Extract from the very beginning to the Majesty's

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [78]:
bond.loc[[ 'Octopussy', 'Moonraker']]  ## extract multiple rows not in order

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [79]:
##  Last movie does not exist, but it wll included in the output, be careful, like the .get() function
bond.loc[['For Your Eyes Only', 'Live and Let Die', 'Gold Bond']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  from ipykernel import kernelapp as app


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
For Your Eyes Only,1981.0,Roger Moore,John Glen,449.4,60.2,
Live and Let Die,1973.0,Roger Moore,Guy Hamilton,460.3,30.8,
Gold Bond,,,,,,


In [83]:
'Gold Bond' in bond.index

False

## Retrieve Rows by Index Position with .iloc[]

In [88]:
## .iloc[] extract rows by index position
bond = pd.read_csv('jamesbond.csv')
bond.head(3)    ## Here the index label is the same as the index position, thus .loc[] and .iloc[] are the same
bond.loc[15]
bond.iloc[15]

Film                 A View to a Kill
Year                             1985
Actor                     Roger Moore
Director                    John Glen
Box Office                      275.2
Budget                           54.5
Bond Actor Salary                 9.1
Name: 15, dtype: object

In [93]:
bond.iloc[[15, 20]]
bond.iloc[: 4]
bond.iloc[4 : 8]
bond.iloc[20 : ]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [101]:
bond = pd.read_csv('jamesbond.csv', index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [102]:
## Even though now we have string index labels, each of these index labels is still assigned an index position
bond.iloc[0]

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

In [106]:
bond.iloc[15:20]
bond.iloc[: 8]
bond.iloc[18 : ]
bond.iloc[[5, 10, 15, 20]]
## .iloc[] extracts rows from the dataframe based on the index position/location, regardless of what we are actually 
## using as the index. If we set up a custom index(Film, Year), each index label still gonna be assigned a numeric position.

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


## The Catch-All .ix[] Method

In [119]:
## .ix[] combines the .loc[] and .iloc[] together
bond = pd.read_csv('jamesbond.csv', index_col = 'Film')
bond.sort_index(inplace = True)
bond.ix['GoldenEye']   ## same as bond.loc['GoldenEye]
bond.ix[['Diamonds Are Forever', 'Moonraker', 'Spectre']]
## bond.ix['A View to a Kill' : 'The World Is Not Enough']  ## The World Is Not Enough is inclusive

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [120]:
##  bond.ix['Sacred Bond'] does not exist which gives an error output
##  bond.ix[['Spectre', 'Sacred Bond']]  Error Message
'Spctre' in bond.index
'Sacred Bond' in bond.index
bond.loc[['Spectre', 'Sacred Bond']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015.0,Daniel Craig,Sam Mendes,726.7,206.3,
Sacred Bond,,,,,,


In [126]:
bond.ix[10]
bond.ix[10:15]  ## Only show the record until index position of 14
bond.ix[[8, 16 , 24]]
## bond.ix[100]  ## Gives Error Message
## bond.ix[[8, 30]]   ## Still gives an Error Message, unlike passing the trings to .ix[]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  from ipykernel import kernelapp as app
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  app.launch_new_instance()


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0


## Second Arguments to .loc[], iloc[], and .ix[] Methods

In [131]:
bond.loc['Moonraker', 'Actor']
## It will find the row with Moonraker index label and then find the column with 'Actor' and extract the value in 'Actor'
bond.loc['Moonraker', 'Director']

'Lewis Gilbert'

In [133]:
## Pull more than one columns
bond.loc['Moonraker', ['Director', 'Actor']]
bond.loc['Moonraker', 'Director' : 'Budget']

Director      Lewis Gilbert
Box Office              535
Budget                 91.5
Name: Moonraker, dtype: object

In [137]:
##  use the .iloc[] to extract value in specific column, we need to find the location of this column because .iloc[] 
## only accepts integers in its arguments
bond.iloc[0, 2]
bond.iloc[14, 2 : 5]
bond.iloc[14, [5, 3, 2]]

Bond Actor Salary          7.8
Box Office               373.8
Director             John Glen
Name: Octopussy, dtype: object

In [141]:
bond.ix[20, 'Budget']
bond.ix[5, 3]
## .ix[] can mix the number and  string
bond.ix['The Man with the Golden Gun', ['Actor', 'Budget']]
bond.ix['The Man with the Golden Gun', : 4]
bond.ix['The Man with the Golden Gun', 2]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  from ipykernel import kernelapp as app
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the document

'Guy Hamilton'

## Set New Values for a Specific Cell or Row