In [33]:
import pandas as pd
import numpy as np

# Series and DataFrames

### The pandas library has two main objects which serve as containers for our data:

#### 1. a one-dimensional labeled array called Series
#### 2. a two-dimensional labeled array called DataFrame

## Series

In [3]:
my_Series = pd.Series([1,'cat',10.2,'dog'])

0       1
1     cat
2    10.2
3     dog
dtype: object

In [None]:
my_Series

In [5]:
my_Series[1]

'cat'

In [8]:
ages = pd.Series([20,53,68], index=['John', 'Allen', 'Mary'])

In [9]:
ages

John     20
Allen    53
Mary     68
dtype: int64

In [10]:
ages['John']

20

In [12]:
# Users of R or Excel might recognize this as something that looks like a data table. 
# And indeed, this is the purpose of the index. 
# In order to get the full functionality of a data table, however, we need to allow more than one axis.

# DataFrames

The general syntax for defining a DataFrame is the following:

pd.DataFrame({ 'label1' : [col1], 'label2': [col2], .... })

In [13]:
df = pd.DataFrame( {'user' : [1,2,3],
            'age' : [24,54,17],
            'sex' : ['F','F','M'],
            'occupation' : ['technician','musician','student']})

In [14]:
df

Unnamed: 0,age,occupation,sex,user
0,24,technician,F,1
1,54,musician,F,2
2,17,student,M,3


In [15]:
df.set_index('user')

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,technician,F
2,54,musician,F
3,17,student,M


In [16]:
df

Unnamed: 0,age,occupation,sex,user
0,24,technician,F,1
1,54,musician,F,2
2,17,student,M,3


## Inplace changes

In [17]:
df.set_index('user', inplace = True)

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,technician,F
2,54,musician,F
3,17,student,M


In [18]:
df

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,technician,F
2,54,musician,F
3,17,student,M


# Importing data

df = pd.read_csv('file_name.csv')

df = pd.read_csv('file_name.csv', header=None)

df = pd.read_csv('file_name.csv', names=['Header1', 'Header2', ....])

df = pd.read_csv('file_name.csv', na_values=['?'])

df = pd.read_excel('file_name.xls')

Other supported formats include JSON, HTML, SAS, and SQL (see `pd.read_json`, `pd.read_html`, `pd.read_sas` and `pd.read_sql`).

# Summarizing data

In [20]:
df

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,technician,F
2,54,musician,F
3,17,student,M


In [21]:
df.head(2)

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,technician,F
2,54,musician,F


In [22]:
df.tail(1)

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,17,student,M


In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 1 to 3
Data columns (total 3 columns):
age           3 non-null int64
occupation    3 non-null object
sex           3 non-null object
dtypes: int64(1), object(2)
memory usage: 96.0+ bytes


In [24]:
df.index

Int64Index([1, 2, 3], dtype='int64', name='user')

In [25]:
#displaying the number of rows
df.shape[0]

3

In [26]:
#displaying the number of columns
df.shape[1]

3

In [27]:
#displaying the labels of all the columns
df.columns

Index(['age', 'occupation', 'sex'], dtype='object')

In [28]:
#displaying the data types of each column
df.dtypes

age            int64
occupation    object
sex           object
dtype: object

In [29]:
#summarizing the data
df.describe()

Unnamed: 0,age
count,3.0
mean,31.666667
std,19.655364
min,17.0
25%,20.5
50%,24.0
75%,39.0
max,54.0


In [31]:
df

Unnamed: 0_level_0,age,occupation,sex
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,technician,F
2,54,musician,F
3,17,student,M


In [32]:
df['occupation']

user
1    technician
2      musician
3       student
Name: occupation, dtype: object

# Manipulating the data

In [35]:
df = pd.DataFrame(np.arange(9).reshape(3,3), columns=['a','b', 'c'])
df

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


## Dropping data

In [36]:
# dropping a row
df.drop(0, axis=0)

Unnamed: 0,a,b,c
1,3,4,5
2,6,7,8


In [39]:
# dropping the first and third rows (index labels 0 and 2)
df.drop([0,2], axis=0)

Unnamed: 0,a,b,c
1,3,4,5


In [40]:
# And to drop columns we just specify the label instead of the index 
# and tell pandas we are referring to the second axis now:
df.drop(['b','c'], axis=1)

Unnamed: 0,a
0,0
1,3
2,6


In [41]:
df

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


## Arithmetic operations

In [42]:
df.a + df.b

0     1
1     7
2    13
dtype: int64

In [43]:
df['a'] + df['b']

0     1
1     7
2    13
dtype: int64

In [44]:
df.a.add(df.b)

0     1
1     7
2    13
dtype: int64

Pandas also provides similar methods for the other operations:

### sub()
### div()
### mul()

## Concatenating DataFrames

In [46]:
df1= pd.DataFrame([['Mark', 50], ['Kate', 46]],
                 columns=['name', 'age'])

In [47]:
df2 = pd.DataFrame([['Jon', 3], ['David', 4]],
                columns=['name', 'age'])

In [48]:
df1

Unnamed: 0,name,age
0,Mark,50
1,Kate,46


In [49]:
df2

Unnamed: 0,name,age
0,Jon,3
1,David,4


In [50]:
pd.concat([df1,df2])

Unnamed: 0,name,age
0,Mark,50
1,Kate,46
0,Jon,3
1,David,4


In [51]:
df3 = pd.DataFrame(['writer', 'journalist'], columns=['occupation'])
df3

Unnamed: 0,occupation
0,writer
1,journalist


In [52]:
pd.concat([df1,df3])

Unnamed: 0,age,name,occupation
0,50.0,Mark,
1,46.0,Kate,
0,,,writer
1,,,journalist


In [53]:
pd.concat([df1,df3], axis=1)

Unnamed: 0,name,age,occupation
0,Mark,50,writer
1,Kate,46,journalist


# Indexing, selecting and filtering

In [57]:
df = pd.read_csv('Mountains.csv', index_col=0)
df

Unnamed: 0_level_0,Mountain,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
2,K2 / Qogir / Godwin Austen,8611,28251,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest,1954,45,44.0
3,Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
4,Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
5,Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
6,Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
7,Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
8,Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0
9,Nanga Parbat,8126,26660,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri,1953,52,67.0
10,Annapurna I,8091,26545,2984,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿,Cho Oyu,1950,36,47.0


In [58]:
# Use the 'Mountain' column as the row index. The frame is rebound to the
# result of set_index() rather than mutated with inplace=True.
# NOTE: this consumes the 'Mountain' column, so the cell must be run exactly
# once after the read_csv cell; running it a second time raises KeyError.
df = df.set_index('Mountain')
df

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
K2 / Qogir / Godwin Austen,8611,28251,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest,1954,45,44.0
Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0
Nanga Parbat,8126,26660,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri,1953,52,67.0
Annapurna I,8091,26545,2984,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿,Cho Oyu,1950,36,47.0


In [59]:
df.index

Index(['Mount Everest / Sagarmatha / Chomolungma',
       'K2 / Qogir / Godwin Austen', 'Kangchenjunga', 'Lhotse', 'Makalu',
       'Cho Oyu', 'Dhaulagiri I', 'Manaslu', 'Nanga Parbat', 'Annapurna I',
       ...
       'Karjiang', 'Annapurna Dakshin', 'Khartaphu', 'Tongshanjiabu',
       'Malangutti Sar', 'Noijin Kangsang / Norin Kang', 'Langtang Ri',
       'Kangphu Kang', 'Singhi Kangri', 'Lupghar Sar'],
      dtype='object', name='Mountain', length=118)

In [60]:
df.columns

Index(['Height (m)', 'Height (ft)', 'Prominence (m)', 'Range', 'Coordinates',
       'Parent mountain', 'First ascent', 'Ascents bef. 2004',
       'Failed attempts bef. 2004'],
      dtype='object')

In [61]:
df.Range

Mountain
Mount Everest / Sagarmatha / Chomolungma             Mahalangur Himalaya
K2 / Qogir / Godwin Austen                             Baltoro Karakoram
Kangchenjunga                                     Kangchenjunga Himalaya
Lhotse                                               Mahalangur Himalaya
Makalu                                               Mahalangur Himalaya
Cho Oyu                                              Mahalangur Himalaya
Dhaulagiri I                                         Dhaulagiri Himalaya
Manaslu                                                 Manaslu Himalaya
Nanga Parbat                                       Nanga Parbat Himalaya
Annapurna I                                           Annapurna Himalaya
Gasherbrum I / Hidden Peak / K5                        Baltoro Karakoram
Broad Peak / K3                                        Baltoro Karakoram
Gasherbrum II / K4                                     Baltoro Karakoram
Shishapangma                              

In [62]:
df['Height (m)']

Mountain
Mount Everest / Sagarmatha / Chomolungma    8848
K2 / Qogir / Godwin Austen                  8611
Kangchenjunga                               8586
Lhotse                                      8516
Makalu                                      8485
Cho Oyu                                     8188
Dhaulagiri I                                8167
Manaslu                                     8163
Nanga Parbat                                8126
Annapurna I                                 8091
Gasherbrum I / Hidden Peak / K5             8080
Broad Peak / K3                             8051
Gasherbrum II / K4                          8035
Shishapangma                                8027
Gyachung Kang                               7952
Gasherbrum III                              7946
Annapurna II                                7937
Gasherbrum IV                               7932
Himalchuli                                  7893
Distaghil Sar                               7884
Ngadi Chuli

In [64]:
# multiple columns
df[['Height (m)', 'Range', 'Coordinates']]

Unnamed: 0_level_0,Height (m),Range,Coordinates
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿
K2 / Qogir / Godwin Austen,8611,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿
Kangchenjunga,8586,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿
Lhotse,8516,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿
Makalu,8485,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿
Cho Oyu,8188,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿
Dhaulagiri I,8167,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿
Manaslu,8163,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿
Nanga Parbat,8126,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿
Annapurna I,8091,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿


In [65]:
df[2:8]

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0


In [66]:
df.head(10)

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
K2 / Qogir / Godwin Austen,8611,28251,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest,1954,45,44.0
Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0
Nanga Parbat,8126,26660,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri,1953,52,67.0
Annapurna I,8091,26545,2984,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿,Cho Oyu,1950,36,47.0


In [67]:
df['Lhotse':'Manaslu']

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0


# The iloc operator to select rows and columns by position

df.iloc[rows, columns]

Each of `rows` and `columns` can be:

- a single position value, e.g. 3
- a list of position values, e.g. [3,5,8]
- a slice of position values, e.g. 3:8
- the : symbol to select all the rows and/or columns

In [69]:
df.iloc[3]

Height (m)                                      8516
Height (ft)                                    27940
Prominence (m)                                   610
Range                            Mahalangur Himalaya
Coordinates                  27°57′42″N 86°55′59″E﻿ 
Parent mountain                        Mount Everest
First ascent                                    1956
Ascents bef. 2004                                 26
Failed attempts bef. 2004                         26
Name: Lhotse, dtype: object

In [71]:
df.iloc[3,5]

'Mount Everest'

In [72]:
df.head()

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
K2 / Qogir / Godwin Austen,8611,28251,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest,1954,45,44.0
Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0


In [73]:
df.iloc[3:8]

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0


In [74]:
df.iloc[:, 2:6]

Unnamed: 0_level_0,Prominence (m),Range,Coordinates,Parent mountain
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,
K2 / Qogir / Godwin Austen,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest
Kangchenjunga,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest
Lhotse,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest
Makalu,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest
Cho Oyu,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest
Dhaulagiri I,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2
Manaslu,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu
Nanga Parbat,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri
Annapurna I,2984,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿,Cho Oyu


In [75]:
df.iloc[::2, 2:]

Unnamed: 0_level_0,Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
Kangchenjunga,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
Makalu,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Dhaulagiri I,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Nanga Parbat,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri,1953,52,67.0
Gasherbrum I / Hidden Peak / K5,2155,Baltoro Karakoram,35°43′28″N 76°41′47″E﻿,K2,1958,31,16.0
Gasherbrum II / K4,1524,Baltoro Karakoram,35°45′28″N 76°39′12″E﻿,Gasherbrum I,1956,54,12.0
Gyachung Kang,700,Mahalangur Himalaya,28°05′53″N 86°44′42″E﻿,Cho Oyu,1964,5,3.0
Annapurna II,2437,Annapurna Himalaya,28°32′05″N 84°07′19″E﻿,Annapurna I,1960,6,19.0
Himalchuli,1633,Manaslu Himalaya,28°26′12″N 84°38′23″E﻿,Manaslu,1960,6,12.0


# The loc operator to select rows and columns by label

df.loc[rows, columns]

In [76]:
df.loc[:,'Height (m)':'First ascent']

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953
K2 / Qogir / Godwin Austen,8611,28251,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest,1954
Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956
Nanga Parbat,8126,26660,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri,1953
Annapurna I,8091,26545,2984,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿,Cho Oyu,1950


In [77]:
df.loc[:,'Height (m)':'First ascent':2]

Unnamed: 0_level_0,Height (m),Prominence (m),Coordinates,First ascent
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,8848,27°59′17″N 86°55′31″E﻿,1953
K2 / Qogir / Godwin Austen,8611,4017,35°52′53″N 76°30′48″E﻿,1954
Kangchenjunga,8586,3922,27°42′12″N 88°08′51″E﻿,1955
Lhotse,8516,610,27°57′42″N 86°55′59″E﻿,1956
Makalu,8485,2386,27°53′23″N 87°05′20″E﻿,1955
Cho Oyu,8188,2340,28°05′39″N 86°39′39″E﻿,1954
Dhaulagiri I,8167,3357,28°41′48″N 83°29′35″E﻿,1960
Manaslu,8163,3092,28°33′00″N 84°33′35″E﻿,1956
Nanga Parbat,8126,4608,35°14′14″N 74°35′21″E﻿,1953
Annapurna I,8091,2984,28°35′44″N 83°49′13″E﻿,1950


# Boolean selection

In [79]:
df['Height (m)'] > 8000

Mountain
Mount Everest / Sagarmatha / Chomolungma     True
K2 / Qogir / Godwin Austen                   True
Kangchenjunga                                True
Lhotse                                       True
Makalu                                       True
Cho Oyu                                      True
Dhaulagiri I                                 True
Manaslu                                      True
Nanga Parbat                                 True
Annapurna I                                  True
Gasherbrum I / Hidden Peak / K5              True
Broad Peak / K3                              True
Gasherbrum II / K4                           True
Shishapangma                                 True
Gyachung Kang                               False
Gasherbrum III                              False
Annapurna II                                False
Gasherbrum IV                               False
Himalchuli                                  False
Distaghil Sar                            

In [80]:
df[df['Height (m)'] > 8000]

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
K2 / Qogir / Godwin Austen,8611,28251,4017,Baltoro Karakoram,35°52′53″N 76°30′48″E﻿,Mount Everest,1954,45,44.0
Kangchenjunga,8586,28169,3922,Kangchenjunga Himalaya,27°42′12″N 88°08′51″E﻿,Mount Everest,1955,38,24.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0
Dhaulagiri I,8167,26795,3357,Dhaulagiri Himalaya,28°41′48″N 83°29′35″E﻿,K2,1960,51,39.0
Manaslu,8163,26781,3092,Manaslu Himalaya,28°33′00″N 84°33′35″E﻿,Cho Oyu,1956,49,45.0
Nanga Parbat,8126,26660,4608,Nanga Parbat Himalaya,35°14′14″N 74°35′21″E﻿,Dhaulagiri,1953,52,67.0
Annapurna I,8091,26545,2984,Annapurna Himalaya,28°35′44″N 83°49′13″E﻿,Cho Oyu,1950,36,47.0


In [81]:
df[(df['Height (m)'] > 8000) & (df['Range']=='Mahalangur Himalaya')]

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0


In [82]:
df.loc[(df['Height (m)'] > 8000) & (df['Range']=='Mahalangur Himalaya'), :]

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range,Coordinates,Parent mountain,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya,27°59′17″N 86°55′31″E﻿,,1953,>>145,121.0
Lhotse,8516,27940,610,Mahalangur Himalaya,27°57′42″N 86°55′59″E﻿,Mount Everest,1956,26,26.0
Makalu,8485,27838,2386,Mahalangur Himalaya,27°53′23″N 87°05′20″E﻿,Mount Everest,1955,45,52.0
Cho Oyu,8188,26864,2340,Mahalangur Himalaya,28°05′39″N 86°39′39″E﻿,Mount Everest,1954,79,28.0


In [83]:
df.loc[(df['Height (m)'] > 8000) & (df['Range']=='Mahalangur Himalaya'), 'Height (m)':'Range']

Unnamed: 0_level_0,Height (m),Height (ft),Prominence (m),Range
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mount Everest / Sagarmatha / Chomolungma,8848,29029,8848,Mahalangur Himalaya
Lhotse,8516,27940,610,Mahalangur Himalaya
Makalu,8485,27838,2386,Mahalangur Himalaya
Cho Oyu,8188,26864,2340,Mahalangur Himalaya


# Applying functions

### The map() method

In [84]:
df = pd.DataFrame( {'user' : [1,2,3], 'age' : [24,54,17],
                    'sex' : ['F','F','M'],
                    'occupation' : ['technician','musician','student']})
df

Unnamed: 0,age,occupation,sex,user
0,24,technician,F,1
1,54,musician,F,2
2,17,student,M,3


In [85]:
# Expand the one-letter codes into full labels with Series.map().
# Values that are not keys of the lookup dict are passed through unchanged
# (passing the bare dict to map() would turn them into NaN), so re-running
# this cell leaves the already-expanded labels intact instead of wiping them.
sex_labels = {'F': 'Female', 'M': 'Male'}
df['sex'] = df['sex'].map(lambda code: sex_labels.get(code, code))
df

Unnamed: 0,age,occupation,sex,user
0,24,technician,Female,1
1,54,musician,Female,2
2,17,student,Male,3


### The apply() method

In [86]:
def dog_years(x):
    """Return ``x`` floor-divided by 7.

    Accepts any value that supports ``//`` with an int; the notebook applies
    it to each value of the ``age`` column.
    """
    divisor = 7
    return x // divisor

In [87]:
df['age_dog_years'] = df['age'].apply(dog_years)
df

Unnamed: 0,age,occupation,sex,user,age_dog_years
0,24,technician,Female,1,3
1,54,musician,Female,2,7
2,17,student,Male,3,2


In [89]:
df2 = pd.DataFrame(np.arange(9).reshape(3,3), columns=['a','b', 'c'])
df2

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


In [90]:
def my_sum(x):
    """Return the total of the items in the iterable ``x``.

    Thin wrapper around the built-in ``sum``; an empty iterable gives 0.
    """
    total = sum(x)
    return total

In [91]:
# along axis 0 meaning down the data frame which will return the sum along each column
df2.apply(my_sum, axis=0)

a     9
b    12
c    15
dtype: int64

In [92]:
# along axis 1 meaning across the data frame which will return the sum along each row
df2.apply(my_sum, axis=1)

0     3
1    12
2    21
dtype: int64

In [94]:
# finds the maximum entry in each row 
df2.apply(np.max, axis = 1)

0    2
1    5
2    8
dtype: int64

In [95]:
# finds the mean of each column
df2.apply(np.mean, axis = 0)

a    3.0
b    4.0
c    5.0
dtype: float64

### The applymap() method

In [96]:
def add_two(x):
    """Return ``x`` increased by 2."""
    increment = 2
    return x + increment

In [97]:
# Call add_two on every individual element of df2 and return the results as a new frame.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases (2.1+)
# in favour of DataFrame.map — confirm against the pandas version in use.
df2.applymap(add_two)

Unnamed: 0,a,b,c
0,2,3,4
1,5,6,7
2,8,9,10


# Sorting

In [98]:
df = pd.DataFrame({'A':[3,6,1,12,3],'B':[0,0,7,5,6],'C':[10,4,5,8,2]})
df

Unnamed: 0,A,B,C
0,3,0,10
1,6,0,4
2,1,7,5
3,12,5,8
4,3,6,2


### Sort by index

In [99]:
df.sort_index()

Unnamed: 0,A,B,C
0,3,0,10
1,6,0,4
2,1,7,5
3,12,5,8
4,3,6,2


In [100]:
df.sort_index(ascending=False)

Unnamed: 0,A,B,C
4,3,6,2
3,12,5,8
2,1,7,5
1,6,0,4
0,3,0,10


In [101]:
df.sort_index(ascending=False, axis=1)

Unnamed: 0,C,B,A
0,10,0,3
1,4,0,6
2,5,7,1
3,8,5,12
4,2,6,3


### Sort by values

In [102]:
df.A.sort_values()

2     1
0     3
4     3
1     6
3    12
Name: A, dtype: int64

In [103]:
df.sort_values('A')

Unnamed: 0,A,B,C
2,1,7,5
0,3,0,10
4,3,6,2
1,6,0,4
3,12,5,8


In [104]:
df.sort_values(['A','C'])

Unnamed: 0,A,B,C
2,1,7,5
4,3,6,2
0,3,0,10
1,6,0,4
3,12,5,8


# Grouping

In [106]:
# Small frame for the groupby examples: two label columns (A, B) and a numeric column C.
# C is drawn with np.random.randint(10, size=8), i.e. 8 integers in [0, 10), and no
# seed is set in the cells shown, so C (and the means computed from it below)
# will differ from the recorded outputs on a fresh run.
df = pd.DataFrame({
       'A' : ['dog', 'cat', 'dog', 'cat', 'dog', 'cat', 'dog', 'dog'],
       'B' : ['one', 'one', 'two', 'three','two', 'two', 'one', 'three'],
       'C' : np.random.randint(10, size=8)})
df

Unnamed: 0,A,B,C
0,dog,one,3
1,cat,one,1
2,dog,two,7
3,cat,three,1
4,dog,two,5
5,cat,two,2
6,dog,one,1
7,dog,three,4


In [107]:
df.C.mean()

3.0

In [108]:
df.groupby('A').C.mean()

A
cat    1.333333
dog    4.000000
Name: C, dtype: float64

In [109]:
df[df.A=='dog'].C.mean()

4.0

# Handling missing values

In [111]:
# 3x3 frame of random integers in [0, 10) with row labels a, c, e and columns A, B, C.
# No seed is set in the cells shown, so the values differ between runs.
df = pd.DataFrame(np.random.randint(10, size=(3, 3)), index=['a', 'c', 'e'], columns=['A', 'B', 'C'])
df

Unnamed: 0,A,B,C
a,3,9,1
c,9,8,4
e,2,4,3


In [113]:
df2 = df.reindex(['a', 'b', 'c', 'd', 'e', 'f'])
df2

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
b,,,
c,9.0,8.0,4.0
d,,,
e,2.0,4.0,3.0
f,,,


# isnull() method

In [114]:
df2.isnull()

Unnamed: 0,A,B,C
a,False,False,False
b,True,True,True
c,False,False,False
d,True,True,True
e,False,False,False
f,True,True,True


In [115]:
df2.isnull().sum()

A    3
B    3
C    3
dtype: int64

In [116]:
df2.isnull().sum(axis=1)

a    0
b    3
c    0
d    3
e    0
f    3
dtype: int64

In [117]:
df2[df2.A.isnull()]

Unnamed: 0,A,B,C
b,,,
d,,,
f,,,


# dropna() method

In [118]:
df2.dropna()

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
c,9.0,8.0,4.0
e,2.0,4.0,3.0


In [119]:
df2

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
b,,,
c,9.0,8.0,4.0
d,,,
e,2.0,4.0,3.0
f,,,


In [120]:
# drop a row if it has a missing value in all of the columns
df2.dropna(how='all')

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
c,9.0,8.0,4.0
e,2.0,4.0,3.0


In [121]:
# drop a row if it has a missing value in column 'A'
df2.dropna(subset=['A'])

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
c,9.0,8.0,4.0
e,2.0,4.0,3.0


In [122]:
# drop a row if it has a missing value in column 'A' or column 'B'
df2.dropna(subset=['A','B'])

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
c,9.0,8.0,4.0
e,2.0,4.0,3.0


# fillna() method

In [125]:
df2.fillna(value=0)

Unnamed: 0,A,B,C
a,3.0,9.0,1.0
b,0.0,0.0,0.0
c,9.0,8.0,4.0
d,0.0,0.0,0.0
e,2.0,4.0,3.0
f,0.0,0.0,0.0
