In [2]:
import pandas as pd
import numpy as np

In [3]:
#Reindexing: create new object with the data conformed to a new index
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [4]:
obj2 = obj.reindex(['a','b','c','d'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
dtype: float64

In [5]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3
obj3.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [8]:
frame = pd.DataFrame(np.arange(9).reshape((3,3)), index = ['a','b','c'],columns = ['Ohio', "Texas", "California"] )
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
b,3,4,5
c,6,7,8


In [9]:
frame2 = frame.reindex(['a','b','c','d'])
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,,,


**Dropping Entries from an Axis**

In [14]:
obj = pd.Series(np.arange(5.), index=['a','b','c','d','e'])
obj


a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [15]:
new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [17]:
frame2.drop(['Texas', "Ohio"], axis='columns')

Unnamed: 0,California
a,2.0
b,5.0
c,8.0
d,


In [19]:
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,,,


**Indexing, Selection and Filtering**

In [21]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)), index=['Ohio', 'Colorado', 'Utah', 'New York'], columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [22]:
data.loc["Colorado", ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int32

In [24]:
data.iloc[1,[1,2]]

two      5
three    6
Name: Colorado, dtype: int32

**Integer Indexes**


In [25]:
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [26]:
s1.add(s2, fill_value=0)

a    5.2
c    1.1
d    3.4
e    0.0
f    4.0
g    3.1
dtype: float64

In [27]:
frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'),
.....: index=['Utah', 'Ohio', 'Texas', 'Oregon'])
frame

Unnamed: 0,b,d,e
Utah,-0.922196,-1.3148,0.797848
Ohio,2.648464,-1.328696,-1.913788
Texas,-0.250548,0.31049,-0.751422
Oregon,-1.626916,0.681364,0.664287


In [29]:
f = lambda x: x.max()-x.min()
frame.apply(f)

b    4.275380
d    2.010059
e    2.711636
dtype: float64

In [30]:
frame.apply(f, axis='columns')

Utah      2.112648
Ohio      4.562252
Texas     1.061911
Oregon    2.308279
dtype: float64

In [31]:
format = lambda x: '%.2f' % x
frame.applymap(format)#elementwise

Unnamed: 0,b,d,e
Utah,-0.92,-1.31,0.8
Ohio,2.65,-1.33,-1.91
Texas,-0.25,0.31,-0.75
Oregon,-1.63,0.68,0.66


**Sorting and Ranking**

In [32]:
obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])

obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [33]:
frame = pd.DataFrame(np.arange(8).reshape((2, 4)),
.....: index=['three', 'one'],
.....: columns=['d', 'a', 'b', 'c'])
frame.sort_index()


Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [37]:
frame.sort_index(axis=1, ascending="False")

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [38]:
frame.sort_values(by='b')

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [40]:
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [41]:
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

**Axis Indexes with Duplicate Labels**

In [42]:
obj = pd.Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [44]:
obj.index.is_unique

False