In [1]:
import pandas as pd

In [2]:
# reindex, which means to create a new object with the data conformed to a new index
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [3]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [4]:
# use reindex to do interpolation or filling
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
print(obj3)
obj3.reindex(range(6), method="ffill")  # ffill: forward-fills, bfill
print(obj3)

0      blue
2    purple
4    yellow
dtype: object
0      blue
2    purple
4    yellow
dtype: object


In [6]:
# with DataFrame, reindex can alter either the (row)index or columns or both
# when passed only a sequence, it reindexes the rows
import numpy as np
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
                     index=['a', 'c', 'd'],
                     columns=['Ohio', 'Texas', 'California'])
print(frame)
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
print(frame2)
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)

   Ohio  Texas  California
a     0      1           2
c     3      4           5
d     6      7           8
   Ohio  Texas  California
a   0.0    1.0         2.0
b   NaN    NaN         NaN
c   3.0    4.0         5.0
d   6.0    7.0         8.0


Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [8]:
# Drop entries from an axis
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
print(obj)
obj2 = obj.drop('c')
print(obj2)
obj3 = obj.drop(['d', 'c'])
print(obj3)

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64
a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64
a    0.0
b    1.0
e    4.0
dtype: float64


In [11]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
print(data)
print(data.drop(['Ohio']))
# drop values from columns by passing axis=1 or axis='columns'
print(data.drop('two', axis=1))
# set inplace=True, no returning a new object
obj.drop('c', inplace=True)
obj

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
          one  two  three  four
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
          one  three  four
Ohio        0      2     3
Colorado    4      6     7
Utah        8     10    11
New York   12     14    15


a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [15]:
# Series indexing(obj[...]) works analogously to NumPy array indexing
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
print(obj)
print(obj['b'])
print(obj[1:2])
print(obj[['b', 'a', 'c']])
print(obj[[1, 3]])
print(obj[obj < 2])
print(obj['b':'c'])     # slicing with labels: end-point is inclusive
obj['b':'c'] = 5
print(obj)

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64
1.0
b    1.0
dtype: float64
b    1.0
a    0.0
c    2.0
dtype: float64
b    1.0
d    3.0
dtype: float64
a    0.0
b    1.0
dtype: float64
b    1.0
c    2.0
dtype: float64
a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64


In [17]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
print(data)
print(data['two'])
print(data[['three', 'one']])
print(data[:2])
print(data[data['three'] > 5])

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32
          three  one
Ohio          2    0
Colorado      6    4
Utah         10    8
New York     14   12
          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
          one  two  three  four
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


In [22]:
print(data < 5)
data[data < 5] = 0
data

            one    two  three   four
Ohio       True   True   True   True
Colorado   True  False  False  False
Utah      False  False  False  False
New York  False  False  False  False


Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [26]:
# loc, iloc enable you to select a subset of the rows and columns from DataFrame
print(data.loc['Utah', ['two', 'one']])
print(data.iloc[2, [3, 0, 1]])

two    9
one    8
Name: Utah, dtype: int32
four    11
one      8
two      9
Name: Utah, dtype: int32
