# Pandas Documentation on Essential Basic Functionality

In this notebook, you will work through the Pandas documentation on DataFrames.

## Imports

In [1]:
import numpy as np
import pandas as pd

## Pandas essential basic functionality

In this notebook, you are going to learn how to use Pandas by typing the code from the Pandas documentation into this notebook.

* Go to the Pandas [Essential Basic Functionality](http://pandas.pydata.org/pandas-docs/stable/basics.html#essential-basic-functionality).
* Type all of the code from that section of the documentation into this notebook and get it working.
* **To learn this API well, you must type the code rather than copy and pasting it**.
* Create a new cell in this section for each `In[]` prompt in the documentation.
* Ignore the cells in the **Grading** section below.
* No Markdown comments are needed.
* Skip the following sub-sections:
  - Tablewise Function Application
  - Applying with a Panel

## Grading

In [2]:
index = pd.date_range('1/1/2000', periods=8)

In [3]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [4]:
df = pd.DataFrame(np.random.randn(8,3), index=index, columns=['A', 'B', 'C'])

In [5]:
wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
              major_axis=pd.date_range('1/1/2000', periods=5),
              minor_axis=['A', 'B', 'C', 'D'])

In [6]:
long_series = pd.Series(np.random.randn(100))

In [7]:
long_series.head()

0   -0.533381
1   -0.607611
2    0.243715
3    0.422910
4   -1.179973
dtype: float64

In [8]:
long_series.tail(3)

97    0.554384
98   -0.653682
99    1.482422
dtype: float64

In [9]:
df[:2]

Unnamed: 0,A,B,C
2000-01-01,0.416838,-0.173104,0.976614
2000-01-02,0.385227,0.32019,1.082284


In [10]:
df.columns = [x.lower() for x in df.columns]

In [11]:
df

Unnamed: 0,a,b,c
2000-01-01,0.416838,-0.173104,0.976614
2000-01-02,0.385227,0.32019,1.082284
2000-01-03,-0.74452,-0.104601,0.056099
2000-01-04,1.980763,-0.062115,-1.438949
2000-01-05,-0.922628,0.28978,0.000793
2000-01-06,0.764675,0.274263,-1.669284
2000-01-07,-0.649159,-1.053217,-1.214952
2000-01-08,0.880401,-0.323639,1.794627


In [12]:
s.values

array([-0.57424307, -0.41558229,  0.3810344 , -1.45614473, -1.43974395])

In [13]:
df.values

array([[  4.16838300e-01,  -1.73103759e-01,   9.76613733e-01],
       [  3.85227488e-01,   3.20189966e-01,   1.08228439e+00],
       [ -7.44519719e-01,  -1.04601378e-01,   5.60992272e-02],
       [  1.98076264e+00,  -6.21148827e-02,  -1.43894854e+00],
       [ -9.22627881e-01,   2.89780404e-01,   7.93260110e-04],
       [  7.64674836e-01,   2.74263255e-01,  -1.66928415e+00],
       [ -6.49159087e-01,  -1.05321728e+00,  -1.21495180e+00],
       [  8.80401104e-01,  -3.23638631e-01,   1.79462664e+00]])

In [14]:
wp.values

array([[[-0.62687604, -0.77044708,  0.14814376,  0.86602   ],
        [-0.38950365, -0.46974161, -0.44493904,  2.01285694],
        [-0.57537504,  0.23337132,  0.3126236 , -1.27477319],
        [-2.28856017, -0.3595766 , -0.30938943,  0.34214154],
        [ 0.52617044,  0.24938708, -0.72624844,  0.61293785]],

       [[ 0.2321522 , -1.24776693, -1.04357211,  1.18213557],
        [-0.41531332, -0.10644807,  0.17772657,  1.09475171],
        [ 0.24071311, -1.57803872,  1.70573743,  0.59513942],
        [-1.11792498, -0.55811142, -0.15230402, -0.5136107 ],
        [ 0.62955966,  0.24913025, -0.19121084,  1.16111762]]])

In [15]:
df = pd.DataFrame({'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
                   'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
                   'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})

In [16]:
df

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [17]:
row = df.ix[1]

In [18]:
column = df['two']

In [19]:
df.sub(row, axis='columns')

Unnamed: 0,one,three,two
a,0.707332,,-0.354139
b,0.0,0.0,0.0
c,1.07887,3.994067,-0.855814
d,,0.08915,-2.643024


In [20]:
df.sub(row, axis=1)

Unnamed: 0,one,three,two
a,0.707332,,-0.354139
b,0.0,0.0,0.0
c,1.07887,3.994067,-0.855814
d,,0.08915,-2.643024


In [21]:
df.sub(column, axis='index')

Unnamed: 0,one,three,two
a,-0.187562,,0
b,-1.249032,-2.23397,0
c,0.685652,2.615911,0
d,,0.498204,0


In [22]:
df.sub(column, axis=0)

Unnamed: 0,one,three,two
a,-0.187562,,0
b,-1.249032,-2.23397,0
c,0.685652,2.615911,0
d,,0.498204,0


In [23]:
dfmi = df.copy()

In [24]:
dfmi.index = pd.MultiIndex.from_tuples([(1, 'a'),(1, 'b'), (1, 'c'),(2, 'a')],
                                       names=['first', 'second'])

In [25]:
dfmi.sub(column, axis=0, level='second')

Unnamed: 0_level_0,Unnamed: 1_level_0,one,three,two
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,-0.187562,,0.0
1,b,-1.249032,-2.23397,0.0
1,c,0.685652,2.615911,0.0
2,a,,-1.790681,-2.288885


In [26]:
major_mean = wp.mean(axis='major')

In [27]:
major_mean

Unnamed: 0,Item1,Item2
A,-0.670829,-0.086163
B,-0.223401,-0.648247
C,-0.203962,0.099275
D,0.511837,0.703907


In [28]:
wp.sub(major_mean, axis='major')

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis)
Items axis: Item1 to Item2
Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00
Minor_axis axis: A to D

In [29]:
df

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [30]:
df2 = df.copy()
df2['three']['a'] = 1

In [31]:
df2

Unnamed: 0,one,three,two
a,-0.016354,1.0,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [32]:
df + df2

Unnamed: 0,one,three,two
a,-0.032708,,0.342415
b,-1.447372,-3.417247,1.050693
c,0.710369,4.570887,-0.660935
d,,-3.238947,-4.235355


In [33]:
df.add(df2, fill_value=0)

Unnamed: 0,one,three,two
a,-0.032708,1.0,0.342415
b,-1.447372,-3.417247,1.050693
c,0.710369,4.570887,-0.660935
d,,-3.238947,-4.235355


In [34]:
df.gt(df2)

Unnamed: 0,one,three,two
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [35]:
df2.ne(df)

Unnamed: 0,one,three,two
a,False,True,False
b,False,False,False
c,False,False,False
d,True,False,False


In [36]:
(df > 0).all()

one      False
three    False
two      False
dtype: bool

In [37]:
(df > 0).any()

one      True
three    True
two      True
dtype: bool

In [38]:
(df > 0).any().any()

True

In [39]:
df.empty

False

In [40]:
pd.DataFrame(columns=list('ABE')).empty

True

In [41]:
pd.Series([True]).bool()

True

In [42]:
pd.Series([False]).bool()

False

In [43]:
pd.DataFrame([[True]]).bool()

True

In [44]:
pd.DataFrame([[False]]).bool()

False

In [45]:
df + df == df*2

Unnamed: 0,one,three,two
a,True,False,True
b,True,True,True
c,True,True,True
d,False,True,True


In [46]:
(df + df == df*2).all()

one      False
three    False
two       True
dtype: bool

In [47]:
np.nan == np.nan

False

In [48]:
(df + df).equals(df*2)

True

In [49]:
df1 = pd.DataFrame({'col':['foo', 0, np.nan]})

In [50]:
df2 = pd.DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0])

In [51]:
df1.equals(df2)

False

In [52]:
df1.equals(df2.sort_index())

True

In [53]:
pd.Series(['foo', 'bar', 'baz']) == 'foo'

0     True
1    False
2    False
dtype: bool

In [54]:
pd.Index(['foo', 'bar', 'baz']) == 'foo'

array([ True, False, False], dtype=bool)

In [55]:
pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

In [56]:
pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

In [57]:
pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar'])

ValueError: Series lengths must match to compare

In [66]:
pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo'])

ValueError: Series lengths must match to compare

In [67]:
np.array([1, 2, 3]) == np.array([2])

array([False,  True, False], dtype=bool)

In [68]:
np.array([1, 2, 3]) == np.array([1, 2])

  if __name__ == '__main__':


False

In [69]:
df1 = pd.DataFrame({'A' : [1., np.nan, 3., 5., np.nan], 'B' : [np.nan, 2., 3., np.nan, 6.]})

In [70]:
df2 = pd.DataFrame({'A' : [5., 2., 4., np.nan, 3., 7.], 'B' : [np.nan, np.nan, 3., 4., 6., 8.]})

In [71]:
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [72]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [73]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1,
1,2,2.0
2,3,3.0
3,5,4.0
4,3,6.0
5,7,8.0


In [74]:
combiner = lambda x, y: np.where(pd.isnull(x), y, x)

In [75]:
df1.combine(df2, combiner)

Unnamed: 0,A,B
0,1,
1,2,2.0
2,3,3.0
3,5,4.0
4,3,6.0
5,7,8.0


In [76]:
df

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [77]:
df.mean(0)

one     -0.128285
three   -0.347551
two     -0.437898
dtype: float64

In [78]:
df.mean(1)

a    0.077427
b   -0.635654
c    0.770053
d   -1.868576
dtype: float64

In [79]:
df.sum(0, skipna=False)

one           NaN
three         NaN
two     -1.751591
dtype: float64

In [80]:
df.sum(axis=1, skipna=True)

a    0.154854
b   -1.906963
c    2.310160
d   -3.737151
dtype: float64

In [81]:
ts_stand = (df - df.mean()) / df.std()

In [82]:
ts_stand.std()

one      1
three    1
two      1
dtype: float64

In [83]:
xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)

In [84]:
xs_stand.std(1)

a    1
b    1
c    1
d    1
dtype: float64

In [85]:
df.cumsum()

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.74004,-1.708623,0.696554
c,-0.384856,0.57682,0.366087
d,,-1.042653,-1.751591


In [86]:
np.mean(df['one'])

-0.12828521581272825

In [87]:
np.mean(df['one'].values)

nan

In [88]:
series = pd.Series(np.random.randn(500))

In [89]:
series[20:500] = np.nan

In [90]:
series[10:20] = 5

In [91]:
series.nunique()

11

In [92]:
series = pd.Series(np.random.randn(1000))

In [93]:
series[::2] = np.nan

In [94]:
series.describe()

count    500.000000
mean      -0.015789
std        0.971827
min       -2.720437
25%       -0.703243
50%       -0.006594
75%        0.579339
max        2.946183
dtype: float64

In [95]:
frame = pd.DataFrame(np.random.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e'])

In [96]:
frame.ix[::2] = np.nan

In [97]:
frame.describe()

Unnamed: 0,a,b,c,d,e
count,500.0,500.0,500.0,500.0,500.0
mean,0.087142,-0.036002,0.0323,0.025451,-0.009423
std,0.962709,0.938811,1.004919,1.037846,0.994793
min,-3.211787,-3.040441,-2.988887,-2.724035,-3.277777
25%,-0.535204,-0.636825,-0.633919,-0.703884,-0.677008
50%,0.072227,-0.046696,0.074499,0.023829,0.003375
75%,0.67752,0.57191,0.689115,0.754352,0.659742
max,2.650415,2.904486,3.52896,2.87059,2.908502


In [98]:
series.describe(percentiles=[.05, .25, .75, .95])

count    500.000000
mean      -0.015789
std        0.971827
min       -2.720437
5%        -1.625628
25%       -0.703243
50%       -0.006594
75%        0.579339
95%        1.601819
max        2.946183
dtype: float64

In [99]:
s = pd.Series(['a', 'a', 'b', 'b', 'a', 'a', np.nan, 'c', 'd', 'a'])

In [100]:
s.describe()

count     9
unique    4
top       a
freq      5
dtype: object

In [101]:
frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)})

In [102]:
frame.describe()

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [103]:
frame.describe(include=['object'])

Unnamed: 0,a
count,4
unique,2
top,No
freq,2


In [104]:
frame.describe(include=['number'])

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [105]:
frame.describe(include='all')

Unnamed: 0,a,b
count,4,4.0
unique,2,
top,No,
freq,2,
mean,,1.5
std,,1.290994
min,,0.0
25%,,0.75
50%,,1.5
75%,,2.25


In [106]:
s1 = pd.Series(np.random.randn(5))

In [107]:
s1

0   -0.154598
1    1.033637
2   -1.972011
3    0.950751
4    0.334222
dtype: float64

In [108]:
s1.idxmin(), s1.idxmax()

(2, 1)

In [109]:
df1 = pd.DataFrame(np.random.randn(5,3), columns=['A','B','C'])

In [110]:
df1

Unnamed: 0,A,B,C
0,0.08009,0.81829,-0.37548
1,0.577021,-0.323664,-2.14881
2,0.11786,1.177732,-0.458339
3,1.806732,-1.937619,-1.868718
4,-0.333383,0.621272,-0.066092


In [111]:
df1.idxmin(axis=0)

A    4
B    3
C    1
dtype: int64

In [112]:
df1.idxmin(axis=1)

0    C
1    C
2    C
3    B
4    A
dtype: object

In [113]:
df3 = pd.DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba'))

In [114]:
df3

Unnamed: 0,A
e,2.0
d,1.0
c,1.0
b,3.0
a,


In [115]:
df3['A'].idxmin()

'd'

In [116]:
data = np.random.randint(0, 7, size=50)

In [117]:
data

array([2, 6, 6, 1, 6, 4, 1, 5, 4, 5, 3, 1, 4, 6, 0, 5, 2, 4, 6, 6, 5, 5, 5,
       5, 3, 4, 4, 2, 4, 0, 1, 2, 6, 0, 5, 4, 4, 3, 2, 4, 2, 4, 4, 0, 3, 1,
       2, 1, 4, 0])

In [118]:
s =pd.Series(data)

In [119]:
s.value_counts()

4    13
5     8
6     7
2     7
1     6
0     5
3     4
dtype: int64

In [120]:
pd.value_counts(data)

4    13
5     8
6     7
2     7
1     6
0     5
3     4
dtype: int64

In [121]:
s5 = pd.Series([1, 1, 3, 3, 3, 5, 5, 7, 7, 7])

In [122]:
s5.mode()

0    3
1    7
dtype: int64

In [123]:
df5 = pd.DataFrame({"A": np.random.randint(0, 7, size=50), "B": np.random.randint(-10, 15, size=50)})

In [124]:
df5.mode()

Unnamed: 0,A,B
0,3,-5
1,4,13


In [125]:
arr = np.random.randn(20)

In [126]:
factor = pd.cut(arr, 4)

In [127]:
factor

[(-0.0556, 0.982], (-2.136, -1.0936], (0.982, 2.0205], (-0.0556, 0.982], (0.982, 2.0205], ..., (-2.136, -1.0936], (-0.0556, 0.982], (-0.0556, 0.982], (-0.0556, 0.982], (-0.0556, 0.982]]
Length: 20
Categories (4, object): [(-2.136, -1.0936] < (-1.0936, -0.0556] < (-0.0556, 0.982] < (0.982, 2.0205]]

In [128]:
factor = pd.cut(arr, [-5, -1, 0, 1, 5])

In [129]:
factor

[(0, 1], (-5, -1], (1, 5], (0, 1], (1, 5], ..., (-5, -1], (0, 1], (0, 1], (-1, 0], (0, 1]]
Length: 20
Categories (4, object): [(-5, -1] < (-1, 0] < (0, 1] < (1, 5]]

In [130]:
arr = np.random.randn(30)

In [131]:
factor = pd.qcut(arr, [0, .25, .5, .75, 1])

In [132]:
factor

[[-1.793, -1.0299], (0.642, 1.767], [-1.793, -1.0299], (-1.0299, 0.259], (0.642, 1.767], ..., (-1.0299, 0.259], (0.259, 0.642], (0.259, 0.642], (0.259, 0.642], (0.642, 1.767]]
Length: 30
Categories (4, object): [[-1.793, -1.0299] < (-1.0299, 0.259] < (0.259, 0.642] < (0.642, 1.767]]

In [133]:
pd.value_counts(factor)

(0.642, 1.767]       8
[-1.793, -1.0299]    8
(0.259, 0.642]       7
(-1.0299, 0.259]     7
dtype: int64

In [134]:
arr = np.random.randn(20)

In [135]:
factor = pd.cut(arr, [-np.inf, 0, np.inf])

In [136]:
factor

[(0, inf], (-inf, 0], (0, inf], (-inf, 0], (0, inf], ..., (0, inf], (-inf, 0], (0, inf], (-inf, 0], (-inf, 0]]
Length: 20
Categories (2, object): [(-inf, 0] < (0, inf]]

In [137]:
df.apply(np.mean)

one     -0.128285
three   -0.347551
two     -0.437898
dtype: float64

In [138]:
df.apply(np.mean, axis=1)

a    0.077427
b   -0.635654
c    0.770053
d   -1.868576
dtype: float64

In [139]:
df.apply(lambda x: x.max() - x.min())

one      1.078870
three    3.994067
two      2.643024
dtype: float64

In [140]:
df.apply(np.cumsum)

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.74004,-1.708623,0.696554
c,-0.384856,0.57682,0.366087
d,,-1.042653,-1.751591


In [141]:
df.apply(np.exp)

Unnamed: 0,one,three,two
a,0.983779,,1.186737
b,0.484961,0.181115,1.691045
c,1.426444,9.830046,0.718588
d,,0.198003,0.120311


In [142]:
tsdf = pd.DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],index=pd.date_range('1/1/2000', periods=1000))

In [143]:
tsdf.apply(lambda x: x.idxmax())

A   2001-12-11
B   2000-02-16
C   2000-01-02
dtype: datetime64[ns]

In [144]:
tsdf

Unnamed: 0,A,B,C
2000-01-01,1.100038,0.871723,-1.046139
2000-01-02,-0.552288,-0.149160,3.301960
2000-01-03,-0.480212,0.753661,-0.393995
2000-01-04,0.430237,-0.079810,-0.560741
2000-01-05,0.547019,-0.215913,0.387145
2000-01-06,0.079097,-0.425131,-0.977651
2000-01-07,1.721233,0.469286,0.028503
2000-01-08,0.018561,-0.632574,-2.456712
2000-01-09,0.032395,1.872464,1.139688
2000-01-10,-0.179944,1.302948,-0.168217


In [145]:
tsdf.apply(pd.Series.interpolate)

Unnamed: 0,A,B,C
2000-01-01,1.100038,0.871723,-1.046139
2000-01-02,-0.552288,-0.149160,3.301960
2000-01-03,-0.480212,0.753661,-0.393995
2000-01-04,0.430237,-0.079810,-0.560741
2000-01-05,0.547019,-0.215913,0.387145
2000-01-06,0.079097,-0.425131,-0.977651
2000-01-07,1.721233,0.469286,0.028503
2000-01-08,0.018561,-0.632574,-2.456712
2000-01-09,0.032395,1.872464,1.139688
2000-01-10,-0.179944,1.302948,-0.168217


In [146]:
df4 = df

In [147]:
df4

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [148]:
f = lambda x: len(str(x))

In [149]:
df4['one'].map(f)

a    16
b    15
c    14
d     3
Name: one, dtype: int64

In [150]:
df4.applymap(f)

Unnamed: 0,one,three,two
a,16,3,14
b,15,13,14
c,14,13,15
d,3,14,14


In [151]:
s = pd.Series(['six', 'seven', 'six', 'seven', 'six'],index=['a', 'b', 'c', 'd', 'e'])

In [152]:
t = pd.Series({'six' : 6., 'seven' : 7.})

In [153]:
s

a      six
b    seven
c      six
d    seven
e      six
dtype: object

In [154]:
s.map(t)

a    6
b    7
c    6
d    7
e    6
dtype: float64

In [155]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [156]:
s

a   -0.150685
b   -1.001961
c   -0.121509
d    0.354597
e    0.173518
dtype: float64

In [157]:
s.reindex(['e', 'b', 'f', 'd'])

e    0.173518
b   -1.001961
f         NaN
d    0.354597
dtype: float64

In [158]:
df

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [159]:

df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one'])

Unnamed: 0,three,two,one
c,2.285444,-0.330467,0.355184
f,,,
b,-1.708623,0.525346,-0.723686


In [160]:
rs = s.reindex(df.index)

In [161]:
rs

a   -0.150685
b   -1.001961
c   -0.121509
d    0.354597
dtype: float64

In [162]:
rs.index is df.index

True

In [163]:
rs

a   -0.150685
b   -1.001961
c   -0.121509
d    0.354597
dtype: float64

In [164]:
rs.index is df.index

True

In [165]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [166]:
df3

Unnamed: 0,A
e,2.0
d,1.0
c,1.0
b,3.0
a,


In [167]:
df.reindex_like(df2)

Unnamed: 0,A,B
0,,
1,,
2,,
3,,
4,,
5,,


In [168]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [169]:
s1 = s[:4]

In [170]:
s1 = s[:4]

In [171]:
s2 = s[1:]

In [172]:
s1.align(s2)

(a    1.418771
 b    0.071223
 c    1.041261
 d   -0.145339
 e         NaN
 dtype: float64, a         NaN
 b    0.071223
 c    1.041261
 d   -0.145339
 e    0.448203
 dtype: float64)

In [173]:
s1.align(s2, join='inner')

(b    0.071223
 c    1.041261
 d   -0.145339
 dtype: float64, b    0.071223
 c    1.041261
 d   -0.145339
 dtype: float64)

In [174]:
s1.align(s2, join='left')

(a    1.418771
 b    0.071223
 c    1.041261
 d   -0.145339
 dtype: float64, a         NaN
 b    0.071223
 c    1.041261
 d   -0.145339
 dtype: float64)

In [175]:
df.align(df2, join='inner')

(Empty DataFrame
 Columns: []
 Index: [], Empty DataFrame
 Columns: []
 Index: [])

In [176]:
df.align(df2, join='inner', axis=0)

(Empty DataFrame
 Columns: [one, three, two]
 Index: [], Empty DataFrame
 Columns: [A, B]
 Index: [])

In [177]:
df.align(df2.ix[0], axis=1)

(    A   B       one     three       two
 a NaN NaN -0.016354       NaN  0.171208
 b NaN NaN -0.723686 -1.708623  0.525346
 c NaN NaN  0.355184  2.285444 -0.330467
 d NaN NaN       NaN -1.619474 -2.117678, A         5
 B       NaN
 one     NaN
 three   NaN
 two     NaN
 Name: 0, dtype: float64)

In [178]:
rng = pd.date_range('1/3/2000', periods=8)

In [179]:
ts = pd.Series(np.random.randn(8), index=rng)

In [180]:
ts2 = ts[[0, 3, 6]]

In [181]:
ts

2000-01-03   -1.051793
2000-01-04    1.474370
2000-01-05   -1.109924
2000-01-06    0.178527
2000-01-07    0.611987
2000-01-08    0.406420
2000-01-09    0.778472
2000-01-10   -0.986549
Freq: D, dtype: float64

In [182]:
ts2

2000-01-03   -1.051793
2000-01-06    0.178527
2000-01-09    0.778472
dtype: float64

In [183]:
ts2.reindex(ts.index)

2000-01-03   -1.051793
2000-01-04         NaN
2000-01-05         NaN
2000-01-06    0.178527
2000-01-07         NaN
2000-01-08         NaN
2000-01-09    0.778472
2000-01-10         NaN
Freq: D, dtype: float64

In [184]:
ts2.reindex(ts.index, method='ffill')

2000-01-03   -1.051793
2000-01-04   -1.051793
2000-01-05   -1.051793
2000-01-06    0.178527
2000-01-07    0.178527
2000-01-08    0.178527
2000-01-09    0.778472
2000-01-10    0.778472
Freq: D, dtype: float64

In [185]:
ts2.reindex(ts.index, method='bfill')

2000-01-03   -1.051793
2000-01-04    0.178527
2000-01-05    0.178527
2000-01-06    0.178527
2000-01-07    0.778472
2000-01-08    0.778472
2000-01-09    0.778472
2000-01-10         NaN
Freq: D, dtype: float64

In [186]:
ts2.reindex(ts.index, method='nearest')

2000-01-03   -1.051793
2000-01-04   -1.051793
2000-01-05    0.178527
2000-01-06    0.178527
2000-01-07    0.178527
2000-01-08    0.778472
2000-01-09    0.778472
2000-01-10    0.778472
Freq: D, dtype: float64

In [187]:
ts2.reindex(ts.index).fillna(method='ffill')

2000-01-03   -1.051793
2000-01-04   -1.051793
2000-01-05   -1.051793
2000-01-06    0.178527
2000-01-07    0.178527
2000-01-08    0.178527
2000-01-09    0.778472
2000-01-10    0.778472
Freq: D, dtype: float64

In [188]:
ts2.reindex(ts.index, method='ffill', limit=1)

2000-01-03   -1.051793
2000-01-04   -1.051793
2000-01-05         NaN
2000-01-06    0.178527
2000-01-07    0.178527
2000-01-08         NaN
2000-01-09    0.778472
2000-01-10    0.778472
Freq: D, dtype: float64

In [189]:
ts2.reindex(ts.index, method='ffill', tolerance='1 day')

2000-01-03   -1.051793
2000-01-04   -1.051793
2000-01-05         NaN
2000-01-06    0.178527
2000-01-07    0.178527
2000-01-08         NaN
2000-01-09    0.778472
2000-01-10    0.778472
Freq: D, dtype: float64

In [190]:
df

Unnamed: 0,one,three,two
a,-0.016354,,0.171208
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
d,,-1.619474,-2.117678


In [191]:
df.drop(['a', 'd'], axis=0)

Unnamed: 0,one,three,two
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467


In [192]:
df.drop(['one'], axis=1)

Unnamed: 0,three,two
a,,0.171208
b,-1.708623,0.525346
c,2.285444,-0.330467
d,-1.619474,-2.117678


In [193]:
df.reindex(df.index.difference(['a', 'd']))

Unnamed: 0,one,three,two
b,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467


In [194]:
s

a    1.418771
b    0.071223
c    1.041261
d   -0.145339
e    0.448203
dtype: float64

In [195]:
s.rename(str.upper)

A    1.418771
B    0.071223
C    1.041261
D   -0.145339
E    0.448203
dtype: float64

In [196]:
df.rename(columns={'one' : 'foo', 'two' : 'bar'},index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'})

Unnamed: 0,foo,three,bar
apple,-0.016354,,0.171208
banana,-0.723686,-1.708623,0.525346
c,0.355184,2.285444,-0.330467
durian,,-1.619474,-2.117678


In [197]:
df = pd.DataFrame({'col1' : np.random.randn(3), 'col2' : np.random.randn(3)},index=['a', 'b', 'c'])

In [198]:
for col in df:
    print(col)

col1
col2


In [199]:
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']})

In [200]:
for index, row in df.iterrows():
    row['a'] = 10

In [201]:
df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [202]:
for item, frame in wp.iteritems():
    print(item)
    print(frame)

Item1
                   A         B         C         D
2000-01-01 -0.626876 -0.770447  0.148144  0.866020
2000-01-02 -0.389504 -0.469742 -0.444939  2.012857
2000-01-03 -0.575375  0.233371  0.312624 -1.274773
2000-01-04 -2.288560 -0.359577 -0.309389  0.342142
2000-01-05  0.526170  0.249387 -0.726248  0.612938
Item2
                   A         B         C         D
2000-01-01  0.232152 -1.247767 -1.043572  1.182136
2000-01-02 -0.415313 -0.106448  0.177727  1.094752
2000-01-03  0.240713 -1.578039  1.705737  0.595139
2000-01-04 -1.117925 -0.558111 -0.152304 -0.513611
2000-01-05  0.629560  0.249130 -0.191211  1.161118


In [203]:
for row_index, row in df.iterrows():
    print('%s\n%s' % (row_index, row))

0
a    1
b    a
Name: 0, dtype: object
1
a    2
b    b
Name: 1, dtype: object
2
a    3
b    c
Name: 2, dtype: object


In [204]:
df_orig = pd.DataFrame([[1, 1.5]], columns=['int', 'float'])

In [205]:
df_orig.dtypes

int        int64
float    float64
dtype: object

In [206]:
row = next(df_orig.iterrows())[1]

In [207]:
row

int      1.0
float    1.5
Name: 0, dtype: float64

In [208]:
row['int'].dtype

dtype('float64')

In [209]:
df_orig['int'].dtype

dtype('int64')

In [210]:
df2 = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

In [211]:
print(df2)

   x  y
0  1  4
1  2  5
2  3  6


In [212]:
print(df2.T)

   0  1  2
x  1  2  3
y  4  5  6


In [213]:
df2_t = pd.DataFrame(dict((idx,values) for idx, values in df2.iterrows()))

In [214]:
print(df2_t)

   0  1  2
x  1  2  3
y  4  5  6


In [215]:
for row in df.itertuples():
    print(row)

Pandas(Index=0, a=1, b='a')
Pandas(Index=1, a=2, b='b')
Pandas(Index=2, a=3, b='c')


In [216]:
s = pd.Series(pd.date_range('20130101 09:10:12', periods=4))

In [217]:
s

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]

In [218]:
s.dt.hour

0    9
1    9
2    9
3    9
dtype: int64

In [219]:
s.dt.second

0    12
1    12
2    12
3    12
dtype: int64

In [220]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [221]:
s[s.dt.day==2]

1   2013-01-02 09:10:12
dtype: datetime64[ns]

In [222]:
stz = s.dt.tz_localize('US/Eastern')

In [223]:
stz

0   2013-01-01 09:10:12-05:00
1   2013-01-02 09:10:12-05:00
2   2013-01-03 09:10:12-05:00
3   2013-01-04 09:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [224]:
stz.dt.tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

In [225]:
s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

0   2013-01-01 04:10:12-05:00
1   2013-01-02 04:10:12-05:00
2   2013-01-03 04:10:12-05:00
3   2013-01-04 04:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [226]:
s = pd.Series(pd.date_range('20130101', periods=4))

In [227]:
s

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: datetime64[ns]

In [228]:
s.dt.strftime('%Y/%m/%d')

0    2013/01/01
1    2013/01/02
2    2013/01/03
3    2013/01/04
dtype: object

In [229]:
s = pd.Series(pd.period_range('20130101', periods=4))

In [230]:
s

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: object

In [231]:
s.dt.strftime('%Y/%m/%d')

0    2013/01/01
1    2013/01/02
2    2013/01/03
3    2013/01/04
dtype: object

In [232]:
s = pd.Series(pd.period_range('20130101', periods=4, freq='D'))

In [233]:
s

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: object

In [234]:
s.dt.year

0    2013
1    2013
2    2013
3    2013
dtype: int64

In [235]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [236]:
s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s'))

In [237]:
s

0   1 days 00:00:05
1   1 days 00:00:06
2   1 days 00:00:07
3   1 days 00:00:08
dtype: timedelta64[ns]

In [238]:
s.dt.days

0    1
1    1
2    1
3    1
dtype: int64

In [239]:
s.dt.seconds

0    5
1    6
2    7
3    8
dtype: int64

In [240]:
s.dt.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,1,0,0,5,0,0,0
1,1,0,0,6,0,0,0
2,1,0,0,7,0,0,0
3,1,0,0,8,0,0,0


In [241]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])

In [242]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [243]:
unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'],columns=['three', 'two', 'one'])

In [244]:
unsorted_df.sort_index()

Unnamed: 0,three,two,one
a,,,
b,,,
c,,,
d,,,


In [245]:
unsorted_df.sort_index(ascending=False)

Unnamed: 0,three,two,one
d,,,
c,,,
b,,,
a,,,


In [246]:
unsorted_df.sort_index(axis=1)

Unnamed: 0,one,three,two
a,,,
d,,,
c,,,
b,,,


In [247]:
unsorted_df['three'].sort_index()

a   NaN
b   NaN
c   NaN
d   NaN
Name: three, dtype: float64

In [248]:
df1 = pd.DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]})

In [249]:
df1.sort_values(by='two')

Unnamed: 0,one,three,two
0,2,5,1
2,1,3,2
1,1,4,3
3,1,2,4


In [250]:
df1[['one', 'two', 'three']].sort_values(by=['one','two'])

Unnamed: 0,one,two,three
2,1,2,3
1,1,3,4
3,1,4,2
0,2,1,5


In [251]:
s[2] = np.nan

In [252]:
s.sort_values()

0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
2     NaN
5     NaN
dtype: object

In [253]:
s.sort_values(na_position='first')

2     NaN
5     NaN
0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
dtype: object

In [254]:
ser = pd.Series([1, 2, 3])

In [255]:
ser.searchsorted([0, 3])

array([0, 2])

In [256]:
ser.searchsorted([0, 4])

array([0, 3])

In [257]:
ser.searchsorted([1, 3], side='right')

array([1, 3])

In [258]:
ser.searchsorted([1, 3], side='left')

array([0, 2])

In [259]:
ser = pd.Series([3, 1, 2])

In [260]:
ser.searchsorted([0, 3], sorter=np.argsort(ser))

array([0, 2])

In [261]:
s = pd.Series(np.random.permutation(10))

In [262]:
s

0    0
1    3
2    6
3    5
4    1
5    9
6    8
7    7
8    2
9    4
dtype: int64

In [263]:
s.sort_values()

0    0
4    1
8    2
1    3
9    4
3    5
2    6
7    7
6    8
5    9
dtype: int64

In [264]:
s.nsmallest(3)

0    0
4    1
8    2
dtype: int64

In [265]:
s.nlargest(3)

5    9
6    8
7    7
dtype: int64

In [266]:
df = pd.DataFrame({'a': [-2, -1, 1, 10, 8, 11, -1],'b': list('abdceff'),'c': [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0]})

In [267]:
df.nlargest(3, 'a')

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,


In [268]:
df.nlargest(5, ['a', 'c'])

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,
2,1,d,4.0
1,-1,b,2.0


In [269]:
df.nsmallest(3, 'a')

Unnamed: 0,a,b,c
0,-2,a,1
1,-1,b,2
6,-1,f,4


In [270]:
df.nsmallest(5, ['a', 'c'])

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
6,-1,f,4.0
2,1,d,4.0
4,8,e,


In [271]:
df1.columns = pd.MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')])

In [272]:
df1.sort_values(by=('a','two'))

Unnamed: 0_level_0,a,a,b
Unnamed: 0_level_1,one,two,three
3,1,2,4
2,1,3,2
1,1,4,3
0,2,5,1


In [273]:
dft = pd.DataFrame(dict(A = np.random.rand(3),B = 1,C = 'foo',D = pd.Timestamp('20010102'),E = pd.Series([1.0]*3).astype('float32'),F = False,G = pd.Series([1]*3,dtype='int8')))

In [274]:
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.766052,1,foo,2001-01-02,1,False,1
1,0.355578,1,foo,2001-01-02,1,False,1
2,0.519967,1,foo,2001-01-02,1,False,1


In [275]:
dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [276]:
dft['A'].dtype

dtype('float64')

In [277]:
pd.Series([1, 2, 3, 4, 5, 6.])

0    1
1    2
2    3
3    4
4    5
5    6
dtype: float64

In [278]:
pd.Series([1, 2, 3, 6., 'foo'])

0      1
1      2
2      3
3      6
4    foo
dtype: object

In [279]:
dft.get_dtype_counts()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [280]:
df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')

In [281]:
df1

Unnamed: 0,A
0,-1.653437
1,0.847529
2,0.969922
3,0.21053
4,-0.61236
5,0.288541
6,-0.583371
7,0.100329


In [282]:
df1.dtypes

A    float32
dtype: object

In [283]:
df2 = pd.DataFrame(dict( A = pd.Series(np.random.randn(8), dtype='float16'),
                         B = pd.Series(np.random.randn(8)),
                         C = pd.Series(np.array(np.random.randn(8), 
                                                dtype='uint8')) ))

In [284]:
df2

Unnamed: 0,A,B,C
0,-1.254883,-1.940022,0
1,0.510254,-0.271696,0
2,-1.230469,0.678815,0
3,0.571289,2.239764,1
4,0.696777,0.669714,0
5,1.046875,-0.696822,0
6,-0.088745,0.772134,0
7,0.491211,-2.028013,0


In [285]:
df2.dtypes

A    float16
B    float64
C      uint8
dtype: object

In [286]:
pd.DataFrame([1, 2], columns=['a']).dtypes

a    int64
dtype: object

In [287]:
pd.DataFrame({'a': [1, 2]}).dtypes

a    int64
dtype: object

In [288]:
pd.DataFrame({'a': 1 }, index=list(range(2))).dtypes

a    int64
dtype: object

In [289]:
frame = pd.DataFrame(np.array([1, 2]))

In [290]:
df3 = df1.reindex_like(df2).fillna(value=0.0) + df2

In [291]:
df3

Unnamed: 0,A,B,C
0,-2.90832,-1.940022,0
1,1.357783,-0.271696,0
2,-0.260546,0.678815,0
3,0.781819,2.239764,1
4,0.084417,0.669714,0
5,1.335416,-0.696822,0
6,-0.672116,0.772134,0
7,0.59154,-2.028013,0


In [292]:
df3.dtypes

A    float32
B    float64
C    float64
dtype: object

In [293]:
df3.values.dtype

dtype('float64')

In [294]:
df3

Unnamed: 0,A,B,C
0,-2.90832,-1.940022,0
1,1.357783,-0.271696,0
2,-0.260546,0.678815,0
3,0.781819,2.239764,1
4,0.084417,0.669714,0
5,1.335416,-0.696822,0
6,-0.672116,0.772134,0
7,0.59154,-2.028013,0


In [295]:
df3.dtypes

A    float32
B    float64
C    float64
dtype: object

In [296]:
df3.astype('float32').dtypes

A    float32
B    float32
C    float32
dtype: object

In [297]:
df3['D'] = '1.'

In [298]:
df3['E'] = '1'

In [299]:
df3.convert_objects(convert_numeric=True).dtypes

  if __name__ == '__main__':


A    float32
B    float64
C    float64
D    float64
E      int64
dtype: object

In [300]:
df3['D'] = df3['D'].astype('float16')

In [301]:
df3['E'] = df3['E'].astype('int32')

In [302]:
df3.dtypes

A    float32
B    float64
C    float64
D    float16
E      int32
dtype: object

In [303]:
import datetime

In [304]:
s = pd.Series([datetime.datetime(2001,1,1,0,0),
               'foo', 1.0, 1, pd.Timestamp('20010104'),'20010105'], 
               dtype='O')

In [305]:
s

0    2001-01-01 00:00:00
1                    foo
2                      1
3                      1
4    2001-01-04 00:00:00
5               20010105
dtype: object

In [306]:
s.convert_objects(convert_dates='coerce')

  if __name__ == '__main__':


0   2001-01-01
1          NaT
2          NaT
3          NaT
4   2001-01-04
5   2001-01-05
dtype: datetime64[ns]

In [307]:
dfi = df3.astype('int32')

In [308]:
dfi['E'] = 1

In [309]:
dfi

Unnamed: 0,A,B,C,D,E
0,-2,-1,0,1,1
1,1,0,0,1,1
2,0,0,0,1,1
3,0,2,1,1,1
4,0,0,0,1,1
5,1,0,0,1,1
6,0,0,0,1,1
7,0,-2,0,1,1


In [310]:
dfi.dtypes

A    int32
B    int32
C    int32
D    int32
E    int64
dtype: object

In [311]:
casted = dfi[dfi>0]

In [312]:
casted

Unnamed: 0,A,B,C,D,E
0,,,,1,1
1,1.0,,,1,1
2,,,,1,1
3,,2.0,1.0,1,1
4,,,,1,1
5,1.0,,,1,1
6,,,,1,1
7,,,,1,1


In [313]:
casted.dtypes

A    float64
B    float64
C    float64
D      int32
E      int64
dtype: object

In [314]:
dfa = df3.copy()

In [315]:
dfa['A'] = dfa['A'].astype('float32')

In [316]:
dfa.dtypes

A    float32
B    float64
C    float64
D    float16
E      int32
dtype: object

In [317]:
casted = dfa[df2>0]

In [318]:
casted

Unnamed: 0,A,B,C,D,E
0,,,,,
1,1.357783,,,,
2,,0.678815,,,
3,0.781819,2.239764,1.0,,
4,0.084417,0.669714,,,
5,1.335416,,,,
6,,0.772134,,,
7,0.59154,,,,


In [319]:
casted.dtypes

A    float32
B    float64
C    float64
D    float16
E    float64
dtype: object

In [320]:
df = pd.DataFrame({'string': list('abc'),
                   'int64': list(range(1, 4)),
                   'uint8': np.arange(3, 6).astype('u1'),
                   'float64': np.arange(4.0, 7.0),
                   'bool1': [True, False, True],
                   'bool2': [False, True, False],
                   'dates': pd.date_range('now', periods=3).values,
                   'category': pd.Series(list("ABC")).astype('category')})

In [321]:
df['tdeltas'] = df.dates.diff()

In [322]:
df['uint64'] = np.arange(3, 6).astype('u8')

In [323]:
df['other_dates'] = pd.date_range('20130101', periods=3).values

In [324]:
df['tz_aware_dates'] = pd.date_range('20130101', periods=3, tz='US/Eastern')

In [325]:
df

Unnamed: 0,bool1,bool2,category,dates,float64,int64,string,uint8,tdeltas,uint64,other_dates,tz_aware_dates
0,True,False,A,2016-02-18 10:25:13.648261,4,1,a,3,NaT,3,2013-01-01,2013-01-01 00:00:00-05:00
1,False,True,B,2016-02-19 10:25:13.648261,5,2,b,4,1 days,4,2013-01-02,2013-01-02 00:00:00-05:00
2,True,False,C,2016-02-20 10:25:13.648261,6,3,c,5,1 days,5,2013-01-03,2013-01-03 00:00:00-05:00


In [326]:
df.dtypes

bool1                                   bool
bool2                                   bool
category                            category
dates                         datetime64[ns]
float64                              float64
int64                                  int64
string                                object
uint8                                  uint8
tdeltas                      timedelta64[ns]
uint64                                uint64
other_dates                   datetime64[ns]
tz_aware_dates    datetime64[ns, US/Eastern]
dtype: object

In [327]:
df.select_dtypes(include=[bool])

Unnamed: 0,bool1,bool2
0,True,False
1,False,True
2,True,False


In [328]:
df.select_dtypes(include=['bool'])

Unnamed: 0,bool1,bool2
0,True,False
1,False,True
2,True,False


In [329]:
df.select_dtypes(include=['number', 'bool'], exclude=['unsignedinteger'])

Unnamed: 0,bool1,bool2,float64,int64,tdeltas
0,True,False,4,1,NaT
1,False,True,5,2,1 days
2,True,False,6,3,1 days


In [330]:
df.select_dtypes(include=['object'])


Unnamed: 0,string
0,a
1,b
2,c


In [331]:
def subdtypes(dtype):
    subs = dtype.__subclasses__()
    if not subs:
        return dtype
    return [dtype, [subdtypes(dt) for dt in subs]]

In [332]:
subdtypes(np.generic)

[numpy.generic,
 [[numpy.number,
   [[numpy.integer,
     [[numpy.signedinteger,
       [numpy.int8,
        numpy.int16,
        numpy.int32,
        numpy.int64,
        numpy.int64,
        numpy.timedelta64]],
      [numpy.unsignedinteger,
       [numpy.uint8,
        numpy.uint16,
        numpy.uint64,
        numpy.uint32,
        numpy.uint64]]]],
    [numpy.inexact,
     [[numpy.floating,
       [numpy.float16, numpy.float32, numpy.float128, numpy.float64]],
      [numpy.complexfloating,
       [numpy.complex64, numpy.complex128, numpy.complex256]]]]]],
  [numpy.flexible,
   [[numpy.character, [numpy.bytes_, numpy.str_]],
    [numpy.void, [numpy.record]]]],
  numpy.datetime64,
  numpy.bool_,
  numpy.object_]]