In [1]:
import numpy as np
import pandas as pd



## object creation

In [2]:
# Build a Series from a plain list; np.nan marks the missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [3]:
# Create a DataFrame whose rows are labelled by six daily timestamps.
dates = pd.date_range(start='20191023', periods=6)
print(dates)

# pd.DataFrame(data, index=<row labels>, columns=<column labels>)
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)

DatetimeIndex(['2019-10-23', '2019-10-24', '2019-10-25', '2019-10-26',
               '2019-10-27', '2019-10-28'],
              dtype='datetime64[ns]', freq='D')
                   a         b         c         d
2019-10-23 -1.144811 -0.332166 -1.497940  0.911046
2019-10-24  2.362060 -1.408420 -1.379263  1.077921
2019-10-25  0.193773  1.562984 -0.114057  1.098926
2019-10-26  1.428276  0.663978  0.466409 -0.642886
2019-10-27  0.795113  0.813499 -0.023738 -0.248085
2019-10-28  1.114396 -0.586237  1.789198 -0.214345


In [4]:
# Build a DataFrame from a dict: each key becomes a column, and scalar
# values are repeated to match the length of the array-like entries.
df2 = pd.DataFrame(
    {
        'a': 1,
        'b': pd.Timestamp('20191025'),
        'c': pd.Series(1, index=list(range(4)), dtype='float32'),
        'd': np.array([3] * 4, dtype='int32'),
        'e': pd.Categorical(["test", "train", "test", "train"]),
        'f': 'foo',
    }
)

print(df2)

   a          b    c  d      e    f
0  1 2019-10-25  1.0  3   test  foo
1  1 2019-10-25  1.0  3  train  foo
2  1 2019-10-25  1.0  3   test  foo
3  1 2019-10-25  1.0  3  train  foo


In [5]:
np.array([3]*4, dtype='int32')

array([3, 3, 3, 3])

## viewing data

In [6]:
df.head()

Unnamed: 0,a,b,c,d
2019-10-23,-1.144811,-0.332166,-1.49794,0.911046
2019-10-24,2.36206,-1.40842,-1.379263,1.077921
2019-10-25,0.193773,1.562984,-0.114057,1.098926
2019-10-26,1.428276,0.663978,0.466409,-0.642886
2019-10-27,0.795113,0.813499,-0.023738,-0.248085


In [7]:
df.tail(3)

Unnamed: 0,a,b,c,d
2019-10-26,1.428276,0.663978,0.466409,-0.642886
2019-10-27,0.795113,0.813499,-0.023738,-0.248085
2019-10-28,1.114396,-0.586237,1.789198,-0.214345


In [8]:
df.index

DatetimeIndex(['2019-10-23', '2019-10-24', '2019-10-25', '2019-10-26',
               '2019-10-27', '2019-10-28'],
              dtype='datetime64[ns]', freq='D')

In [9]:
df.columns

Index(['a', 'b', 'c', 'd'], dtype='object')

In [10]:
df.to_numpy()

array([[-1.14481127, -0.33216641, -1.49793986,  0.91104642],
       [ 2.36205975, -1.40842012, -1.3792635 ,  1.07792101],
       [ 0.19377278,  1.5629842 , -0.11405724,  1.09892604],
       [ 1.42827555,  0.66397787,  0.46640859, -0.64288605],
       [ 0.79511315,  0.81349872, -0.02373785, -0.24808535],
       [ 1.11439565, -0.5862375 ,  1.78919772, -0.21434506]])

In [11]:
df2.to_numpy()

array([[1, Timestamp('2019-10-25 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1, Timestamp('2019-10-25 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1, Timestamp('2019-10-25 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1, Timestamp('2019-10-25 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

***Note:*** DataFrame.to_numpy() does not include the index or column labels in the output.

### describe() shows a quick statistical summary of the data


In [12]:
df.describe()

Unnamed: 0,a,b,c,d
count,6.0,6.0,6.0,6.0
mean,0.791468,0.118939,-0.126565,0.33043
std,1.19021,1.086125,1.223308,0.782975
min,-1.144811,-1.40842,-1.49794,-0.642886
25%,0.344108,-0.52272,-1.062962,-0.23965
50%,0.954754,0.165906,-0.068898,0.348351
75%,1.349806,0.776119,0.343872,1.036202
max,2.36206,1.562984,1.789198,1.098926


### df.T transposing data

In [13]:
df.T

Unnamed: 0,2019-10-23 00:00:00,2019-10-24 00:00:00,2019-10-25 00:00:00,2019-10-26 00:00:00,2019-10-27 00:00:00,2019-10-28 00:00:00
a,-1.144811,2.36206,0.193773,1.428276,0.795113,1.114396
b,-0.332166,-1.40842,1.562984,0.663978,0.813499,-0.586237
c,-1.49794,-1.379263,-0.114057,0.466409,-0.023738,1.789198
d,0.911046,1.077921,1.098926,-0.642886,-0.248085,-0.214345


### sorting by values

In [14]:
df.sort_values(by='c')

Unnamed: 0,a,b,c,d
2019-10-23,-1.144811,-0.332166,-1.49794,0.911046
2019-10-24,2.36206,-1.40842,-1.379263,1.077921
2019-10-25,0.193773,1.562984,-0.114057,1.098926
2019-10-27,0.795113,0.813499,-0.023738,-0.248085
2019-10-26,1.428276,0.663978,0.466409,-0.642886
2019-10-28,1.114396,-0.586237,1.789198,-0.214345


## Selection

### getting

In [15]:
print(df['a'])

2019-10-23   -1.144811
2019-10-24    2.362060
2019-10-25    0.193773
2019-10-26    1.428276
2019-10-27    0.795113
2019-10-28    1.114396
Freq: D, Name: a, dtype: float64


In [16]:
# selecting via [], which slices the row
print(df[0:3])
print(df['20191024':'20191027'])

                   a         b         c         d
2019-10-23 -1.144811 -0.332166 -1.497940  0.911046
2019-10-24  2.362060 -1.408420 -1.379263  1.077921
2019-10-25  0.193773  1.562984 -0.114057  1.098926
                   a         b         c         d
2019-10-24  2.362060 -1.408420 -1.379263  1.077921
2019-10-25  0.193773  1.562984 -0.114057  1.098926
2019-10-26  1.428276  0.663978  0.466409 -0.642886
2019-10-27  0.795113  0.813499 -0.023738 -0.248085


### selection by label
loc: selecting rows and columns by label

In [17]:
print(df)

                   a         b         c         d
2019-10-23 -1.144811 -0.332166 -1.497940  0.911046
2019-10-24  2.362060 -1.408420 -1.379263  1.077921
2019-10-25  0.193773  1.562984 -0.114057  1.098926
2019-10-26  1.428276  0.663978  0.466409 -0.642886
2019-10-27  0.795113  0.813499 -0.023738 -0.248085
2019-10-28  1.114396 -0.586237  1.789198 -0.214345


In [18]:
df.loc[dates[0]]


a   -1.144811
b   -0.332166
c   -1.497940
d    0.911046
Name: 2019-10-23 00:00:00, dtype: float64

In [19]:
# selecting on a multi-axis by label
df.loc[:, ['a','b']]

Unnamed: 0,a,b
2019-10-23,-1.144811,-0.332166
2019-10-24,2.36206,-1.40842
2019-10-25,0.193773,1.562984
2019-10-26,1.428276,0.663978
2019-10-27,0.795113,0.813499
2019-10-28,1.114396,-0.586237


In [20]:
# note: endpoints are included
df.loc['20191024':'20191027', ['a', 'b']]

Unnamed: 0,a,b
2019-10-24,2.36206,-1.40842
2019-10-25,0.193773,1.562984
2019-10-26,1.428276,0.663978
2019-10-27,0.795113,0.813499


### getting a scalar value


In [21]:
df.loc[dates[0], 'a']


-1.1448112731288271

In [22]:
df.at[dates[0], 'a']

-1.1448112731288271

### selection by position
iloc: selecting by integer position

In [23]:
df.iloc[3]

a    1.428276
b    0.663978
c    0.466409
d   -0.642886
Name: 2019-10-26 00:00:00, dtype: float64

In [24]:
df.iloc[3:5,0:2]

Unnamed: 0,a,b
2019-10-26,1.428276,0.663978
2019-10-27,0.795113,0.813499


In [25]:
df.iloc[[1,2,4],[0,2]]

Unnamed: 0,a,c
2019-10-24,2.36206,-1.379263
2019-10-25,0.193773,-0.114057
2019-10-27,0.795113,-0.023738


In [26]:
df.iloc[1:3, :]
# the end position of a positional slice is not included

Unnamed: 0,a,b,c,d
2019-10-24,2.36206,-1.40842,-1.379263,1.077921
2019-10-25,0.193773,1.562984,-0.114057,1.098926


slicing columns explicitly


In [27]:
df.iloc[:, 2:4]

Unnamed: 0,c,d
2019-10-23,-1.49794,0.911046
2019-10-24,-1.379263,1.077921
2019-10-25,-0.114057,1.098926
2019-10-26,0.466409,-0.642886
2019-10-27,-0.023738,-0.248085
2019-10-28,1.789198,-0.214345


## boolean indexing

In [28]:
df.a # equivalently df['a']

2019-10-23   -1.144811
2019-10-24    2.362060
2019-10-25    0.193773
2019-10-26    1.428276
2019-10-27    0.795113
2019-10-28    1.114396
Freq: D, Name: a, dtype: float64

In [29]:
df['a'] > 0

2019-10-23    False
2019-10-24     True
2019-10-25     True
2019-10-26     True
2019-10-27     True
2019-10-28     True
Freq: D, Name: a, dtype: bool

In [30]:
df[df['a']>0]

Unnamed: 0,a,b,c,d
2019-10-24,2.36206,-1.40842,-1.379263,1.077921
2019-10-25,0.193773,1.562984,-0.114057,1.098926
2019-10-26,1.428276,0.663978,0.466409,-0.642886
2019-10-27,0.795113,0.813499,-0.023738,-0.248085
2019-10-28,1.114396,-0.586237,1.789198,-0.214345


In [31]:
df[df>0]

Unnamed: 0,a,b,c,d
2019-10-23,,,,0.911046
2019-10-24,2.36206,,,1.077921
2019-10-25,0.193773,1.562984,,1.098926
2019-10-26,1.428276,0.663978,0.466409,
2019-10-27,0.795113,0.813499,,
2019-10-28,1.114396,,1.789198,


### using the isin() method for filtering 

In [60]:
df2 = df.copy()
df2['e'] = ['one', 'two', 'three', 'four', 'five', 'six']
df2

Unnamed: 0,a,b,c,d,e
2019-10-23,0.0,0.0,-1.49794,5,one
2019-10-24,2.36206,-1.40842,-1.379263,5,two
2019-10-25,0.193773,1.562984,-0.114057,5,three
2019-10-26,1.428276,0.663978,0.466409,5,four
2019-10-27,0.795113,0.813499,-0.023738,5,five
2019-10-28,1.114396,-0.586237,1.789198,5,six


In [64]:
df2[df2['e'].isin(['two', 'four'])]


Unnamed: 0,a,b,c,d,e
2019-10-24,2.36206,-1.40842,-1.379263,5,two
2019-10-26,1.428276,0.663978,0.466409,5,four



## setting

In [34]:
# Integers 1..6 labelled by six daily timestamps starting 2019-11-11.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start='20191111', periods=6),
)
print(s1)

2019-11-11    1
2019-11-12    2
2019-11-13    3
2019-11-14    4
2019-11-15    5
2019-11-16    6
Freq: D, dtype: int64


### setting values by label


In [35]:
df.at[dates[0], 'a'] = 0

In [36]:
df

Unnamed: 0,a,b,c,d
2019-10-23,0.0,-0.332166,-1.49794,0.911046
2019-10-24,2.36206,-1.40842,-1.379263,1.077921
2019-10-25,0.193773,1.562984,-0.114057,1.098926
2019-10-26,1.428276,0.663978,0.466409,-0.642886
2019-10-27,0.795113,0.813499,-0.023738,-0.248085
2019-10-28,1.114396,-0.586237,1.789198,-0.214345


### setting values by position

In [37]:
df.iat[0,1] = 0

In [38]:
df

Unnamed: 0,a,b,c,d
2019-10-23,0.0,0.0,-1.49794,0.911046
2019-10-24,2.36206,-1.40842,-1.379263,1.077921
2019-10-25,0.193773,1.562984,-0.114057,1.098926
2019-10-26,1.428276,0.663978,0.466409,-0.642886
2019-10-27,0.795113,0.813499,-0.023738,-0.248085
2019-10-28,1.114396,-0.586237,1.789198,-0.214345


### setting by assigning with a numpy array

In [39]:
df.loc[:, 'd'] = np.array([5] * len(df))

In [40]:
df

Unnamed: 0,a,b,c,d
2019-10-23,0.0,0.0,-1.49794,5
2019-10-24,2.36206,-1.40842,-1.379263,5
2019-10-25,0.193773,1.562984,-0.114057,5
2019-10-26,1.428276,0.663978,0.466409,5
2019-10-27,0.795113,0.813499,-0.023738,5
2019-10-28,1.114396,-0.586237,1.789198,5


## missing data
### df.reindex()

In [41]:
df1 = df.reindex(index=dates[0:4], columns=list(df.columns)+['e'])
df1.loc[dates[0]:dates[1], 'e'] = 1
df1

Unnamed: 0,a,b,c,d,e
2019-10-23,0.0,0.0,-1.49794,5,1.0
2019-10-24,2.36206,-1.40842,-1.379263,5,1.0
2019-10-25,0.193773,1.562984,-0.114057,5,
2019-10-26,1.428276,0.663978,0.466409,5,


In [42]:
# drop any rows that have missing data
df1.dropna(how='any')

Unnamed: 0,a,b,c,d,e
2019-10-23,0.0,0.0,-1.49794,5,1.0
2019-10-24,2.36206,-1.40842,-1.379263,5,1.0


In [43]:
# drop only the rows in which every value is missing
df1.dropna(how='all')

Unnamed: 0,a,b,c,d,e
2019-10-23,0.0,0.0,-1.49794,5,1.0
2019-10-24,2.36206,-1.40842,-1.379263,5,1.0
2019-10-25,0.193773,1.562984,-0.114057,5,
2019-10-26,1.428276,0.663978,0.466409,5,


In [44]:
# filling missing data
df2 = df1.fillna(value='missing')
print(df2)
# df2.loc[:,'e']

                   a         b         c  d        e
2019-10-23  0.000000  0.000000 -1.497940  5        1
2019-10-24  2.362060 -1.408420 -1.379263  5        1
2019-10-25  0.193773  1.562984 -0.114057  5  missing
2019-10-26  1.428276  0.663978  0.466409  5  missing


In [45]:
# get the boolean mask where values are nan
pd.isna(df1)

Unnamed: 0,a,b,c,d,e
2019-10-23,False,False,False,False,False
2019-10-24,False,False,False,False,False
2019-10-25,False,False,False,False,True
2019-10-26,False,False,False,False,True


## operations

### stats

In [68]:
# Print the summary table, the column means (default axis), the row means
# (axis=1) and the column means again (explicit axis=0), with a row of
# asterisks between consecutive results.
divider = '*' * 50
print(
    df.describe(),
    df.mean(),
    df.mean(axis=1),
    df.mean(axis=0),
    sep='\n' + divider + '\n',
)



              a         b         c    d
count  6.000000  6.000000  6.000000  6.0
mean   0.982269  0.174301 -0.126565  5.0
std    0.865079  1.066827  1.223308  0.0
min    0.000000 -1.408420 -1.497940  5.0
25%    0.344108 -0.439678 -1.062962  5.0
50%    0.954754  0.331989 -0.068898  5.0
75%    1.349806  0.776119  0.343872  5.0
max    2.362060  1.562984  1.789198  5.0
**************************************************
a    0.982269
b    0.174301
c   -0.126565
d    5.000000
dtype: float64
**************************************************
2019-10-23    0.875515
2019-10-24    1.143594
2019-10-25    1.660675
2019-10-26    1.889666
2019-10-27    1.646219
2019-10-28    1.829339
Freq: D, dtype: float64
**************************************************
a    0.982269
b    0.174301
c   -0.126565
d    5.000000
dtype: float64


In [47]:
s = pd.Series([1,3,5,np.nan, 6,8],index=dates)
s



2019-10-23    1.0
2019-10-24    3.0
2019-10-25    5.0
2019-10-26    NaN
2019-10-27    6.0
2019-10-28    8.0
Freq: D, dtype: float64

In [48]:
print(df.sub(s, axis='index'))
df

                   a         b         c    d
2019-10-23 -1.000000 -1.000000 -2.497940  4.0
2019-10-24 -0.637940 -4.408420 -4.379263  2.0
2019-10-25 -4.806227 -3.437016 -5.114057  0.0
2019-10-26       NaN       NaN       NaN  NaN
2019-10-27 -5.204887 -5.186501 -6.023738 -1.0
2019-10-28 -6.885604 -8.586237 -6.210802 -3.0


Unnamed: 0,a,b,c,d
2019-10-23,0.0,0.0,-1.49794,5
2019-10-24,2.36206,-1.40842,-1.379263,5
2019-10-25,0.193773,1.562984,-0.114057,5
2019-10-26,1.428276,0.663978,0.466409,5
2019-10-27,0.795113,0.813499,-0.023738,5
2019-10-28,1.114396,-0.586237,1.789198,5


### apply
applying functions to the data

In [49]:
df.apply(np.cumsum)


Unnamed: 0,a,b,c,d
2019-10-23,0.0,0.0,-1.49794,5
2019-10-24,2.36206,-1.40842,-2.877203,10
2019-10-25,2.555833,0.154564,-2.991261,15
2019-10-26,3.984108,0.818542,-2.524852,20
2019-10-27,4.779221,1.632041,-2.54859,25
2019-10-28,5.893617,1.045803,-0.759392,30


In [50]:
df.apply(lambda x:x.max() - x.min())

a    2.362060
b    2.971404
c    3.287138
d    0.000000
dtype: float64

### histogramming

In [51]:
s = pd.Series(np.random.randint(0, 7, size=10))
s

0    6
1    1
2    4
3    1
4    0
5    0
6    4
7    1
8    0
9    1
dtype: int32

In [69]:
s.value_counts()  # return the count of value


8.0    1
6.0    1
5.0    1
3.0    1
1.0    1
dtype: int64

### string methods
String methods are available through the `str` accessor; the pattern-matching ones generally use regular expressions by default.

In [77]:
# lower-case every string element; the missing value (NaN) is left as NaN
s = pd.Series(['a','B','C','d','AbD',np.nan])
s.str.lower()


0      a
1      b
2      c
3      d
4    abd
5    NaN
dtype: object

## merge

### concat

In [81]:
df = pd.DataFrame(np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,-1.737394,-1.584861,1.354222,-1.189527
1,-1.706995,-0.572205,1.948753,-1.353885
2,-0.464005,-1.254109,-0.4535,1.135584
3,0.902405,-0.801544,2.270568,0.298512
4,0.482657,-0.171032,-0.262119,0.262703
5,-1.417361,2.004298,2.582695,2.150646
6,0.37051,0.290963,0.956341,0.903678
7,0.455107,-0.490177,0.411205,1.326117
8,1.263528,-0.140004,-0.133637,-0.164293
9,0.091391,1.444391,0.334401,-0.536898


In [92]:
# two overlapping row slices plus the single column labelled 0
pieces = [df[:3], df[2:7], df[0]]
print(type(pieces))
print(type(pieces[0]))
# axis=1 places the pieces side by side, aligned on the row index;
# rows that a piece does not contain are filled with NaN
print(pd.concat(pieces,axis=1))

<class 'list'>
<class 'pandas.core.frame.DataFrame'>
          0         1         2         3         0         1         2  \
0 -1.737394 -1.584861  1.354222 -1.189527       NaN       NaN       NaN   
1 -1.706995 -0.572205  1.948753 -1.353885       NaN       NaN       NaN   
2 -0.464005 -1.254109 -0.453500  1.135584 -0.464005 -1.254109 -0.453500   
3       NaN       NaN       NaN       NaN  0.902405 -0.801544  2.270568   
4       NaN       NaN       NaN       NaN  0.482657 -0.171032 -0.262119   
5       NaN       NaN       NaN       NaN -1.417361  2.004298  2.582695   
6       NaN       NaN       NaN       NaN  0.370510  0.290963  0.956341   
7       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
8       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
9       NaN       NaN       NaN       NaN       NaN       NaN       NaN   

          3         0  
0       NaN -1.737394  
1       NaN -1.706995  
2  1.135584 -0.464005  
3  0.298512  0.902405  
4

### join

In [95]:
# Two small frames sharing a 'key' column in which every value is 'foo'.
left = pd.DataFrame(data={'key': ['foo'] * 2, 'lval': [1, 2]})
right = pd.DataFrame(data={'key': ['foo'] * 2, 'rval': [4, 5]})
print(left, right, sep='\n')

   key  lval
0  foo     1
1  foo     2
   key  rval
0  foo     4
1  foo     5


In [100]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [102]:
# Same frames as before but with distinct keys, merged on the 'key' column.
left = pd.DataFrame([('foo', 1), ('bar', 2)], columns=['key', 'lval'])
right = pd.DataFrame([('foo', 4), ('bar', 5)], columns=['key', 'rval'])
print(left.merge(right, on='key'))

   key  lval  rval
0  foo     1     4
1  bar     2     5


### append

In [119]:
# Append a copy of the first row to the end of the frame.
df = pd.DataFrame(np.random.randn(8,4), columns=list('abcd'))
s = df.loc[0]
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# row is turned into a one-row frame and concatenated instead.
# ignore_index=True renumbers the result 0..8; without it the appended row
# would keep its original label (0).
pd.concat([df, s.to_frame().T], ignore_index=True)



Unnamed: 0,a,b,c,d
0,1.354227,1.14603,0.128675,0.257873
1,-0.433177,-2.012264,-0.884185,1.452595
2,0.654226,-1.615389,-1.265937,1.819272
3,0.021121,-0.03398,-0.246129,2.638633
4,-1.970959,0.017091,-0.836447,-0.359465
5,-3.40639,1.122086,-2.286254,-1.487224
6,-0.824124,0.211413,-0.271275,-1.09576
7,1.436613,0.36595,-0.162878,-0.598318
8,1.354227,1.14603,0.128675,0.257873


### grouping
"Group by" refers to a process involving one or more of the following steps:
+ **Splitting**
+ **Applying**
+ **Combining**


In [122]:
# Two string key columns (A, B) and two columns of random values (C, D).
df = pd.DataFrame(
    {
        'A': 'foo bar foo bar foo bar foo foo'.split(),
        'B': 'one one two three two two one three'.split(),
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
)
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.016851,-0.100885
1,bar,one,-0.520713,-0.86199
2,foo,two,1.225373,0.671101
3,bar,three,1.560541,-1.019316
4,foo,two,-0.755831,-1.129835
5,bar,two,-1.630908,0.896623
6,foo,one,-1.802007,-2.625324
7,foo,three,-2.720111,0.715457


In [124]:
# Select the numeric columns explicitly: from pandas 2.0 on, sum() no longer
# drops the string column 'B' automatically and would concatenate its values.
df.groupby('A')[['C', 'D']].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-0.59108,-0.984682
foo,-4.069426,-2.469485


In [126]:
df.groupby(['A','B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.520713,-0.86199
bar,three,1.560541,-1.019316
bar,two,-1.630908,0.896623
foo,one,-1.818858,-2.726208
foo,three,-2.720111,0.715457
foo,two,0.469543,-0.458734


## reshaping

### stack

In [132]:
# Pair each outer label with an inner label to form a two-level row index.
outer_labels = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
inner_labels = ['one', 'two'] * 4
tuples = list(zip(outer_labels, inner_labels))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
print(index)
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
# Keep only the first four rows for the stack() demonstration.
df2 = df.iloc[:4]
df2

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])


Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.702398,-1.381821
bar,two,-0.096019,0.632854
baz,one,1.997007,0.724207
baz,two,0.450249,-0.356366


In [133]:
stacked = df2.stack()
stacked

first  second   
bar    one     A    0.702398
               B   -1.381821
       two     A   -0.096019
               B    0.632854
baz    one     A    1.997007
               B    0.724207
       two     A    0.450249
               B   -0.356366
dtype: float64