In [1]:
import pandas as pd
import numpy as np
import itertools

# DataFrame
A DataFrame is a __2-dimensional__ labeled data structure whose columns can hold different types.  
The `pd.DataFrame` constructor accepts: 
1. Dict of 1D ndarrays, lists, dicts, or Series
2. 2-D numpy.ndarray
3. Structured or record ndarray (Array with named fields)
4. A pd.Series
5. Another DataFrame

In [2]:
# Create a 6x4 DataFrame of uniform random floats scaled into [0, 100),
# labelled with letter rows and word columns.
df = pd.DataFrame(
    data=100 * np.random.rand(6, 4),
    index=list('abcdef'),
    columns='one two three four'.split(),
)

In [3]:
df

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,34.309186,22.437078
b,7.051927,6.406754,61.736579,67.622311
c,35.462478,97.267604,10.570521,18.526606
d,8.456016,15.859291,42.272089,72.040873
e,30.14833,9.196565,66.5287,28.38317
f,13.589995,12.772728,80.112225,55.187197


## Selecting elements

In [4]:
df.index # Row labels (the index) of the DataFrame, not the row data itself

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [5]:
df.columns # Column labels of the DataFrame

Index(['one', 'two', 'three', 'four'], dtype='object')

In [6]:
df.loc['a'] # Select a single row by its label; the result is a Series indexed by column name

one      76.453545
two      96.363305
three    34.309186
four     22.437078
Name: a, dtype: float64

In [7]:
df.iloc[0] # Integer index for rows

one      76.453545
two      96.363305
three    34.309186
four     22.437078
Name: a, dtype: float64

In [8]:
type(df.iloc[0]) # All rows are pandas.Series

pandas.core.series.Series

In [9]:
df.head(n=3) # Get first n rows

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,34.309186,22.437078
b,7.051927,6.406754,61.736579,67.622311
c,35.462478,97.267604,10.570521,18.526606


In [10]:
df['one'] # Get column

a    76.453545
b     7.051927
c    35.462478
d     8.456016
e    30.148330
f    13.589995
Name: one, dtype: float64

In [11]:
df.one # Attribute-style column access: a column name can clash with an existing DataFrame method/attribute, so prefer df['one']

a    76.453545
b     7.051927
c    35.462478
d     8.456016
e    30.148330
f    13.589995
Name: one, dtype: float64

In [12]:
type(df['two']) # Columns are also pandas.Series

pandas.core.series.Series

In [13]:
df.loc[['a', 'b']] # Get two(Multiple) rows

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,34.309186,22.437078
b,7.051927,6.406754,61.736579,67.622311


In [14]:
df[['two', 'one']] # Get two(Multiple) columns

Unnamed: 0,two,one
a,96.363305,76.453545
b,6.406754,7.051927
c,97.267604,35.462478
d,15.859291,8.456016
e,9.196565,30.14833
f,12.772728,13.589995


In [15]:
df.loc[['a', 'b'], ['one']] # Get multiple rows and columns

Unnamed: 0,one
a,76.453545
b,7.051927


In [16]:
df.loc[['b', 'c', 'a', 'e'], ['two', 'one']] # Change the selection order

Unnamed: 0,two,one
b,6.406754,7.051927
c,97.267604,35.462478
a,96.363305,76.453545
e,9.196565,30.14833


In [17]:
df.shape # Get shape

(6, 4)

## Conditional selection

In [18]:
df > df.median() # Compare every value with its column's median; the result is a boolean DataFrame of the same shape

Unnamed: 0,one,two,three,four
a,True,True,False,False
b,False,False,True,True
c,True,True,False,False
d,False,True,False,True
e,True,False,True,False
f,False,False,True,True


In [19]:
df[df > df.median()] # Get NaN for every false value above

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,,
b,,,61.736579,67.622311
c,35.462478,97.267604,,
d,,15.859291,,72.040873
e,30.14833,,66.5287,
f,,,80.112225,55.187197


In [20]:
df[df['one'] > 50] # Using conditional selection for columns returns reduced dataframe

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,34.309186,22.437078


In [21]:
# Get column 'two' for the rows where column 'one' exceeds 50.
# A single .loc[row_mask, column] lookup replaces the chained form
# df[mask]['two'], which indexes an intermediate copy and becomes unsafe
# as soon as the same pattern is used for assignment.
df.loc[df['one'] > 50, 'two']

a    96.363305
Name: two, dtype: float64

In [22]:
# Get multiple columns for the rows where column 'one' exceeds 50,
# again with one .loc[row_mask, columns] call instead of chained indexing.
df.loc[df['one'] > 50, ['two', 'three']]

Unnamed: 0,two,three
a,96.363305,34.309186


In [23]:
df[(df['one'] > 50) & (df['two'] < 50)] # Combine conditions element-wise with & (the Python keyword `and` does not work on Series); each condition needs its own parentheses

Unnamed: 0,one,two,three,four


In [24]:
df[(df['three'] > 50) | (df['four'] > 90)] # Or operation using | operator

Unnamed: 0,one,two,three,four
b,7.051927,6.406754,61.736579,67.622311
e,30.14833,9.196565,66.5287,28.38317
f,13.589995,12.772728,80.112225,55.187197


## Operations on Dataframes

In [25]:
df['three'] = df['one'] + df['two'] # Column arithmetic: overwrites the existing column 'three' with the element-wise sum (a new name would add a column)

In [26]:
df

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,172.81685,22.437078
b,7.051927,6.406754,13.458681,67.622311
c,35.462478,97.267604,132.730082,18.526606
d,8.456016,15.859291,24.315307,72.040873
e,30.14833,9.196565,39.344895,28.38317
f,13.589995,12.772728,26.362723,55.187197


In [27]:
df

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,172.81685,22.437078
b,7.051927,6.406754,13.458681,67.622311
c,35.462478,97.267604,132.730082,18.526606
d,8.456016,15.859291,24.315307,72.040873
e,30.14833,9.196565,39.344895,28.38317
f,13.589995,12.772728,26.362723,55.187197


In [28]:
df.drop('a', axis=0) # Remove rows

Unnamed: 0,one,two,three,four
b,7.051927,6.406754,13.458681,67.622311
c,35.462478,97.267604,132.730082,18.526606
d,8.456016,15.859291,24.315307,72.040873
e,30.14833,9.196565,39.344895,28.38317
f,13.589995,12.772728,26.362723,55.187197


In [29]:
df.drop('three', axis=1) # Remove Column

Unnamed: 0,one,two,four
a,76.453545,96.363305,22.437078
b,7.051927,6.406754,67.622311
c,35.462478,97.267604,18.526606
d,8.456016,15.859291,72.040873
e,30.14833,9.196565,28.38317
f,13.589995,12.772728,55.187197


In [30]:
df # Does not lose any row/column, because drop() defaults to inplace=False and returns a new DataFrame

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,172.81685,22.437078
b,7.051927,6.406754,13.458681,67.622311
c,35.462478,97.267604,132.730082,18.526606
d,8.456016,15.859291,24.315307,72.040873
e,30.14833,9.196565,39.344895,28.38317
f,13.589995,12.772728,26.362723,55.187197


In [31]:
df.loc['f'] = df.loc['e'] + df.loc['a'] # Assign a row by label: 'f' already exists, so it is overwritten (an unused label would append a new row)

In [32]:
df['temp'] = df['one'] + df['two'] + df['three'] # Add a new column 'temp' holding the row-wise sum of three columns

In [33]:
df

Unnamed: 0,one,two,three,four,temp
a,76.453545,96.363305,172.81685,22.437078,345.6337
b,7.051927,6.406754,13.458681,67.622311,26.917362
c,35.462478,97.267604,132.730082,18.526606,265.460164
d,8.456016,15.859291,24.315307,72.040873,48.630614
e,30.14833,9.196565,39.344895,28.38317,78.68979
f,106.601874,105.559871,212.161745,50.820248,424.32349


In [34]:
df.drop(columns='temp', inplace=True) # Mutates df itself: the 'temp' column is removed in place

In [35]:
df.drop(index='f', inplace=True) # Mutates df itself: the row labelled 'f' is removed in place

In [36]:
df # Resultant dataframe

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,172.81685,22.437078
b,7.051927,6.406754,13.458681,67.622311
c,35.462478,97.267604,132.730082,18.526606
d,8.456016,15.859291,24.315307,72.040873
e,30.14833,9.196565,39.344895,28.38317


In [37]:
df.reset_index() # Reset index to integer values(inplace=False as default)

Unnamed: 0,index,one,two,three,four
0,a,76.453545,96.363305,172.81685,22.437078
1,b,7.051927,6.406754,13.458681,67.622311
2,c,35.462478,97.267604,132.730082,18.526606
3,d,8.456016,15.859291,24.315307,72.040873
4,e,30.14833,9.196565,39.344895,28.38317


In [38]:
df

Unnamed: 0,one,two,three,four
a,76.453545,96.363305,172.81685,22.437078
b,7.051927,6.406754,13.458681,67.622311
c,35.462478,97.267604,132.730082,18.526606
d,8.456016,15.859291,24.315307,72.040873
e,30.14833,9.196565,39.344895,28.38317


In [39]:
# Labels for the five remaining rows, one chemical symbol per row.
newIndex = ['H', 'He', 'Li', 'Be', 'B']

In [40]:
newIndex

['H', 'He', 'Li', 'Be', 'B']

In [41]:
df['elements'] = newIndex # Add the list as a new column; its length must equal the number of rows

In [42]:
df

Unnamed: 0,one,two,three,four,elements
a,76.453545,96.363305,172.81685,22.437078,H
b,7.051927,6.406754,13.458681,67.622311,He
c,35.462478,97.267604,132.730082,18.526606,Li
d,8.456016,15.859291,24.315307,72.040873,Be
e,30.14833,9.196565,39.344895,28.38317,B


In [43]:
df.set_index('elements') # Set column as index

Unnamed: 0_level_0,one,two,three,four
elements,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
H,76.453545,96.363305,172.81685,22.437078
He,7.051927,6.406754,13.458681,67.622311
Li,35.462478,97.267604,132.730082,18.526606
Be,8.456016,15.859291,24.315307,72.040873
B,30.14833,9.196565,39.344895,28.38317


In [44]:
df # set_index also doesn't default to inplace=True

Unnamed: 0,one,two,three,four,elements
a,76.453545,96.363305,172.81685,22.437078,H
b,7.051927,6.406754,13.458681,67.622311,He
c,35.462478,97.267604,132.730082,18.526606,Li
d,8.456016,15.859291,24.315307,72.040873,Be
e,30.14833,9.196565,39.344895,28.38317,B


## Finding Uniques

In [45]:
# Build a 20-row frame of random integers; each column is drawn from its own
# half-open range [low, high). Passing size=20 makes NumPy draw the whole
# column in one vectorized call instead of looping in Python with an unused
# loop variable.
df = pd.DataFrame({'a': np.random.randint(0, 10, size=20),
                   'b': np.random.randint(10, 20, size=20),
                   'c': np.random.randint(20, 30, size=20)})

In [46]:
df

Unnamed: 0,a,b,c
0,5,13,29
1,5,16,27
2,2,16,26
3,3,12,24
4,4,12,26
5,3,13,25
6,7,13,26
7,9,15,22
8,6,16,20
9,8,18,22


In [47]:
df['a'].unique() # Get all unique values

array([5, 2, 3, 4, 7, 9, 6, 8, 0])

In [48]:
df['b'].nunique() # Get number of unique values

8

In [49]:
df['c'].value_counts() # Get count of each unique value

26    6
29    3
25    3
23    2
22    2
20    2
27    1
24    1
Name: c, dtype: int64

## Sorting

In [50]:
df.sort_values(by='a') # Sort values in column 'a'

Unnamed: 0,a,b,c
17,0,16,25
12,0,14,26
2,2,16,26
3,3,12,24
5,3,13,25
4,4,12,26
16,4,18,26
14,4,11,23
0,5,13,29
1,5,16,27


# Multiple Indexes

In [51]:
outside = ['G1', 'G2']

In [52]:
inside = [1, 2, 3]

In [53]:
hier_index = list(itertools.product(outside, inside))

In [54]:
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [55]:
# Build the two-level index directly from the level values. This yields the
# same (outside, inside) pairs as the itertools.product list, but the cell no
# longer feeds hier_index back into itself, so re-running it is idempotent.
hier_index = pd.MultiIndex.from_product([outside, inside])

In [56]:
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [57]:
df = pd.DataFrame(np.random.randn(6,2), index=hier_index, columns=['C1', 'C2'])

In [58]:
df

Unnamed: 0,Unnamed: 1,C1,C2
G1,1,-0.39882,0.833642
G1,2,-0.91946,1.19563
G1,3,1.034852,-0.54934
G2,1,-0.196821,0.872685
G2,2,-0.892006,0.319175
G2,3,-0.701212,1.788616


In [59]:
df.index.names = ['Groups', 'Num'] # Set Names for indexes

In [60]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,C1,C2
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.39882,0.833642
G1,2,-0.91946,1.19563
G1,3,1.034852,-0.54934
G2,1,-0.196821,0.872685
G2,2,-0.892006,0.319175
G2,3,-0.701212,1.788616


In [61]:
df.loc['G1'] # Getting a group

Unnamed: 0_level_0,C1,C2
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.39882,0.833642
2,-0.91946,1.19563
3,1.034852,-0.54934


In [62]:
# Going deeper: address the row with a (Groups, Num) tuple in one lookup
# instead of chaining two .loc calls through an intermediate frame.
# The returned Series is named ('G2', 2) rather than just 2.
df.loc[('G2', 2)]

C1   -0.892006
C2    0.319175
Name: 2, dtype: float64

In [63]:
# Get a single element from the hierarchy: row key (Groups, Num) plus the
# column label in one .loc call, avoiding three chained lookups.
df.loc[('G2', 2), 'C1']

-0.89200577621692

In [64]:
df.xs(1, level='Num') # Cross-section to inner level directly(Here index 1 from Num)

Unnamed: 0_level_0,C1,C2
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.39882,0.833642
G2,-0.196821,0.872685


## Working with missing values

In [65]:
df = pd.DataFrame({'a':[1, 2, 3], 'b':[1, 2, np.nan], 'c': [1,np.nan, np.nan]})

In [66]:
df

Unnamed: 0,a,b,c
0,1,1.0,1.0
1,2,2.0,
2,3,,


In [67]:
df.dropna() # Drop every row that contains at least one NaN

Unnamed: 0,a,b,c
0,1,1.0,1.0


In [68]:
df.dropna(axis=1) # Or drop all columns containing NaN

Unnamed: 0,a
0,1
1,2
2,3


In [69]:
df.dropna(thresh=2) # Keep only rows with at least 2 non-NaN values (thresh counts valid values, not NaNs)

Unnamed: 0,a,b,c
0,1,1.0,1.0
1,2,2.0,


In [70]:
df.fillna(0) # Fill all NaNs with 0

Unnamed: 0,a,b,c
0,1,1.0,1.0
1,2,2.0,0.0
2,3,0.0,0.0


In [71]:
df.fillna(value=df['a'].mean()) # Fill every NaN, in all columns, with the mean of column 'a'

Unnamed: 0,a,b,c
0,1,1.0,1.0
1,2,2.0,2.0
2,3,2.0,2.0


## Groupby

In [72]:
# Sample data for the groupby demo: 4 companies x 4 departments = 16 rows.
# Sorting the repeated company list puts each company's four rows together,
# so they line up with one full cycle of the department list.
company = sorted(['GOOG', 'AMZN', 'MSFT', 'FB'] * 4, key=str.upper)
dept = ['ADS', 'HW', 'SW', 'VR'] * 4
# One vectorized draw of 16 integers from [100, 500); converted back to a
# plain list of Python ints, as the loop-based version produced.
randvals = np.random.randint(100, 500, size=16).tolist()

In [73]:
baddf = pd.DataFrame({'Company': company, 'dept':dept, 'sales': randvals})

In [74]:
baddf

Unnamed: 0,Company,dept,sales
0,AMZN,ADS,336
1,AMZN,HW,103
2,AMZN,SW,284
3,AMZN,VR,431
4,FB,ADS,492
5,FB,HW,387
6,FB,SW,299
7,FB,VR,297
8,GOOG,ADS,284
9,GOOG,HW,142


In [75]:
company_grp = baddf.groupby('Company') # Step 1: Create Group

In [76]:
company_grp

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x7fefa76562e8>

In [77]:
# Step 2: Apply aggregate functions.
# Select the numeric 'sales' column first: 'dept' holds strings, and recent
# pandas versions raise on mean() over non-numeric columns instead of
# silently dropping them.
company_grp[['sales']].mean()

Unnamed: 0_level_0,sales
Company,Unnamed: 1_level_1
AMZN,288.5
FB,368.75
GOOG,311.25
MSFT,298.75


In [78]:
# More methods on groupby (sum). Restricted to 'sales' so the string column
# 'dept' is not concatenated into the result on recent pandas versions.
company_grp[['sales']].sum()

Unnamed: 0_level_0,sales
Company,Unnamed: 1_level_1
AMZN,1154
FB,1475
GOOG,1245
MSFT,1195


In [79]:
# Even more methods (standard deviation). Restricted to 'sales' because
# std() cannot be computed over the string column 'dept'.
company_grp[['sales']].std()

Unnamed: 0_level_0,sales
Company,Unnamed: 1_level_1
AMZN,137.832023
FB,92.261856
GOOG,127.951488
MSFT,145.957471


In [80]:
# Access one group's values by index label after aggregating; only the
# numeric 'sales' column is summed so the result holds just the sales total.
company_grp[['sales']].sum().loc['GOOG']

sales    1245
Name: GOOG, dtype: int64

In [81]:
# Putting all the steps together: group, pick the numeric column,
# aggregate, then look up a single company by label.
baddf.groupby('Company')[['sales']].sum().loc['FB']

sales    1475
Name: FB, dtype: int64

In [82]:
baddf.groupby('Company').describe() # Tabulate various results

Unnamed: 0_level_0,sales,sales,sales,sales,sales,sales,sales,sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
AMZN,4.0,288.5,137.832023,103.0,238.75,310.0,359.75,431.0
FB,4.0,368.75,92.261856,297.0,298.5,343.0,413.25,492.0
GOOG,4.0,311.25,127.951488,142.0,248.5,339.5,402.25,424.0
MSFT,4.0,298.75,145.957471,120.0,214.5,313.0,397.25,449.0


## Concatenation
Concatenate pandas objects along a particular axis

In [83]:
def x4_arr(x, y):
    """Return the integers in [x, y) laid out row by row as a 4x4 array.

    The reshape raises ValueError unless the range holds exactly 16 values.
    """
    # np.arange replaces a list comprehension whose loop variable shadowed
    # the parameter x; a def replaces the lambda bound to a name.
    return np.arange(x, y).reshape(4, 4)

# Three frames with identical columns and consecutive, non-overlapping indexes.
columns = ['A', 'B', 'C', 'D']
df1 = pd.DataFrame(x4_arr(0, 16), index=[0, 1, 2, 3], columns=columns)
df2 = pd.DataFrame(x4_arr(16, 32), index=[4, 5, 6, 7], columns=columns)
df3 = pd.DataFrame(x4_arr(32, 48), index=[8, 9, 10, 11], columns=columns)

In [84]:
df1

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [85]:
df2

Unnamed: 0,A,B,C,D
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27
7,28,29,30,31


In [86]:
df3

Unnamed: 0,A,B,C,D
8,32,33,34,35
9,36,37,38,39
10,40,41,42,43
11,44,45,46,47


In [87]:
pd.concat([df1, df2, df3]) # Concatenation along index

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27
7,28,29,30,31
8,32,33,34,35
9,36,37,38,39


In [88]:
pd.concat([df1, df2, df3], axis=1) # Concatenation along columns

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,0.0,1.0,2.0,3.0,,,,,,,,
1,4.0,5.0,6.0,7.0,,,,,,,,
2,8.0,9.0,10.0,11.0,,,,,,,,
3,12.0,13.0,14.0,15.0,,,,,,,,
4,,,,,16.0,17.0,18.0,19.0,,,,
5,,,,,20.0,21.0,22.0,23.0,,,,
6,,,,,24.0,25.0,26.0,27.0,,,,
7,,,,,28.0,29.0,30.0,31.0,,,,
8,,,,,,,,,32.0,33.0,34.0,35.0
9,,,,,,,,,36.0,37.0,38.0,39.0


## Merging
Combine DataFrames by matching rows on one or more key columns (similar to a SQL join)

In [89]:
# Two frames that share a 'key' column holding the same four key values.
left = pd.DataFrame({
    'key': 'K0 K1 K2 K3'.split(),
    'A': 'A0 A1 A2 A3'.split(),
    'B': 'B0 B1 B2 B3'.split(),
})

right = pd.DataFrame({
    'key': 'K0 K1 K2 K3'.split(),
    'C': 'C0 C1 C2 C3'.split(),
    'D': 'D0 D1 D2 D3'.split(),
})

In [90]:
left

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [91]:
right

Unnamed: 0,key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [92]:
pd.merge(left, right, how="inner", on="key") # Exclude all unmatching keys

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [93]:
# Two frames keyed by the pair (key1, key2); only some key pairs occur in both.
left = pd.DataFrame(dict(
    key1=['K0', 'K0', 'K1', 'K2'],
    key2=['K0', 'K1', 'K0', 'K1'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))

right = pd.DataFrame(dict(
    key1=['K0', 'K1', 'K1', 'K2'],
    key2=['K0', 'K0', 'K0', 'K0'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))

In [94]:
left

Unnamed: 0,key1,key2,A,B
0,K0,K0,A0,B0
1,K0,K1,A1,B1
2,K1,K0,A2,B2
3,K2,K1,A3,B3


In [95]:
right

Unnamed: 0,key1,key2,C,D
0,K0,K0,C0,D0
1,K1,K0,C1,D1
2,K1,K0,C2,D2
3,K2,K0,C3,D3


In [96]:
pd.merge(left, right, on=['key1']) # Merge on key1 only; key2 exists in both frames but is not a merge key, so it comes back as key2_x / key2_y

Unnamed: 0,key1,key2_x,A,B,key2_y,C,D
0,K0,K0,A0,B0,K0,C0,D0
1,K0,K1,A1,B1,K0,C0,D0
2,K1,K0,A2,B2,K0,C1,D1
3,K1,K0,A2,B2,K0,C2,D2
4,K2,K1,A3,B3,K0,C3,D3


In [97]:
pd.merge(left, right, on=['key1', 'key2']) # Use list as key

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2


In [98]:
pd.merge(left, right, how='outer', on=['key1', 'key2']) # All key values from both dataframes, NaN for absent values

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K1,A3,B3,,
5,K2,K0,,,C3,D3


In [99]:
pd.merge(left, right, how='left', on=['key1', 'key2']) # All key values from left dataframe

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K1,A3,B3,,


In [100]:
pd.merge(left, right, how='right', on=['key1', 'key2']) # All key values from right dataframe

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2
3,K2,K0,,,C3,D3


## Joining
Join dataframe columns either along index or key columns

In [101]:
# Two frames whose row labels overlap only partly (K0 and K2 are shared).
left = pd.DataFrame(
    data={'A': 'A0 A1 A2'.split(), 'B': 'B0 B1 B2'.split()},
    index='K0 K1 K2'.split(),
)

right = pd.DataFrame(
    data={'C': 'C0 C2 C3'.split(), 'D': 'D0 D2 D3'.split()},
    index='K0 K2 K3'.split(),
)

In [102]:
left.join(right) # All rows of left with corresponding values of right

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [103]:
left.join(right, how='outer') # Union of both indexes; NaN wherever one side has no row for that label

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


## Pivots
Reshape dataframe using pivots

In [104]:
# Long-format sample records: three categorical columns and one numeric value.
data = dict(
    A='foo foo foo bar bar bar'.split(),
    B='one one two two one one'.split(),
    C=list('xyxyxy'),
    D=[1, 3, 2, 5, 4, 1],
)

df = pd.DataFrame(data)

In [105]:
df

Unnamed: 0,A,B,C,D
0,foo,one,x,1
1,foo,one,y,3
2,foo,two,x,2
3,bar,two,y,5
4,bar,one,x,4
5,bar,one,y,1


In [106]:
# Reshape to one row per (A, B) pair and one column per value of C, filled with D.
# Duplicate combinations are aggregated with the default aggfunc (mean); missing combinations become NaN.
df.pivot_table(values='D',index=['A', 'B'],columns=['C'])

Unnamed: 0_level_0,C,x,y
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4.0,1.0
bar,two,,5.0
foo,one,1.0,3.0
foo,two,2.0,
