In [1]:
#We take a small sample of data, laid out the way a database would store it, and convert it into DataFrame form

import pandas as pd
from pandas import DataFrame
import numpy as np

In [12]:
# Build database-like data, i.e. in long/stacked format.
# The three quarter-end timestamps are cycled three times (q1, q2, q3, q1, ...),
# giving nine entries in total.
dates = [
    stamp
    for _ in range(3)
    for stamp in ('1959-03-31 00:00:00', '1959-06-30 00:00:00', '1959-09-30 00:00:00')
]

In [14]:
# Sort so the three copies of each timestamp sit next to each other; this is what
# lets every date line up with one realgdp/infl/unemp triple in the table built later.
dates = sorted(dates)
dates

['1959-03-31 00:00:00',
 '1959-03-31 00:00:00',
 '1959-03-31 00:00:00',
 '1959-06-30 00:00:00',
 '1959-06-30 00:00:00',
 '1959-06-30 00:00:00',
 '1959-09-30 00:00:00',
 '1959-09-30 00:00:00',
 '1959-09-30 00:00:00']

In [15]:
# Series labels: the same three indicators are listed once per date, so the
# realgdp/infl/unemp pattern repeats three times (nine labels in total).
item = list(('realgdp', 'infl', 'unemp') * 3)

In [16]:
# Display the labels to confirm the three series names repeat in the same order
item

['realgdp',
 'infl',
 'unemp',
 'realgdp',
 'infl',
 'unemp',
 'realgdp',
 'infl',
 'unemp']

In [17]:
# Observations in row order: each line below holds realgdp, infl, unemp for one date
value = [
    2710.349, 0.0, 5.8,
    2778.801, 2.34, 5.1,
    2775.488, 2.74, 5.3,
]

In [18]:
# Display the nine observations (three per date) that will fill the 'value' column
value

[2710.349, 0.0, 5.8, 2778.801, 2.34, 5.1, 2775.488, 2.74, 5.3]

In [19]:
# Assemble the stacked (long) table the way a database would store it:
# one row per observation, holding the date, the series name and the measured value.
data = DataFrame(
    data={
        'date': dates,
        'item': item,
        'value': value,
    }
)

In [20]:
# Display the long-format frame: one row per (date, item) pair
data

Unnamed: 0,date,item,value
0,1959-03-31 00:00:00,realgdp,2710.349
1,1959-03-31 00:00:00,infl,0.0
2,1959-03-31 00:00:00,unemp,5.8
3,1959-06-30 00:00:00,realgdp,2778.801
4,1959-06-30 00:00:00,infl,2.34
5,1959-06-30 00:00:00,unemp,5.1
6,1959-09-30 00:00:00,realgdp,2775.488
7,1959-09-30 00:00:00,infl,2.74
8,1959-09-30 00:00:00,unemp,5.3


In [22]:
# The long format is awkward to work with on large datasets; it is usually more
# convenient to give every unique item its own column.
#
# DataFrame.pivot performs this reshape: `index` names the column that becomes the
# row index, `columns` the column whose unique values become the new column labels,
# and `values` the column used to fill the table.
# The arguments are passed by keyword because pandas 2.0+ rejects positional ones.
pivoted = data.pivot(index='date', columns='item', values='value')

In [23]:
# Display the pivoted frame: dates as the row index, one column per item
pivoted

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 00:00:00,0.0,2710.349,5.8
1959-06-30 00:00:00,2.34,2778.801,5.1
1959-09-30 00:00:00,2.74,2775.488,5.3


In [26]:
# pivot takes the row-index column and the column-label column as its first two
# arguments, and a third column whose values fill the resulting DataFrame.
#
# Suppose the table carried a second value column that we also wanted to reshape.
# A dedicated, seeded generator keeps the column reproducible: re-running this cell
# (or the whole notebook) always produces the same numbers, and the global NumPy
# random state is left untouched. The length comes from the frame itself so the new
# column always matches its row count.
# NOTE: outputs saved before the seed was introduced will differ until the notebook is re-run.
data['value2'] = np.random.RandomState(0).randn(len(data))

In [27]:
# Display the frame again, now with the extra 'value2' column
data

Unnamed: 0,date,item,value,value2
0,1959-03-31 00:00:00,realgdp,2710.349,0.150307
1,1959-03-31 00:00:00,infl,0.0,0.488579
2,1959-03-31 00:00:00,unemp,5.8,0.374709
3,1959-06-30 00:00:00,realgdp,2778.801,2.687179
4,1959-06-30 00:00:00,infl,2.34,-1.056655
5,1959-06-30 00:00:00,unemp,5.1,-1.208081
6,1959-09-30 00:00:00,realgdp,2775.488,-0.837135
7,1959-09-30 00:00:00,infl,2.74,-1.439765
8,1959-09-30 00:00:00,unemp,5.3,0.930443


In [28]:
# Leaving out `values` pivots every remaining column at once, which yields
# hierarchical columns: the value column name on the outer level, the item below it.
# Keyword arguments are used because pandas 2.0+ rejects positional ones.
data.pivot(index='date', columns='item')

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31 00:00:00,0.0,2710.349,5.8,0.488579,0.150307,0.374709
1959-06-30 00:00:00,2.34,2778.801,5.1,-1.056655,2.687179,-1.208081
1959-09-30 00:00:00,2.74,2775.488,5.3,-1.439765,-0.837135,0.930443


In [29]:
# From the hierarchical result, any single value column can be selected through
# the outer column level.
# Keyword arguments are used because pandas 2.0+ rejects positional ones.
pivoted = data.pivot(index='date', columns='item')
pivoted['value2']

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 00:00:00,0.488579,0.150307,0.374709
1959-06-30 00:00:00,-1.056655,2.687179,-1.208081
1959-09-30 00:00:00,-1.439765,-0.837135,0.930443


In [30]:
# This layout, in which each item is separated into its own column, is known as
# the "wide" format.
# pivot is in effect a shortcut for set_index followed by unstack: index the frame
# by (date, item), then move the 'item' level out of the rows and into the columns.

unstack_data = (
    data
    .set_index(['date', 'item'])
    .unstack(level='item')
)

In [31]:
# Display the set_index/unstack result for comparison with the hierarchical pivot output
unstack_data

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31 00:00:00,0.0,2710.349,5.8,0.488579,0.150307,0.374709
1959-06-30 00:00:00,2.34,2778.801,5.1,-1.056655,2.687179,-1.208081
1959-09-30 00:00:00,2.74,2775.488,5.3,-1.439765,-0.837135,0.930443
