# Démarrer avec pandas

## Structure de données

In [14]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [4]:
# A Series built from a bare list receives the default integer index 0..n-1.
obj = Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [5]:
obj.values

array([ 4,  7, -5,  3], dtype=int64)

In [6]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
# Same values as before, this time labelled with an explicit index.
obj2 = Series(data=[4, 7, -5, 3], index=list('dbac'))
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [8]:
obj2['a']

-5

In [9]:
obj2[['a','c']]

a   -5
c    3
dtype: int64

In [11]:
obj2[obj2>0]

d    4
b    7
c    3
dtype: int64

In [12]:
obj*2

0     8
1    14
2   -10
3     6
dtype: int64

In [15]:
np.exp(obj2)

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [16]:
# `in` on a Series tests the index labels, not the stored values.
'b' in obj2

True

In [17]:
pd.isnull(obj2)

d    False
b    False
a    False
c    False
dtype: bool

In [18]:
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [19]:
obj2.name='Trial'

In [21]:
obj2.index.name='Entrepot'

In [26]:
# Prefix every index label with "Entrepot ".
# NOTE: this rewrites obj2's index in place, so re-running the cell prefixes
# the labels a second time. The throw-away `lis = []` initialisation was a
# dead store and has been removed.
lis = 'Entrepot ' + obj2.index.values
obj2.index = lis
obj2

Entrepot d    4
Entrepot b    7
Entrepot a   -5
Entrepot c    3
Name: Trial, dtype: int64

## DataFrame

In [27]:
# Each column is an equal-length list keyed by its column name.
data = dict(
    State=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    Year=[2000, 2001, 2002, 2001, 2002],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9],
)
frame = DataFrame(data=data)


In [28]:
frame

Unnamed: 0,State,Year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [31]:
frame2 = DataFrame(data,
                   columns=['Year','State','pop','Debt'],
                   index=['one','two','three','four','five',])
frame2

Unnamed: 0,Year,State,pop,Debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


In [32]:
frame2.columns

Index(['Year', 'State', 'pop', 'Debt'], dtype='object')

In [33]:
frame2['State']

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: State, dtype: object

In [35]:
frame2.Year

one      2000
two      2001
three    2002
four     2001
five     2002
Name: Year, dtype: int64

In [37]:
frame2.loc['three']

Year     2002
State    Ohio
pop       3.6
Debt      NaN
Name: three, dtype: object

In [39]:
# Overwrite the all-NaN Debt column with 0..4, one value per row.
frame2['Debt'] = np.arange(5)
frame2

Unnamed: 0,Year,State,pop,Debt
one,2000,Ohio,1.5,0
two,2001,Ohio,1.7,1
three,2002,Ohio,3.6,2
four,2001,Nevada,2.4,3
five,2002,Nevada,2.9,4


In [41]:
# Assigning a Series to a column aligns it on the frame's index: rows whose
# label is missing from `val` ('one', 'three') receive NaN. Bracket assignment
# creates the column directly, so no NaN placeholder column is needed first.
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['New'] = val
frame2

Unnamed: 0,Year,State,pop,Debt,New
one,2000,Ohio,1.5,0,
two,2001,Ohio,1.7,1,-1.2
three,2002,Ohio,3.6,2,
four,2001,Nevada,2.4,3,-1.5
five,2002,Nevada,2.9,4,-1.7


In [43]:
# Boolean flag column: True for the rows whose State is 'Ohio'.
frame2['eastern'] = frame2['State'].eq('Ohio')
frame2

Unnamed: 0,Year,State,pop,Debt,New,eastern
one,2000,Ohio,1.5,0,,True
two,2001,Ohio,1.7,1,-1.2,True
three,2002,Ohio,3.6,2,,True
four,2001,Nevada,2.4,3,-1.5,False
five,2002,Nevada,2.9,4,-1.7,False


In [45]:
del frame2['eastern']
frame2

Unnamed: 0,Year,State,pop,Debt,New
one,2000,Ohio,1.5,0,
two,2001,Ohio,1.7,1,-1.2
three,2002,Ohio,3.6,2,
four,2001,Nevada,2.4,3,-1.5
five,2002,Nevada,2.9,4,-1.7


In [46]:
# Nested dicts: outer keys become the columns, inner keys the row labels.
# A year missing for one state (2000 for Nevada) shows up as NaN.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = DataFrame(data=pop)
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [47]:
frame3.T

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.7,3.6


In [48]:
frame3.index.name='year'
frame3.columns.name='state'
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [49]:
frame3.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])

## Les index

In [51]:
idx = frame.index
idx

RangeIndex(start=0, stop=5, step=1)

In [52]:
# True when the index labels never decrease. `is_monotonic` was only an alias
# of this attribute and no longer exists in pandas 2.x.
idx.is_monotonic_increasing

True

In [53]:
idx.is_unique

True

In [55]:
idx.isin(np.arange(0,5))

array([ True,  True,  True,  True,  True])

In [57]:
# Float Series with string labels, used for the reindexing examples that follow.
obj = Series(data=[4.5, 7.2, -5.3, 3.6], index=list('dbac'))
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [60]:
obj.reindex(['a','b','c','d','e'],fill_value=0)

a   -5.3
b    7.2
c    3.6
d    4.5
e    0.0
dtype: float64

In [62]:
# Colours known only at positions 0, 2 and 4; reindexing over 0..5 with
# forward fill copies the last known colour into each gap.
obj3 = Series({0: 'blue', 2: 'purple', 4: 'yellow'})
obj3.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [63]:
obj3.reindex(range(6),method='bfill')

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

In [73]:
obj4 = obj3.reindex(range(6),method='bfill')
obj5 = obj3.reindex(range(6),method='ffill')
# A 6x2 frame of NaN placeholders. np.full states the intent directly instead
# of multiplying an uninitialised np.empty buffer by NaN.
frame = DataFrame(np.full((6, 2), np.nan), index=range(6), columns=['Couleur1', 'Couleur2'])
frame

Unnamed: 0,Couleur1,Couleur2
0,,
1,,
2,,
3,,
4,,
5,,


In [74]:
# Fill both colour columns from obj3 stretched over 0..5:
# backward-filled for Couleur1, forward-filled for Couleur2.
frame['Couleur1'] = obj3.reindex(range(6), method='bfill')
frame['Couleur2'] = obj3.reindex(range(6), method='ffill')
frame


Unnamed: 0,Couleur1,Couleur2
0,blue,blue
1,purple,blue
2,purple,purple
3,yellow,purple
4,yellow,yellow
5,,yellow


In [76]:
couleurs = ['Couleur1','Couleur12','Couleur2']
frame.reindex(columns=couleurs)

Unnamed: 0,Couleur1,Couleur12,Couleur2
0,blue,,blue
1,purple,,blue
2,purple,,purple
3,yellow,,purple
4,yellow,,yellow
5,,,yellow


In [80]:
frame.reindex(index=['a','b','c','d','e','f'],columns=couleurs)

Unnamed: 0,Couleur1,Couleur12,Couleur2
a,,,
b,,,
c,,,
d,,,
e,,,
f,,,


In [81]:
# NOTE(review): exact repeat of the earlier cell that fills Couleur1/Couleur2.
# The reindex() calls in between do not assign their result, so `frame` is
# already in this state; the cell looks redundant -- confirm and remove.
frame.Couleur1 = obj3.reindex(range(6),method='bfill')
frame.Couleur2 = obj3.reindex(range(6),method='ffill')
frame

Unnamed: 0,Couleur1,Couleur2
0,blue,blue
1,purple,blue
2,purple,purple
3,yellow,purple
4,yellow,yellow
5,,yellow


## Supprimer les entrées d'un axe

In [83]:
newObj = frame.drop(5,axis=0)
newObj

Unnamed: 0,Couleur1,Couleur2
0,blue,blue
1,purple,blue
2,purple,purple
3,yellow,purple
4,yellow,yellow


## Sélectionner, filtrer

In [85]:
newObj.loc[1:3,:]

Unnamed: 0,Couleur1,Couleur2
1,purple,blue
2,purple,purple
3,yellow,purple


In [88]:
newObj.iloc[1]

Couleur1    purple
Couleur2      blue
Name: 1, dtype: object

In [96]:
newObj.iloc[:,1]

0      blue
1      blue
2    purple
3    purple
4    yellow
Name: Couleur2, dtype: object

In [97]:
newObj.loc[:,'Couleur2']

0      blue
1      blue
2    purple
3    purple
4    yellow
Name: Couleur2, dtype: object

In [99]:
newObj.loc[newObj.Couleur1=='blue',:]

Unnamed: 0,Couleur1,Couleur2
0,blue,blue


In [102]:
newObj.loc[newObj.Couleur1=='blue','Couleur1']

0    blue
Name: Couleur1, dtype: object

In [101]:
# Boolean column selection: keep the columns whose value in the second row
# (position 1) equals 'blue'.
newObj.loc[:,newObj.iloc[1,:]=='blue']

Unnamed: 0,Couleur2
0,blue
1,blue
2,purple
3,purple
4,yellow


In [103]:
newObj.loc[1:3,newObj.iloc[1,:]=='blue']

Unnamed: 0,Couleur2
1,blue
2,purple
3,purple


In [108]:
newObj.iloc[2,:]

Couleur1    purple
Couleur2    purple
Name: 2, dtype: object

In [109]:
# Scalar at row position 2, column position 0. A single .iloc call avoids
# indexing a label-indexed Series with the integer 0, which relies on the
# deprecated positional fallback of Series.__getitem__.
newObj.iloc[2, 0]

'purple'

## Fonctions Lambda

In [122]:
# 4x3 frame of standard-normal draws. A dedicated, seeded RandomState makes
# the cell reproducible on every run without touching NumPy's global RNG.
frame = DataFrame(np.random.RandomState(0).randn(4, 3),
                  columns=list('bde'),
                  index=['Utah','Ohio','Texas','Oregon'])
frame

Unnamed: 0,b,d,e
Utah,1.473356,-0.445247,1.589189
Ohio,0.176565,-0.018599,0.988118
Texas,-1.246975,-0.236043,-0.943573
Oregon,-1.448079,-0.044408,1.251633


In [123]:
# Spread of a Series (max minus min). apply() calls it once per column, or
# once per row when axis=1 is passed.
# NOTE(review): the name `f` is redefined by a `def f` a few cells below.
f = lambda x: x.max() - x.min()

In [124]:
frame.apply(f)

b    2.921434
d    0.426647
e    2.532762
dtype: float64

In [125]:
frame.apply(f,axis=1)

Utah      2.034436
Ohio      1.006718
Texas     1.010932
Oregon    2.699712
dtype: float64

In [127]:
def f(x):
    """Return the smallest and largest value of ``x`` as a Series labelled 'min' and 'max'."""
    extremes = {'min': x.min(), 'max': x.max()}
    return Series(extremes)


In [128]:
frame.apply(f)

Unnamed: 0,b,d,e
min,-1.448079,-0.445247,-0.943573
max,1.473356,-0.018599,1.589189


In [129]:
frame.apply(f,axis=1)

Unnamed: 0,min,max
Utah,-0.445247,1.589189
Ohio,-0.018599,0.988118
Texas,-1.246975,-0.236043
Oregon,-1.448079,1.251633


In [132]:
# Render a number as a string with two decimal places.
formatage = lambda x: '%.2f' % x

In [133]:
# Element-wise formatting of the whole frame. DataFrame.applymap is deprecated
# in recent pandas; mapping each column with Series.map gives the same result
# on every pandas version.
frame.apply(lambda col: col.map(formatage))

Unnamed: 0,b,d,e
Utah,1.47,-0.45,1.59
Ohio,0.18,-0.02,0.99
Texas,-1.25,-0.24,-0.94
Oregon,-1.45,-0.04,1.25


In [134]:
frame['e'].map(formatage)

Utah       1.59
Ohio       0.99
Texas     -0.94
Oregon     1.25
Name: e, dtype: object

## Trier et classer

In [135]:
# Sort the rows by index label. The axis must be passed by keyword:
# pandas 2.x rejects a positional axis argument here.
frame.sort_index(axis=0)

Unnamed: 0,b,d,e
Ohio,0.176565,-0.018599,0.988118
Oregon,-1.448079,-0.044408,1.251633
Texas,-1.246975,-0.236043,-0.943573
Utah,1.473356,-0.445247,1.589189


In [137]:
frame.sort_index(axis=1,ascending=False)

Unnamed: 0,e,d,b
Utah,1.589189,-0.445247,1.473356
Ohio,0.988118,-0.018599,0.176565
Texas,-0.943573,-0.236043,-1.246975
Oregon,1.251633,-0.044408,-1.448079


In [139]:
# Series with missing values: sort_values orders the numbers and leaves the
# NaN entries at the end.
obj = Series(np.array([4, np.nan, 7, np.nan, -3, 2]))
obj.sort_values()

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [141]:
frame.sort_values(by='b')

Unnamed: 0,b,d,e
Oregon,-1.448079,-0.044408,1.251633
Texas,-1.246975,-0.236043,-0.943573
Ohio,0.176565,-0.018599,0.988118
Utah,1.473356,-0.445247,1.589189


In [143]:
frame.sort_values(by='d',ascending=False)

Unnamed: 0,b,d,e
Ohio,0.176565,-0.018599,0.988118
Oregon,-1.448079,-0.044408,1.251633
Texas,-1.246975,-0.236043,-0.943573
Utah,1.473356,-0.445247,1.589189


In [144]:
frame.sort_values(by=['d','b'],ascending=[False,True])

Unnamed: 0,b,d,e
Ohio,0.176565,-0.018599,0.988118
Oregon,-1.448079,-0.044408,1.251633
Texas,-1.246975,-0.236043,-0.943573
Utah,1.473356,-0.445247,1.589189


In [145]:
obj

0    4.0
1    NaN
2    7.0
3    NaN
4   -3.0
5    2.0
dtype: float64

In [146]:
obj.rank()

0    3.0
1    NaN
2    4.0
3    NaN
4    1.0
5    2.0
dtype: float64

In [147]:
frame.rank(axis=0)

Unnamed: 0,b,d,e
Utah,4.0,1.0,4.0
Ohio,3.0,4.0,2.0
Texas,2.0,2.0,1.0
Oregon,1.0,3.0,3.0


In [148]:
frame.rank(axis=1)

Unnamed: 0,b,d,e
Utah,2.0,1.0,3.0
Ohio,2.0,1.0,3.0
Texas,1.0,3.0,2.0
Oregon,1.0,2.0,3.0


In [154]:
# Setting a row through .loc with a label that is not yet in the index appends
# that row to the frame.
# NOTE(review): the output below shows the Ohio row as 1.0/2.0/3.0, yet no
# visible cell assigns it -- presumably a deleted or out-of-order cell
# overwrote it. TODO: restore that cell so Restart & Run All reproduces this.
frame.loc['Floride']=[4,7,9]
frame

Unnamed: 0,b,d,e
Utah,1.473356,-0.445247,1.589189
Ohio,1.0,2.0,3.0
Texas,-1.246975,-0.236043,-0.943573
Oregon,-1.448079,-0.044408,1.251633
Floride,4.0,7.0,9.0


In [156]:
frame.index.is_unique

True

In [164]:
# Add a 'Mississipi' row. DataFrame.append was removed in pandas 2.0;
# pd.concat with a one-row frame is the portable equivalent.
# NOTE: the cell rebinds `frame` to a longer frame, so every re-run adds one
# more 'Mississipi' row (hence the duplicated rows in the saved output).
frame = pd.concat([frame, DataFrame([[5, 5, 5]], index=['Mississipi'], columns=frame.columns)])
frame

Unnamed: 0,b,d,e
Utah,1.473356,-0.445247,1.589189
Ohio,1.0,2.0,3.0
Texas,-1.246975,-0.236043,-0.943573
Oregon,-1.448079,-0.044408,1.251633
Floride,4.0,7.0,9.0
Mississipi,5.0,5.0,5.0
Mississipi,5.0,5.0,5.0
Mississipi,5.0,5.0,5.0


In [166]:
# drop() removes every row carrying the label, so all duplicated 'Mississipi'
# rows disappear at once.
frame = frame.drop('Mississipi')
frame

Unnamed: 0,b,d,e
Utah,1.473356,-0.445247,1.589189
Ohio,1.0,2.0,3.0
Texas,-1.246975,-0.236043,-0.943573
Oregon,-1.448079,-0.044408,1.251633
Floride,4.0,7.0,9.0


In [167]:
# Add the 'Mississipi' row back, once. DataFrame.append was removed in
# pandas 2.0; pd.concat with a one-row frame is the portable equivalent.
frame = pd.concat([frame, DataFrame([[5, 5, 5]], index=['Mississipi'], columns=frame.columns)])
frame

Unnamed: 0,b,d,e
Utah,1.473356,-0.445247,1.589189
Ohio,1.0,2.0,3.0
Texas,-1.246975,-0.236043,-0.943573
Oregon,-1.448079,-0.044408,1.251633
Floride,4.0,7.0,9.0
Mississipi,5.0,5.0,5.0


## Cumul et statistiques descriptives

In [168]:
frame.sum()

b     8.778301
d    13.274302
e    18.897248
dtype: float64

In [169]:
frame.sum(axis=1)

Utah           2.617298
Ohio           6.000000
Texas         -2.426592
Oregon        -0.240854
Floride       20.000000
Mississipi    15.000000
dtype: float64

In [170]:
frame.mean(axis=1,skipna=False)

Utah          0.872433
Ohio          2.000000
Texas        -0.808864
Oregon       -0.080285
Floride       6.666667
Mississipi    5.000000
dtype: float64

In [172]:
frame.idxmax()

b    Mississipi
d       Floride
e       Floride
dtype: object

In [173]:
frame.idxmax(axis=1)

Utah          e
Ohio          e
Texas         d
Oregon        e
Floride       e
Mississipi    b
dtype: object

In [174]:
frame.cumsum()

Unnamed: 0,b,d,e
Utah,1.473356,-0.445247,1.589189
Ohio,2.473356,1.554753,4.589189
Texas,1.22638,1.31871,3.645615
Oregon,-0.221699,1.274302,4.897248
Floride,3.778301,8.274302,13.897248
Mississipi,8.778301,13.274302,18.897248


In [175]:
frame.cumsum(axis=1)

Unnamed: 0,b,d,e
Utah,1.473356,1.028109,2.617298
Ohio,1.0,3.0,6.0
Texas,-1.246975,-1.483018,-2.426592
Oregon,-1.448079,-1.492487,-0.240854
Floride,4.0,11.0,20.0
Mississipi,5.0,10.0,15.0


In [176]:
frame.describe()

Unnamed: 0,b,d,e
count,6.0,6.0,6.0
mean,1.46305,2.212384,3.149541
std,2.644979,3.126918,3.477493
min,-1.448079,-0.445247,-0.943573
25%,-0.685232,-0.188134,1.336022
50%,1.236678,0.977796,2.294594
75%,3.368339,4.25,4.5
max,5.0,7.0,9.0


In [178]:
frame.diff()

Unnamed: 0,b,d,e
Utah,,,
Ohio,-0.473356,2.445247,1.410811
Texas,-2.246975,-2.236043,-3.943573
Oregon,-0.201104,0.191635,2.195206
Floride,5.448079,7.044408,7.748367
Mississipi,1.0,-2.0,-4.0


## Corrélation et covariance

In [184]:
# NOTE(review): these imports sit in the middle of the notebook; the import
# cell at the top is the usual home for them.
import pandas_datareader.data as web
from datetime import datetime
# Date window requested from the data reader.
start = datetime(2017, 5, 9)
end = datetime(2017, 5, 24)

# Daily open/high/low/close/volume rows for ticker 'F' from the 'iex' source.
# NOTE(review): this rebinds `f`, which earlier cells use as a function name.
f = web.DataReader('F', 'iex', start, end)
f
# NOTE(review): dead code below -- `pdr` is never imported in the visible
# notebook; delete it or turn it into a working cell.
#all_data = {}
#for ticker in ['AAPL','IBM','MSFT','GOOG']:
#    all_data[ticker] = pdr.get_data_yahoo(ticker,'1/1/2000','1/1/2010')


1y


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-05-09,10.6043,10.7138,10.5662,10.6233,44142864
2017-05-10,10.6233,10.6614,10.4996,10.5091,34989254
2017-05-11,10.5091,10.5377,10.4425,10.4805,29276286
2017-05-12,10.471,10.4996,10.3758,10.3949,34264277
2017-05-15,10.4329,10.4996,10.3949,10.4139,27359494
2017-05-16,10.5281,10.5377,10.3758,10.4139,42130215
2017-05-17,10.3568,10.4234,10.1569,10.2426,54670105
2017-05-18,10.214,10.2997,10.1855,10.2711,37890811
2017-05-19,10.2902,10.4139,10.2711,10.3473,32431694
2017-05-22,10.5757,10.5948,10.452,10.5662,73334997


In [188]:
f.loc['2017-05-22',:]


open      1.057570e+01
high      1.059480e+01
low       1.045200e+01
close     1.056620e+01
volume    7.333500e+07
Name: 2017-05-22, dtype: float64

In [189]:
f.volume.corr(f.high)

0.16642768885448253

In [190]:
f.corr()

Unnamed: 0,open,high,low,close,volume
open,1.0,0.954938,0.871864,0.86703,0.218847
high,0.954938,1.0,0.926259,0.919931,0.166428
low,0.871864,0.926259,1.0,0.961923,-0.023862
close,0.86703,0.919931,0.961923,1.0,0.189893
volume,0.218847,0.166428,-0.023862,0.189893,1.0


In [191]:
f.cov()

Unnamed: 0,open,high,low,close,volume
open,0.017765,0.014597,0.014126,0.013097,369118.4
high,0.014597,0.013153,0.012913,0.011957,241537.2
low,0.014126,0.012913,0.014778,0.013253,-36707.05
close,0.013097,0.011957,0.013253,0.012845,272346.1
volume,369118.386191,241537.153582,-36707.050059,272346.111641,160138900000000.0


## Valeurs uniques

In [192]:
# The pattern a, b, c repeated four times (12 values).
objet = Series(list('abc') * 4)

In [197]:
# Distinct values of objet, in sorted order.
uniques = np.sort(objet.unique())
uniques


array(['a', 'b', 'c'], dtype=object)

In [198]:
objet.value_counts()

a    4
c    4
b    4
dtype: int64

In [200]:
mask = objet.isin(['b','c'])
mask

0     False
1      True
2      True
3     False
4      True
5      True
6     False
7      True
8      True
9     False
10     True
11     True
dtype: bool

In [206]:
# Grades table: one row per subject, one column per student (Ele1..Ele4).
data = DataFrame(
    data=[[1, 2, 1, 1],
          [3, 3, 5, 1],
          [4, 1, 2, 1],
          [3, 2, 4, 1],
          [4, 3, 4, 4]],
    index=['Maths', 'Litterature', 'Informatique', 'Statistiques', 'Business'],
    columns=['Ele1', 'Ele2', 'Ele3', 'Ele4'],
)
data

Unnamed: 0,Ele1,Ele2,Ele3,Ele4
Maths,1,2,1,1
Litterature,3,3,5,1
Informatique,4,1,2,1
Statistiques,3,2,4,1
Business,4,3,4,4


In [207]:
data.Ele1.value_counts()

4    2
3    2
1    1
Name: Ele1, dtype: int64

In [209]:
# Count, for each column, how often every value occurs; values absent from a
# column come out as NaN. The top-level pd.value_counts function is deprecated,
# so the Series method is applied to each column instead.
result = data.apply(pd.Series.value_counts)
result

Unnamed: 0,Ele1,Ele2,Ele3,Ele4
1,1.0,1.0,1.0,4.0
2,,2.0,1.0,
3,2.0,2.0,,
4,2.0,,2.0,1.0
5,,,1.0,


In [210]:
# Same per-column value counts, with absent values reported as 0 instead of NaN.
# The top-level pd.value_counts function is deprecated, so the Series method is
# applied to each column instead.
result = data.apply(pd.Series.value_counts).fillna(0)
result

Unnamed: 0,Ele1,Ele2,Ele3,Ele4
1,1.0,1.0,1.0,4.0
2,0.0,2.0,1.0,0.0
3,2.0,2.0,0.0,0.0
4,2.0,0.0,2.0,1.0
5,0.0,0.0,1.0,0.0


## Gérer les données manquantes

In [211]:
# Object Series with one missing entry; isna() flags it position by position.
string_data = Series(data=['a', np.nan, 'c'])
string_data.isna()

0    False
1     True
2    False
dtype: bool

In [212]:
string_data_na = string_data.dropna()
string_data_na

0    a
2    c
dtype: object

In [213]:
string_data[string_data.notnull()]

0    a
2    c
dtype: object

In [216]:
# 3x2 frame with a progressively emptier layout: a full row, a half-empty row
# and an all-NaN row.
data = DataFrame(np.array([
    [1, 2],
    [1, np.nan],
    [np.nan, np.nan],
]))
data

Unnamed: 0,0,1
0,1.0,2.0
1,1.0,
2,,


In [217]:
data.dropna(how='all')

Unnamed: 0,0,1
0,1.0,2.0
1,1.0,


In [218]:
data.dropna(axis=1,how='all')

Unnamed: 0,0,1
0,1.0,2.0
1,1.0,
2,,


In [220]:
# thresh=2 keeps only the columns holding at least two non-NaN values;
# column 1 has a single one and is dropped.
data.dropna(axis=1,thresh=2)

Unnamed: 0,0
0,1.0
1,1.0
2,


## Remplir les données manquantes

In [221]:
# 4x3 frame with scattered missing values, used for the fillna examples.
data2 = DataFrame(np.array([
    [1, 2, np.nan],
    [1, np.nan, 3],
    [np.nan, np.nan, 4],
    [np.nan, np.nan, np.nan],
]))
data2

Unnamed: 0,0,1,2
0,1.0,2.0,
1,1.0,,3.0
2,,,4.0
3,,,


In [228]:
data2.fillna({0:0,1:-1,2:1})

Unnamed: 0,0,1,2
0,1.0,2.0,1.0
1,1.0,-1.0,3.0
2,0.0,-1.0,4.0
3,0.0,-1.0,1.0


In [229]:
# Fresh 4x3 frame with the same missing-value layout, for the ffill/bfill examples.
data3 = DataFrame(np.array([
    [1, 2, np.nan],
    [1, np.nan, 3],
    [np.nan, np.nan, 4],
    [np.nan, np.nan, np.nan],
]))
data3

Unnamed: 0,0,1,2
0,1.0,2.0,
1,1.0,,3.0
2,,,4.0
3,,,


In [231]:
# Forward-fill down each column, filling at most two consecutive NaN per gap.
# fillna(method=...) is deprecated in recent pandas; ffill() is the direct equivalent.
data3.ffill(limit=2)

Unnamed: 0,0,1,2
0,1.0,2.0,
1,1.0,2.0,3.0
2,1.0,2.0,4.0
3,1.0,,4.0


In [234]:
# Backward-fill: each NaN takes the next valid value found below it in the column.
# fillna(method=...) is deprecated in recent pandas; bfill() is the direct equivalent.
data3.bfill()

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,1.0,,3.0
2,,,4.0
3,,,


In [233]:
data3.fillna(data3.mean())

Unnamed: 0,0,1,2
0,1.0,2.0,3.5
1,1.0,2.0,3.0
2,1.0,2.0,4.0
3,1.0,2.0,3.5


## Indexation hiérarchique

In [237]:
# Ten standard-normal draws under a two-level index (letter, number).
# A dedicated, seeded RandomState makes the cell reproducible on every run
# without touching NumPy's global RNG.
data = Series(np.random.RandomState(1).randn(10),
             index=[['a','a','a','b','b','b','c','c','d','d'],
                    [1,2,3,1,2,3,1,2,2,3]])

In [238]:
data

a  1   -0.035591
   2   -1.462311
   3    0.416143
b  1    1.280078
   2    1.367893
   3   -0.179184
c  1   -0.789076
   2   -0.196300
d  2   -1.174597
   3    0.518911
dtype: float64

In [239]:
data.index

MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])

In [240]:
data['b']

1    1.280078
2    1.367893
3   -0.179184
dtype: float64

In [241]:
data['b':'c']

b  1    1.280078
   2    1.367893
   3   -0.179184
c  1   -0.789076
   2   -0.196300
dtype: float64

In [242]:
data[:,2]

a   -1.462311
b    1.367893
c   -0.196300
d   -1.174597
dtype: float64

In [244]:
newFrame = data.unstack()
newFrame

Unnamed: 0,1,2,3
a,-0.035591,-1.462311,0.416143
b,1.280078,1.367893,-0.179184
c,-0.789076,-0.1963,
d,,-1.174597,0.518911


In [245]:
newFrame.stack()

a  1   -0.035591
   2   -1.462311
   3    0.416143
b  1    1.280078
   2    1.367893
   3   -0.179184
c  1   -0.789076
   2   -0.196300
d  2   -1.174597
   3    0.518911
dtype: float64

In [246]:
data.swaplevel(0,1)

1  a   -0.035591
2  a   -1.462311
3  a    0.416143
1  b    1.280078
2  b    1.367893
3  b   -0.179184
1  c   -0.789076
2  c   -0.196300
   d   -1.174597
3  d    0.518911
dtype: float64

In [250]:
data.sort_index(level=0)

a  1   -0.035591
   2   -1.462311
   3    0.416143
b  1    1.280078
   2    1.367893
   3   -0.179184
c  1   -0.789076
   2   -0.196300
d  2   -1.174597
   3    0.518911
dtype: float64

In [251]:
data.sort_index(level=1)

a  1   -0.035591
b  1    1.280078
c  1   -0.789076
a  2   -1.462311
b  2    1.367893
c  2   -0.196300
d  2   -1.174597
a  3    0.416143
b  3   -0.179184
d  3    0.518911
dtype: float64

In [252]:
# Sum per outer-level label. The `level` argument of sum() was removed in
# pandas 2.0; grouping on the index level is the equivalent.
data.groupby(level=0).sum()

a   -1.081759
b    2.468787
c   -0.985375
d   -0.655686
dtype: float64

In [253]:
# Sum per inner-level label. The `level` argument of sum() was removed in
# pandas 2.0; grouping on the index level is the equivalent.
data.groupby(level=1).sum()

1    0.455411
2   -1.465314
3    0.755870
dtype: float64

In [256]:
data.iloc[7]

-0.19629988607335458

In [262]:
# NOTE(review): pd.Panel is deprecated (see the warning printed by the
# to_panel() cell further down); a DataFrame with a MultiIndex or the xarray
# package is the recommended replacement. TODO: port this section.
data3D = pd.Panel([[[1,2],[3,4]],[[5,6],[7,8]]])

In [263]:
data3D

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 2 (major_axis) x 2 (minor_axis)
Items axis: 0 to 1
Major_axis axis: 0 to 1
Minor_axis axis: 0 to 1

In [264]:
data3D[0]

Unnamed: 0,0,1
0,1,2
1,3,4


In [265]:
data3D[1]

Unnamed: 0,0,1
0,5,6
1,7,8


In [266]:
data3D.iloc[:,1,:]

Unnamed: 0,0,1
0,3,7
1,4,8


In [269]:
stacked = data3D.to_frame()

In [270]:
stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
major,minor,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,1,5
0,1,2,6
1,0,3,7
1,1,4,8


In [271]:
stacked.to_panel()

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  """Entry point for launching an IPython kernel.


<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 2 (major_axis) x 2 (minor_axis)
Items axis: 0 to 1
Major_axis axis: 0 to 1
Minor_axis axis: 0 to 1