In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

plt.style.use('ggplot')
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)

plt.rcParams['figure.figsize'] = (15, 5)

# Series

In [18]:
# Build a Series of random normal draws, one per label in `labels`.
labels = ['a', 'b', 'c', 'd', 'e']
s = pd.Series(data=np.random.randn(len(labels)), index=labels)
s

a   -1.710042
b    0.349722
c    0.602521
d    1.309201
e   -1.382597
dtype: float64

In [19]:
# Look up a single value by its index label.
s.loc['b']

0.34972163800105005

In [22]:
# Convert the Series into a plain dict keyed by index label, then display it.
mapping = s.to_dict()
mapping

{'a': -1.7100424665181782,
 'b': 0.34972163800105005,
 'c': 0.60252116216063689,
 'd': 1.3092012290169912,
 'e': -1.3825966690379567}

In [27]:
# Rebuild `s` from the dict, keeping only the requested labels in the given
# order. 'f' is not a key of `mapping`, so its entry comes out as NaN.
wanted_labels = ['b', 'e', 'a', 'f']
s = pd.Series(mapping).reindex(wanted_labels)
s

b    0.349722
e   -1.382597
a   -1.710042
f         NaN
dtype: float64

In [30]:
# Keep only the non-missing entries by filtering with a boolean mask.
s[s.notnull()]

b    0.349722
e   -1.382597
a   -1.710042
dtype: float64

In [31]:
# Same result as the boolean-mask filter: drop the missing entries.
s.dropna()


b    0.349722
e   -1.382597
a   -1.710042
dtype: float64

In [25]:
# First three entries by position.
# NOTE(review): the recorded output (a, b, c) comes from execution In [25],
# before `s` was rebuilt in In [27]; a fresh top-to-bottom run shows b, e, a.
s.iloc[:3]

a   -1.710042
b    0.349722
c    0.602521
dtype: float64

In [32]:
# Element-wise arithmetic; the missing entry stays NaN.
s.mul(2)

b    0.699443
e   -2.765193
a   -3.420085
f         NaN
dtype: float64

# DataFrames

In [37]:
# Six-row frame: two columns of random normal draws ('a' and 'c') around an
# alternating "foo"/"bar" string column ('b').
df = pd.DataFrame(dict(
    a=np.random.randn(6),
    b=["foo", "bar"] * 3,
    c=np.random.randn(6),
))
df

Unnamed: 0,a,b,c
0,-0.847167,foo,-1.546572
1,0.615563,bar,0.31851
2,-1.006326,foo,0.921611
3,-0.269533,bar,-0.707554
4,0.126651,foo,0.327314
5,0.886539,bar,-0.189176


In [40]:
# Add two columns: 'd' takes one value per row from range(6), and 'e' is the
# scalar 4 broadcast to every row.
df = df.assign(d=range(6), e=4)

In [41]:
# Display the frame, which now includes the added 'd' and 'e' columns.
df

Unnamed: 0,a,b,c,d,e
0,-0.847167,foo,-1.546572,0,4
1,0.615563,bar,0.31851,1,4
2,-1.006326,foo,0.921611,2,4
3,-0.269533,bar,-0.707554,3,4
4,0.126651,foo,0.327314,4,4
5,0.886539,bar,-0.189176,5,4


In [43]:
# First three rows by position.
df.head(3)

Unnamed: 0,a,b,c,d,e
0,-0.847167,foo,-1.546572,0,4
1,0.615563,bar,0.31851,1,4
2,-1.006326,foo,0.921611,2,4


In [45]:
# Last two rows by position.
df.tail(2)

Unnamed: 0,a,b,c,d,e
4,0.126651,foo,0.327314,4,4
5,0.886539,bar,-0.189176,5,4


In [65]:
# Row labels of the frame (an Index object).
df.index

RangeIndex(start=0, stop=6, step=1)

In [64]:
# The same row labels as a plain NumPy array.
df.index.values

array([0, 1, 2, 3, 4, 5])

In [36]:
# Column labels of the frame.
# NOTE(review): the recorded output lists only 'a', 'b', 'c' because this cell
# ran as In [36], before 'd' and 'e' were added in In [40]; re-run to refresh.
df.columns

Index([u'a', u'b', u'c'], dtype='object')

In [46]:
# Cross-section: the row labelled 0, returned as a Series indexed by column.
df.loc[0]

a   -0.847167
b         foo
c    -1.54657
d           0
e           4
Name: 0, dtype: object

In [47]:
# Scalar at the first row, first column.
# `.ix` was removed in pandas 1.0. `.ix[0, 0]` here meant row label 0 and
# column position 0; with the default 0..5 integer index, label 0 is also
# position 0, so `.iloc[0, 0]` selects the same cell.
df.iloc[0, 0]

-0.84716716161384475

In [48]:
# The row labelled 1, as a Series indexed by column name.
# `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
df.loc[1]

a    0.615563
b         bar
c     0.31851
d           1
e           4
Name: 1, dtype: object

In [49]:
# Scalar at row label 1, column 'b'.
# `.ix` was removed in pandas 1.0; both selectors are labels, so use `.loc`.
df.loc[1, 'b']

'bar'

In [67]:
# Rows labelled 0, 2 and 3, columns 'a' through 'd'. Label slices include
# both endpoints, so 'd' is part of the result.
# `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
df.loc[[0, 2, 3], 'a':'d']

Unnamed: 0,a,b,c,d
0,-0.847167,foo,-1.546572,0
2,-1.006326,foo,0.921611,2
3,-0.269533,bar,-0.707554,3


In [57]:
# Scalar lookup by row label and column label.
# `DataFrame.get_value` was removed in pandas 1.0; `.at` is the replacement
# accessor for a single cell.
df.at[1, 'b']

'bar'

In [56]:
# The whole frame as a 2-D NumPy array. The columns mix floats, strings and
# integers, so the array dtype is object.
# `DataFrame.get_values` was removed in pandas 1.0; `.values` returns the same
# array and is also available in older pandas releases.
df.values

array([[-0.8471671616138448, 'foo', -1.5465720368344122, 0, 4],
       [0.6155631177213801, 'bar', 0.31851014716250753, 1, 4],
       [-1.0063264598173367, 'foo', 0.9216106070806644, 2, 4],
       [-0.26953301407967006, 'bar', -0.7075537364745367, 3, 4],
       [0.1266514588115936, 'foo', 0.32731415345750053, 4, 4],
       [0.8865387383149868, 'bar', -0.1891759230960727, 5, 4]], dtype=object)

In [69]:

# Boolean mask: True where column 'c' is strictly positive.
df['c'].gt(0)

0    False
1     True
2     True
3    False
4     True
5    False
Name: c, dtype: bool

In [71]:
# Keep only the rows where column 'c' is strictly positive.
# `.ix` was removed in pandas 1.0; `.loc` accepts the same boolean mask.
df.loc[df['c'] > 0]

Unnamed: 0,a,b,c,d,e
1,0.615563,bar,0.31851,1,4
2,-1.006326,foo,0.921611,2,4
4,0.126651,foo,0.327314,4,4


In [72]:
# Shorthand for the same row filter: index the frame directly with the mask.
df[df['c'].gt(0)]

Unnamed: 0,a,b,c,d,e
1,0.615563,bar,0.31851,1,4
2,-1.006326,foo,0.921611,2,4
4,0.126651,foo,0.327314,4,4


In [77]:
# Same three columns as before, but with an explicit row index: six
# consecutive dates starting at 1/1/2000.
df = pd.DataFrame(
    dict(
        a=np.random.randn(6),
        b=["foo", "bar"] * 3,
        c=np.random.randn(6),
    ),
    index=pd.date_range(start='1/1/2000', periods=6),
)
df

Unnamed: 0,a,b,c
2000-01-01,0.056601,foo,0.968259
2000-01-02,-0.621411,bar,0.215674
2000-01-03,-0.438235,foo,1.26978
2000-01-04,-0.950255,bar,-0.088836
2000-01-05,0.635318,foo,1.058628
2000-01-06,0.602586,bar,0.279865


In [79]:
# Passing `columns` selects and orders the columns. 'd' has no data in the
# dict, so it is created and filled with missing values.
df = pd.DataFrame(
    dict(
        a=np.random.randn(6),
        b=["foo", "bar"] * 3,
        c=np.random.randn(6),
    ),
    columns=['a', 'b', 'c', 'd'],
)
df

Unnamed: 0,a,b,c,d
0,1.59784,foo,0.199954,
1,-0.885875,bar,-0.96186,
2,1.175793,foo,0.089805,
3,1.822487,bar,-0.531786,
4,1.938755,foo,-2.456806,
5,-0.085507,bar,-0.159147,


In [82]:
# Nested dict: outer keys are the column names, inner keys are the row labels,
# and each value is a single random normal draw. Draws happen column by
# column, row by row.
data = {
    col: {row: np.random.randn() for row in ['a', 'b', 'c', 'd']}
    for col in ['foo', 'bar', 'baz']
}
data

{'bar': {'a': 0.9857746755053611,
  'b': -0.212893235329203,
  'c': -0.4687737272424324,
  'd': -2.734836311499461},
 'baz': {'a': 0.2836868282576131,
  'b': 1.9809900762005792,
  'c': -0.47860703238638674,
  'd': -0.13933256729184165},
 'foo': {'a': 1.1840282391333417,
  'b': -1.3067182554857497,
  'c': 0.39201907923526486,
  'd': 0.003705188325622484}}

In [83]:
# Dict of dicts -> frame: outer keys become columns, inner keys become rows.
pd.DataFrame.from_dict(data)

Unnamed: 0,bar,baz,foo
a,0.985775,0.283687,1.184028
b,-0.212893,1.98099,-1.306718
c,-0.468774,-0.478607,0.392019
d,-2.734836,-0.139333,0.003705
