## Setup

In [1]:
import pandas as pd
import numpy as np
import datetime

# turn of data table rendering (of the 90s)
pd.set_option('display.notebook_repr_html', False)   

## Creating a DataFrame

In [2]:
# Collection of data columns
s1 = np.random.randn(5)
s2 = [True, True, False, True, False]
s3 = ['Apple', 'Banana', 'Tomato', 'Bean', 'Rice']

# Dict with added column names
data = {'Randnum': s1, 'IsBool': s2, 'Name': s3}
df = pd.DataFrame(data)
df

    Randnum  IsBool    Name
0 -0.032206    True   Apple
1  0.875514    True  Banana
2 -1.464212   False  Tomato
3  0.097504    True    Bean
4  0.615402   False    Rice

In [3]:
print df.index
print df.columns

Int64Index([0, 1, 2, 3, 4], dtype='int64')
Index([u'IsBool', u'Name', u'Randnum'], dtype='object')


In [4]:
# Create a new column and assign it all 127
df['New'] = 127
df

  IsBool    Name   Randnum  New
0   True   Apple -0.155137  127
1   True  Banana -0.191264  127
2  False  Tomato  0.764165  127
3   True    Bean  1.818205  127
4  False    Rice -0.618209  127

## Data retrieval

In [5]:
# Select the Randnum column
df.Randnum

0   -0.155137
1   -0.191264
2    0.764165
3    1.818205
4   -0.618209
Name: Randnum, dtype: float64

In [6]:
# Select the first three rows
df[:3]

  IsBool    Name   Randnum  New
0   True   Apple -0.155137  127
1   True  Banana -0.191264  127
2  False  Tomato  0.764165  127

In [7]:
# Select the second row
df.ix[1]

IsBool          True
Name          Banana
Randnum   -0.1912644
New              127
Name: 1, dtype: object

In [8]:
# Select the Randnum value of the second row
df.ix[1, 2] # or
df.ix[1, 'Randnum']

-0.19126442079621636

In [9]:
# Selecting specific rows and columns
df.ix[[0, 2, 6], ['Name', 'Randnum', 'Unknown']]

     Name   Randnum  Unknown
0   Apple -0.155137      NaN
2  Tomato  0.764165      NaN
6     NaN       NaN      NaN

## Conditions

In [10]:
# Retrieve boolean Series, True if Randnum is smaller than zero
belowzero = df.Randnum < 0
belowzero

0     True
1     True
2    False
3    False
4     True
Name: Randnum, dtype: bool

In [11]:
# Selects all rows meeting the belowzero condition
df[belowzero]

  IsBool    Name   Randnum  New
0   True   Apple -0.155137  127
1   True  Banana -0.191264  127
4  False    Rice -0.618209  127

In [12]:
# Retrieve boolean Series, True if Randnum is smaller than zero
isapple = df['Name'] == 'Apple'
isapple

0     True
1    False
2    False
3    False
4    False
Name: Name, dtype: bool

In [13]:
# Select belowzero AND isapple conditions
df[belowzero & isapple]

  IsBool   Name   Randnum  New
0   True  Apple -0.155137  127

In [14]:
# Select belowzero OR isapple conditions
df[belowzero | isapple]

  IsBool    Name   Randnum  New
0   True   Apple -0.155137  127
1   True  Banana -0.191264  127
4  False    Rice -0.618209  127

## Date range as an index

In [15]:
# Set the index to a date range
df.index = pd.date_range('1-1-2015', periods=5, freq='d')
df.index.name = 'Date'
df

           IsBool    Name   Randnum  New
Date                                    
2015-01-01   True   Apple -0.155137  127
2015-01-02   True  Banana -0.191264  127
2015-01-03  False  Tomato  0.764165  127
2015-01-04   True    Bean  1.818205  127
2015-01-05  False    Rice -0.618209  127

## Nested dictionary to DataFrame

In [16]:
# Create a nested dictionary of equal inner value-count
data = {'Paris': {'N': 1.2, 'E': 4, 'S': 2.9, 'W': 0.8},
        'Amsterdam': {'N': 2.3, 'E': 1.7, 'S': 2.1, 'W': 7.2},
        'London': {'N': 9.7, 'E': 3.1, 'S': 7.2, 'W': 2}}

df2 = pd.DataFrame(data)
df2

   Amsterdam  London  Paris
E        1.7     3.1    4.0
N        2.3     9.7    1.2
S        2.1     7.2    2.9
W        7.2     2.0    0.8