In [39]:
import numpy as np
import pandas as pd

# Creating DataFrames

## Direct Creation

In [27]:
# Simplest case: build the frame from a dict mapping column name -> list of values.
# No index is passed, so the rows get the implicit integer index starting at 0.
d = dict(
    one=[1.0, 2.0, 3.0, 4.0],
    two=[4.0, 3.0, 2.0, 1.0],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [29]:
# Slightly more advanced: each column is a Series carrying its own index.
# The indices need not overlap everywhere; the resulting frame aligns them on
# the union of labels and fills positions missing from a Series with NaN.
d = {
    "one": pd.Series(data=[1.0, 2.0, 3.0], index=list("abc")),
    "two": pd.Series(data=[1.0, 2.0, 3.0, 4.0], index=list("abcd")),
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [31]:
# The same alignment works for date indices: two daily ranges of six days,
# offset by three days, so only the middle three dates are present in both.
date1 = pd.date_range(start="2013-01-01", periods=6, freq="D")
date2 = pd.date_range(start="2013-01-04", periods=6, freq="D")

d = {
    column: pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], index=index)
    for column, index in (("one", date1), ("two", date2))
}

df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
2013-01-01,1.0,
2013-01-02,2.0,
2013-01-03,3.0,
2013-01-04,4.0,1.0
2013-01-05,5.0,2.0
2013-01-06,6.0,3.0
2013-01-07,,4.0
2013-01-08,,5.0
2013-01-09,,6.0


## Date Range

In [43]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency).
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [45]:
# Build a 6x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# A locally seeded generator makes the values (and every output derived from
# them in later cells) identical on each Restart & Run All; re-run the notebook
# once to refresh the stored outputs.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,1.043608,1.30857,0.690711,-0.397451
2013-01-02,0.809884,0.126781,1.448234,1.055346
2013-01-03,-0.961951,0.234793,0.621372,-1.014126
2013-01-04,1.79688,-0.631913,0.169362,-0.06652
2013-01-05,-0.902186,0.352005,-2.069226,0.527363
2013-01-06,-0.221769,1.060057,-0.029061,-0.419968


In [47]:
# Show the dtype of each column (all four are float64 here).
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

# Getting Data

Each column of a DataFrame is a Series.

In [81]:
# Select a single column by its label; the result is a Series named after the column.
df["A"]
# Alternative spelling via attribute access: df.A

2013-01-01    1.043608
2013-01-02    0.809884
2013-01-03   -0.961951
2013-01-04    1.796880
2013-01-05   -0.902186
2013-01-06   -0.221769
Freq: D, Name: A, dtype: float64

In [83]:
# Select several columns at once by passing a list of labels;
# the result is a DataFrame restricted to those columns.
df[["A", "C"]]

Unnamed: 0,A,C
2013-01-01,1.043608,0.690711
2013-01-02,0.809884,1.448234
2013-01-03,-0.961951,0.621372
2013-01-04,1.79688,0.169362
2013-01-05,-0.902186,-2.069226
2013-01-06,-0.221769,-0.029061


In [85]:
# Positional selection with iloc: row 3 (the fourth row) and the columns at
# positions 2 and 3 -- the slice end (4) is exclusive.
df.iloc[3, 2:4]

C    0.169362
D   -0.066520
Name: 2013-01-04 00:00:00, dtype: float64

In [65]:
# Boolean indexing: keep only the rows whose value in column "A" is positive.
is_positive = df["A"] > 0
df[is_positive]

Unnamed: 0,A,B,C,D
2013-01-01,1.043608,1.30857,0.690711,-0.397451
2013-01-02,0.809884,0.126781,1.448234,1.055346
2013-01-04,1.79688,-0.631913,0.169362,-0.06652
