In [1]:
import numpy as np
import pandas as pd

In [2]:
# Daily index of six consecutive dates beginning 2013-01-01; reused below as
# the row index of the demo frame.
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Draw from a seeded generator so the random frame (and every result derived
# from it) is identical on each Restart & Run All. Re-run the notebook once
# to refresh the displayed outputs.
rng = np.random.default_rng(seed=42)
# 6 rows (one per entry of `dates`) x 4 columns of standard-normal samples.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.030652,0.292336,0.350921,0.170295
2013-01-02,1.682899,-1.979501,-0.407145,-0.823166
2013-01-03,-0.832073,0.342231,-0.509647,0.200867
2013-01-04,-0.816926,-0.300399,0.259802,-0.547469
2013-01-05,-1.488883,-0.835526,0.333843,-1.910065
2013-01-06,0.4156,-1.526662,0.579523,1.471637


In [4]:
# Show the dtype of each column.
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

## Selecting Data

In [5]:
# Select a single column by its label; the result is a Series named "A".
df["A"]
# Alternative: attribute-style access, df.A

2013-01-01   -0.030652
2013-01-02    1.682899
2013-01-03   -0.832073
2013-01-04   -0.816926
2013-01-05   -1.488883
2013-01-06    0.415600
Freq: D, Name: A, dtype: float64

In [6]:
# First three rows, selected explicitly by integer position.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.030652,0.292336,0.350921,0.170295
2013-01-02,1.682899,-1.979501,-0.407145,-0.823166
2013-01-03,-0.832073,0.342231,-0.509647,0.200867


In [7]:
# Label-based selection: every row (`:`), restricted to columns A and B.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-0.030652,0.292336
2013-01-02,1.682899,-1.979501
2013-01-03,-0.832073,0.342231
2013-01-04,-0.816926,-0.300399
2013-01-05,-1.488883,-0.835526
2013-01-06,0.4156,-1.526662


In [10]:
# Boolean-mask selection: keep only the rows whose value in column A is positive.
df.loc[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-02,1.682899,-1.979501,-0.407145,-0.823166
2013-01-06,0.4156,-1.526662,0.579523,1.471637


## Statistics / Operations on Data

In [11]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.178339,-0.66792,0.101216,-0.23965
std,1.130617,0.955337,0.447656,1.14159
min,-1.488883,-1.979501,-0.509647,-1.910065
25%,-0.828287,-1.353878,-0.240408,-0.754242
50%,-0.423789,-0.567963,0.296822,-0.188587
75%,0.304037,0.144152,0.346651,0.193224
max,1.682899,0.342231,0.579523,1.471637


In [12]:
# Mean of each column (one value per column label A-D).
df.mean()

A   -0.178339
B   -0.667920
C    0.101216
D   -0.239650
dtype: float64

In [13]:
# Mean across the columns of each row (one value per date).
df.mean(axis=1)

2013-01-01    0.195725
2013-01-02   -0.381728
2013-01-03   -0.199656
2013-01-04   -0.351248
2013-01-05   -0.975158
2013-01-06    0.235024
Freq: D, dtype: float64

In [14]:
# Running total down each column. The built-in DataFrame.cumsum gives the same
# result as applying np.cumsum column by column, without the lambda wrapper.
df.cumsum()

Unnamed: 0,A,B,C,D
2013-01-01,-0.030652,0.292336,0.350921,0.170295
2013-01-02,1.652248,-1.687165,-0.056224,-0.65287
2013-01-03,0.820174,-1.344934,-0.565871,-0.452004
2013-01-04,0.003248,-1.645333,-0.306069,-0.999473
2013-01-05,-1.485635,-2.480859,0.027774,-2.909538
2013-01-06,-1.070035,-4.007521,0.607297,-1.437901


In [15]:
# Count how often each distinct value occurs in column A. With random floats
# every value is almost certainly unique, so this is probably not so useful.
df["A"].value_counts()

-0.030652    1
 1.682899    1
-0.832073    1
-0.816926    1
-1.488883    1
 0.415600    1
Name: A, dtype: int64

In [16]:
# Bucket column A into four fixed, right-closed intervals:
# (-3, -1], (-1, 0], (0, 1], (1, 3]. Values outside (-3, 3] fall into no bin
# (NaN) and are therefore left out of the counts below.
# Alternative: pd.cut(df.A, 3) lets pandas derive 3 equal-width bins from the data.
factor = pd.cut(df.A, [-3, -1, 0, 1, 3])
# Use the Series method; the top-level pd.value_counts helper is deprecated.
factor.value_counts()

(-1, 0]     3
(-3, -1]    1
(0, 1]      1
(1, 3]      1
Name: A, dtype: int64