In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [3]:
frame = DataFrame([[1.4, np.nan], [7, -2], [np.nan, np.nan], [0.7, -1.6]],
                 index=list("abcd"), columns=["one", "two"])
frame

Unnamed: 0,one,two
a,1.4,
b,7.0,-2.0
c,,
d,0.7,-1.6


In [4]:
frame.sum()

one    9.1
two   -3.6
dtype: float64

In [5]:
frame.sum(axis=1)

a    1.4
b    5.0
c    0.0
d   -0.9
dtype: float64

In [6]:
frame.mean(axis=1, skipna=False)

a     NaN
b    2.50
c     NaN
d   -0.45
dtype: float64

## Returning an index instead of a value ##

In [7]:
frame.idxmax()

one    b
two    d
dtype: object

In [8]:
frame.idxmin(axis=1)

a    one
b    two
c    NaN
d    two
dtype: object

## Accumulations ##

In [9]:
frame

Unnamed: 0,one,two
a,1.4,
b,7.0,-2.0
c,,
d,0.7,-1.6


In [12]:
frame.cumsum()

Unnamed: 0,one,two
a,1.4,
b,8.4,-2.0
c,,
d,9.1,-3.6


## Descriptions ##

In [13]:
frame.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.033333,-1.8
std,3.453018,0.282843
min,0.7,-2.0
25%,1.05,-1.9
50%,1.4,-1.8
75%,4.2,-1.7
max,7.0,-1.6


In [14]:
obj = Series(list("aabc")*4)
obj

0     a
1     a
2     b
3     c
4     a
5     a
6     b
7     c
8     a
9     a
10    b
11    c
12    a
13    a
14    b
15    c
dtype: object

In [16]:
obj.describe()

count     16
unique     3
top        a
freq       8
dtype: object

## More ##

http://pandas.pydata.org/pandas-docs/stable/basics.html#descriptive-statistics

## Correlation ##

In [22]:
frame = DataFrame(np.random.random((10,4)), columns=list("abcd"))
frame

Unnamed: 0,a,b,c,d
0,0.217849,0.056321,0.732191,0.642331
1,0.990432,0.674251,0.539245,0.014772
2,0.840753,0.448216,0.31371,0.014617
3,0.372154,0.131951,0.809327,0.475362
4,0.716636,0.892594,0.537754,0.306107
5,0.827176,0.136766,0.194385,0.510474
6,0.196081,0.296658,0.556442,0.754712
7,0.66861,0.869228,0.248287,0.109076
8,0.053153,0.549109,0.029836,0.068919
9,0.969511,0.68381,0.608652,0.743364


In [23]:
frame.corr()

Unnamed: 0,a,b,c,d
a,1.0,0.434035,-0.003766,-0.214107
b,0.434035,1.0,-0.278419,-0.482284
c,-0.003766,-0.278419,1.0,0.549015
d,-0.214107,-0.482284,0.549015,1.0


In [24]:
frame.cov()

Unnamed: 0,a,b,c,d
a,0.119425,0.046325,-0.000325,-0.022161
b,0.046325,0.095387,-0.021479,-0.044612
c,-0.000325,-0.021479,0.062396,0.041074
d,-0.022161,-0.044612,0.041074,0.089704


In [25]:
frame.corrwith(frame.c)

a   -0.003766
b   -0.278419
c    1.000000
d    0.549015
dtype: float64

## Counts ##

In [27]:
obj = Series(np.random.choice(list("abcde"), 10))
obj

0    d
1    c
2    c
3    a
4    d
5    c
6    b
7    b
8    e
9    b
dtype: object

In [28]:
uniques = obj.unique()
uniques

array(['d', 'c', 'a', 'b', 'e'], dtype=object)

In [29]:
obj.value_counts()

b    3
c    3
d    2
e    1
a    1
dtype: int64

In [30]:
pd.value_counts(np.random.binomial(5, 0.2, size=10))

0    4
1    3
2    2
3    1
dtype: int64

In [32]:
mask = obj.isin(list("cb"))
mask

0    False
1     True
2     True
3    False
4    False
5     True
6     True
7     True
8    False
9     True
dtype: bool

In [33]:
obj[mask]

1    c
2    c
5    c
6    b
7    b
9    b
dtype: object

In [34]:
frame = DataFrame({"Q1":[1,2,3,7,7], "Q2":[2,2,3,4,4], "Q3":[1,2,3,1,2]})
frame

Unnamed: 0,Q1,Q2,Q3
0,1,2,1
1,2,2,2
2,3,3,3
3,7,4,1
4,7,4,2


In [36]:
frame.apply(pd.value_counts).fillna("n/a")

Unnamed: 0,Q1,Q2,Q3
1,1.0,,2.0
2,1.0,2.0,2.0
3,1.0,1.0,1.0
4,,2.0,
7,2.0,,
