## Function by Element
NumPy functions can be applied element by element, because the pandas library is built on top of the NumPy library.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 4x4 DataFrame holding the integers 0..15, with color names as row
# labels and item names as column labels.
# The pasted "... " continuation prompts were removed: IPython strips them
# silently, but they are a SyntaxError in plain Python (e.g. an exported script).
frame = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)

In [3]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [4]:
# For example, the NumPy sqrt() function is applied to every element and
# returns a DataFrame with the same row and column labels.
np.sqrt(frame)

Unnamed: 0,ball,pen,pencil,paper
red,0.0,1.0,1.414214,1.732051
blue,2.0,2.236068,2.44949,2.645751
yellow,2.828427,3.0,3.162278,3.316625
white,3.464102,3.605551,3.741657,3.872983


## Functions by Row or Column
The application of functions is not limited to ufuncs; it also includes
functions defined by the user. The important point is that they operate on a one-dimensional
array and return a single number as a result.<p>For example, you can define a lambda function
that calculates the range covered by the elements in an array.</p><br><code>f = lambda x: x.max() - x.min()</code>

In [20]:
# Range of a one-dimensional array: its largest value minus its smallest.
# NOTE: the name `f` is rebound by a `def f(x)` in a later cell, so this
# lambda is only available until that cell runs.
f = lambda x: x.max() - x.min()

In [21]:
# The same range calculation written as a regular named function
def c(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` is any object exposing ``max()`` and ``min()`` methods, such as a
    pandas Series or a NumPy array.
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest

In [22]:
# apply() runs the function just defined on the DataFrame. With the default
# axis it is called once per column, producing one value per column label.
frame.apply(c)

ball      12
pen       12
pencil    12
paper     12
dtype: int64

In [23]:
# axis=1 applies the function to each row instead, producing one value per row label.
frame.apply(c, axis=1)

red       3
blue      3
yellow    3
white     3
dtype: int64

In [24]:
def f(x):
    """Return the smallest and largest value of ``x`` as a labeled Series.

    The result has two entries, indexed ``'min'`` and ``'max'`` in that order.
    Note that this definition rebinds the name ``f`` used earlier for the
    range lambda.
    """
    labels = ['min', 'max']
    extremes = [x.min(), x.max()]
    return pd.Series(extremes, index=labels)

In [25]:
# Because f returns a Series, the result is a DataFrame: one row per label
# returned by f ('min', 'max') and one column per column of frame.
frame.apply(f)

Unnamed: 0,ball,pen,pencil,paper
min,0,1,2,3
max,12,13,14,15


## Statistics Functions

In [26]:
# Total of each column.
frame.sum()

ball      24
pen       28
pencil    32
paper     36
dtype: int64

In [27]:
# Average of each column (returned as floats).
frame.mean()

ball      6.0
pen       7.0
pencil    8.0
paper     9.0
dtype: float64

In [28]:
# The describe() function returns several summary statistics at once
# (count, mean, std, min, quartiles, max) for each column.
frame.describe()

Unnamed: 0,ball,pen,pencil,paper
count,4.0,4.0,4.0,4.0
mean,6.0,7.0,8.0,9.0
std,5.163978,5.163978,5.163978,5.163978
min,0.0,1.0,2.0,3.0
25%,3.0,4.0,5.0,6.0
50%,6.0,7.0,8.0,9.0
75%,9.0,10.0,11.0,12.0
max,12.0,13.0,14.0,15.0


## Sorting and Ranking

In [29]:
# Series of five integers labeled with color names, used for the sorting and
# ranking examples.
# The pasted "... " continuation prompt was removed: IPython strips it
# silently, but it is a SyntaxError in plain Python.
ser = pd.Series(
    [5, 0, 3, 8, 4],
    index=['red', 'blue', 'yellow', 'white', 'green'],
)
ser

red       5
blue      0
yellow    3
white     8
green     4
dtype: int64

In [30]:
# sort_index() orders the items by index label, in ascending alphabetical order.
# It returns a new Series; `ser` itself is unchanged when displayed again later on.
ser.sort_index()

blue      0
green     4
red       5
white     8
yellow    3
dtype: int64

In [33]:
# ascending=False sorts the index labels in descending alphabetical order
ser.sort_index(ascending=False)

yellow    3
white     8
red       5
green     4
blue      0
dtype: int64

In [34]:
# Re-create the 4x4 example DataFrame (integers 0..15, color row labels,
# item column labels) for the sorting examples.
# The pasted "... " continuation prompts were removed: IPython strips them
# silently, but they are a SyntaxError in plain Python.
frame = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [35]:
# With the default axis, the rows are reordered by their index labels.
frame.sort_index()

Unnamed: 0,ball,pen,pencil,paper
blue,4,5,6,7
red,0,1,2,3
white,12,13,14,15
yellow,8,9,10,11


In [36]:
# axis=1 reorders the columns by their labels instead of the rows.
frame.sort_index(axis=1)

Unnamed: 0,ball,paper,pen,pencil
red,0,3,1,2
blue,4,7,5,6
yellow,8,11,9,10
white,12,15,13,14


Sorting values within the data structure

In [37]:
ser

red       5
blue      0
yellow    3
white     8
green     4
dtype: int64

In [38]:
# Order the items by value (ascending); each value keeps its index label.
ser.sort_values()

blue      0
yellow    3
green     4
red       5
white     8
dtype: int64

In [41]:
# For a DataFrame, the 'by' option must be passed to specify the column to sort on.
# The 'pen' column is already in ascending order, so the row order does not change here.
frame.sort_values(by='pen')

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [42]:
# Passing a list to 'by' names several sort columns. Both columns are already
# in ascending order in this frame, so the rows stay where they are.
frame.sort_values(by=['ball','pencil'])

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


## Ranking
Ranking mainly consists of assigning
a rank (that is, a value that starts at 1 and then increases gradually) to each element of the
series. The rank is assigned starting from the lowest value to the highest.

In [43]:
# rank() returns float ranks: the smallest value (blue, 0) gets 1.0 and the
# largest (white, 8) gets 5.0.
ser.rank()

red       4.0
blue      1.0
yellow    2.0
white     5.0
green     3.0
dtype: float64

In [44]:
# method='first': per the pandas documentation, tied values are ranked in the
# order they appear. ser has no repeated values, so the result matches ser.rank().
ser.rank(method='first')

red       4.0
blue      1.0
yellow    2.0
white     5.0
green     3.0
dtype: float64

In [45]:
# ascending=False reverses the ranking: the largest value (white, 8) gets 1.0.
ser.rank(ascending=False)

red       2.0
blue      5.0
yellow    4.0
white     1.0
green     3.0
dtype: float64

## Correlation and Covariance

In [46]:
# Second yearly series (2006-2013) for the correlation/covariance examples.
# The year labels are passed positionally as the index.
seq2 = pd.Series(
    [3, 4, 3, 4, 5, 4, 3, 2],
    ['2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013'],
)

In [47]:
# First yearly series (2006-2013) for the correlation/covariance examples.
# The year labels are passed positionally as the index.
seq = pd.Series(
    [1, 2, 3, 4, 4, 3, 2, 1],
    ['2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013'],
)

In [49]:
# Correlation between the two Series, returned as a single float.
seq.corr(seq2)

0.7745966692414835

In [50]:
# Covariance between the two Series, returned as a single float.
seq.cov(seq2)

0.8571428571428571

In [51]:
# 4x4 DataFrame of hand-picked values (same row/column labels as `frame`),
# used for the correlation and covariance examples.
# The pasted "... " continuation prompts were removed: IPython strips them
# silently, but they are a SyntaxError in plain Python.
frame2 = pd.DataFrame(
    [[1, 4, 3, 6], [4, 5, 6, 1], [3, 3, 1, 5], [4, 1, 6, 4]],
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)

In [52]:
frame2

Unnamed: 0,ball,pen,pencil,paper
red,1,4,3,6
blue,4,5,6,1
yellow,3,3,1,5
white,4,1,6,4


In [53]:
# Correlation between every pair of columns: a square matrix labeled by
# column name on both axes, with 1.0 on the diagonal.
frame2.corr()

Unnamed: 0,ball,pen,pencil,paper
ball,1.0,-0.276026,0.57735,-0.763763
pen,-0.276026,1.0,-0.079682,-0.361403
pencil,0.57735,-0.079682,1.0,-0.692935
paper,-0.763763,-0.361403,-0.692935,1.0


In [55]:
# Covariance between every pair of columns, as a square matrix labeled by
# column name on both axes.
frame2.cov()

Unnamed: 0,ball,pen,pencil,paper
ball,2.0,-0.666667,2.0,-2.333333
pen,-0.666667,2.916667,-0.333333,-1.333333
pencil,2.0,-0.333333,6.0,-3.666667
paper,-2.333333,-1.333333,-3.666667,4.666667


In [56]:
# Rebind `ser` to a new Series for the corrwith() example. It carries one
# label ('green') that the DataFrames in this notebook do not have as a row.
# The pasted "... " continuation prompt was removed: IPython strips it
# silently, but it is a SyntaxError in plain Python.
ser = pd.Series(
    [0, 1, 2, 3, 9],
    index=['red', 'blue', 'yellow', 'white', 'green'],
)

In [57]:
ser

red       0
blue      1
yellow    2
white     3
green     9
dtype: int64

In [58]:
# Correlate each column of frame2 with ser, giving one value per column.
# NOTE(review): ser has an extra 'green' label that frame2 lacks; presumably
# only the shared labels take part -- confirm against the corrwith() docs.
frame2.corrwith(ser)

ball      0.730297
pen      -0.831522
pencil    0.210819
paper    -0.119523
dtype: float64

In [59]:
# Correlate frame2 with another DataFrame, giving one value per column label.
# The output equals that of corrwith(ser) above: every column of frame rises
# by a constant step per row, as do the first four values of ser.
frame2.corrwith(frame)

ball      0.730297
pen      -0.831522
pencil    0.210819
paper    -0.119523
dtype: float64