# Pandas 
### Python Data Analysis Library

In [2]:
import pandas as pd 
import numpy as np 

In [4]:
# A Series pairs each value with an explicit label; here the labels are 'a'..'d'.
s = pd.Series(data=[12, -4, 7, 9], index=list('abcd'))
s

a    12
b    -4
c     7
d     9
dtype: int64

## Series 

### Evaluating Duplicates in Series 

In [6]:
# Sample Series with repeats on both sides: the labels 'white' and 'green'
# occur twice, and so do the values 1 and 2.
serd = pd.Series(
    data=[1, 0, 2, 1, 2, 3],
    index=['white', 'white', 'blue', 'green', 'green', 'yellow'],
)
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [7]:
# Distinct values of the Series, returned as a NumPy array in order of first appearance.
serd.unique()

array([1, 0, 2, 3], dtype=int64)

In [10]:
# NOTE(review): this repeats the preceding serd.unique() cell verbatim and
# produces the same array; it is a candidate for removal.
serd.unique()

array([1, 0, 2, 3], dtype=int64)

In [8]:
# Frequency table: each distinct value (as the index) with its number of occurrences.
serd.value_counts()

1    2
2    2
0    1
3    1
dtype: int64

### Nan Values 

In [11]:
# np.nan marks a missing value. Because NaN is a float, the presence of a
# single NaN makes the whole Series float64.
# (Use the lowercase spelling: the np.NaN alias no longer exists in NumPy 2.x.)
s2 = pd.Series([5, -3, np.nan, 14])
s2

0     5.0
1    -3.0
2     NaN
3    14.0
dtype: float64

In [15]:
# Boolean mask that is True exactly where the value is missing (NaN).
s2.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [16]:
# Boolean-mask filtering: keep only the entries whose value is not NaN.
# The surviving rows keep their original labels (0, 1, 3).
s2[s2.notnull()]

0     5.0
1    -3.0
3    14.0
dtype: float64

#### Series from dict 

In [17]:
# A dict converts directly to a Series: keys become the index labels and
# values become the data, in insertion order.
mydict = dict(red=2000, blue=1000, yellow=500, orange=1000)
myseries = pd.Series(data=mydict)
myseries

red       2000
blue      1000
yellow     500
orange    1000
dtype: int64

## DataFrame

In [20]:
# Build the DataFrame straight from a dict of equal-length lists: each key
# becomes a column, and rows get the default integer index 0..4.
data = pd.DataFrame({
    'color': ['blue', 'green', 'yellow', 'red', 'white'],
    'object': ['ball', 'pen', 'pencil', 'paper', 'mug'],
    'price': [1.2, 1.0, 0.6, 0.9, 1.7],
})
data

Unnamed: 0,color,object,price
0,blue,ball,1.2
1,green,pen,1.0
2,yellow,pencil,0.6
3,red,paper,0.9
4,white,mug,1.7


In [22]:
# Swap the axes: column names become the row labels and the original row
# labels (0..4) become the columns.
data.transpose()

Unnamed: 0,0,1,2,3,4
color,blue,green,yellow,red,white
object,ball,pen,pencil,paper,mug
price,1.2,1.0,0.6,0.9,1.7


In [23]:
## skipped indexing and dropping 
## Important topics: see pp. 114 and 117 of the reference book (author presumably Fabio Nelli -- confirm)

### Alignment of Data

In [24]:
# Two Series whose labels only partly overlap ('white', 'yellow' and 'blue'
# are shared), used to show label alignment in arithmetic.
# Note: s2 is rebound here and replaces the earlier NaN-example Series.
s1 = pd.Series(data=[3, 2, 5, 1],
               index=['white', 'yellow', 'green', 'blue'])
s2 = pd.Series(data=[1, 4, 7, 2, 1],
               index=['white', 'yellow', 'black', 'blue', 'brown'])

In [25]:
# Display the first operand (four labels).
s1

white     3
yellow    2
green     5
blue      1
dtype: int64

In [26]:
# Display the second operand (five labels, three shared with s1).
s2

white     1
yellow    4
black     7
blue      2
brown     1
dtype: int64

In [27]:
# Arithmetic aligns on index labels, not on position. The result covers the
# union of labels; a label present in only one Series yields NaN, which is
# why the result is float64.
s1+s2

black     NaN
blue      3.0
brown     NaN
green     NaN
white     4.0
yellow    6.0
dtype: float64

In [28]:
# Two frames with partially overlapping row AND column labels.
frame1 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['blue', 'green', 'white', 'yellow'],
    columns=['mug', 'pen', 'ball'],
)
# Addition aligns on both axes: only cells whose (row, column) pair exists in
# both frames get a number; every other cell in the union is NaN.
frame1 + frame2

Unnamed: 0,ball,mug,paper,pen,pencil
blue,6.0,,,6.0,
green,,,,,
red,,,,,
white,20.0,,,20.0,
yellow,19.0,,,19.0,


## Function Application and Mapping

##### Functions by Element

In [31]:
# 4x4 frame holding the integers 0..15, filled row by row, with colour names
# as row labels and object names as column labels.
frame = pd.DataFrame(np.arange(16).reshape((4, 4)),
                     index=['red', 'blue', 'yellow', 'white'],
                     columns=['ball', 'pen', 'pencil', 'paper'])
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [32]:
# NumPy universal functions work element-wise on a DataFrame and return a
# DataFrame with the same row and column labels.
np.sqrt(frame)

Unnamed: 0,ball,pen,pencil,paper
red,0.0,1.0,1.414214,1.732051
blue,2.0,2.236068,2.44949,2.645751
yellow,2.828427,3.0,3.162278,3.316625
white,3.464102,3.605551,3.741657,3.872983


##### Functions by Row or Column

In [33]:
# axis=0 passes the lambda one COLUMN at a time (it works down the rows of
# each column). x**0.5 is element-wise, so the result equals np.sqrt(frame).
frame.apply(lambda x:x**0.5,axis=0)  # axis=0 -> applied per column

Unnamed: 0,ball,pen,pencil,paper
red,0.0,1.0,1.414214,1.732051
blue,2.0,2.236068,2.44949,2.645751
yellow,2.828427,3.0,3.162278,3.316625
white,3.464102,3.605551,3.741657,3.872983


In [34]:
# axis=1 passes the lambda one ROW at a time (it works across the columns of
# each row). x**0.5 is element-wise, so the result equals np.sqrt(frame).
frame.apply(lambda x:x**0.5,axis=1)  # axis=1 -> applied per row

Unnamed: 0,ball,pen,pencil,paper
red,0.0,1.0,1.414214,1.732051
blue,2.0,2.236068,2.44949,2.645751
yellow,2.828427,3.0,3.162278,3.316625
white,3.464102,3.605551,3.741657,3.872983


## Statistics Functions

In [35]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
frame.describe()

Unnamed: 0,ball,pen,pencil,paper
count,4.0,4.0,4.0,4.0
mean,6.0,7.0,8.0,9.0
std,5.163978,5.163978,5.163978,5.163978
min,0.0,1.0,2.0,3.0
25%,3.0,4.0,5.0,6.0
50%,6.0,7.0,8.0,9.0
75%,9.0,10.0,11.0,12.0
max,12.0,13.0,14.0,15.0


In [36]:
# Total of each column (the default reduces down the rows).
frame.sum()

ball      24
pen       28
pencil    32
paper     36
dtype: int64

In [37]:
# Average of each column; the result is float even though the inputs are ints.
frame.mean()

ball      6.0
pen       7.0
pencil    8.0
paper     9.0
dtype: float64

In [38]:
## skipped sort_index and sort_values on Page 125

## Correlation and Covariance

In [39]:
# Two yearly series sharing the same string labels '2006'..'2013'.
seq = pd.Series(
    data=[1, 2, 3, 4, 4, 3, 2, 1],
    index=[str(year) for year in range(2006, 2014)],
)
seq2 = pd.Series(data=[3, 4, 3, 4, 5, 4, 3, 2], index=seq.index)
# Correlation coefficient between the two series.
seq.corr(seq2)

0.7745966692414835

In [40]:
# Covariance between the two series (a single scalar).
seq.cov(seq2)

0.8571428571428571

In [41]:
# Small 4x4 frame of hand-picked integers used for the column-wise
# correlation and covariance examples that follow.
# Note: this rebinds frame2, replacing the frame of the same name built for
# the alignment example.
frame2 = pd.DataFrame([[1, 4, 3, 6], [4, 5, 6, 1], [3, 3, 1, 5], [4, 1, 6, 4]],
                      index=['red', 'blue', 'yellow', 'white'],
                      columns=['ball', 'pen', 'pencil', 'paper'])

In [42]:
# Pairwise correlation between every pair of columns; the matrix is
# symmetric with 1.0 on the diagonal.
frame2.corr()

Unnamed: 0,ball,pen,pencil,paper
ball,1.0,-0.276026,0.57735,-0.763763
pen,-0.276026,1.0,-0.079682,-0.361403
pencil,0.57735,-0.079682,1.0,-0.692935
paper,-0.763763,-0.361403,-0.692935,1.0


In [43]:
# Pairwise covariance between every pair of columns; the diagonal holds each
# column's variance.
frame2.cov()

Unnamed: 0,ball,pen,pencil,paper
ball,2.0,-0.666667,2.0,-2.333333
pen,-0.666667,2.916667,-0.333333,-1.333333
pencil,2.0,-0.333333,6.0,-3.666667
paper,-2.333333,-1.333333,-3.666667,4.666667


In [44]:
# Correlate each column of frame2 with an external Series, matching rows by
# label. 'green' has no row in frame2, so only the four shared labels are used.
ser = pd.Series(
    data=[0, 1, 2, 3, 9],
    index=['red', 'blue', 'yellow', 'white', 'green'],
)
frame2.corrwith(ser)

ball      0.730297
pen      -0.831522
pencil    0.210819
paper    -0.119523
dtype: float64

In [45]:
# Skipped filtering NaN values
# study 
# ->fillna 
# ->isnull ,notnull
# -> ffill ,bfill

### Hierarchical Indexing

In [49]:
# Series with a two-level (hierarchical) index: the outer level is a colour,
# the inner level a direction. Passing a list of two equal-length lists as
# `index` builds the MultiIndex.
# The values come from a generator with a fixed seed so that every
# Restart & Run All produces the same numbers.
mser = pd.Series(np.random.default_rng(0).random(8),
                 index=[['white', 'white', 'white', 'blue', 'blue', 'red', 'red', 'red'],
                        ['up', 'down', 'right', 'up', 'down', 'up', 'down', 'left']])
mser

white  up       0.560047
       down     0.949407
       right    0.953071
blue   up       0.193584
       down     0.124685
red    up       0.062151
       down     0.629009
       left     0.631502
dtype: float64

In [50]:
# Pivot the inner index level into columns, giving a DataFrame with one row
# per colour. Colour/direction pairs that do not exist in mser become NaN.
mser.unstack()

Unnamed: 0,down,left,right,up
blue,0.124685,,,0.193584
red,0.629009,0.631502,,0.062151
white,0.949407,,0.953071,0.560047
