## Pandas Operations

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Demo frame used by the cells below: two integer columns and one string
# column, five rows each.
df = pd.DataFrame(
    data={
        "C1": [1, 2, 3, 4, 5],
        "C2": [111, 222, 333, 444, 555],
        "C3": ["abc", "def", "ghi", "jkl", "mno"],
    }
)
# head() shows the first five rows by default, which is the whole frame here.
df.head(n=5)

Unnamed: 0,C1,C2,C3
0,1,111,abc
1,2,222,def
2,3,333,ghi
3,4,444,jkl
4,5,555,mno


#### Find unique values in a DataFrame column

In [3]:
# Distinct values of column C2, in order of first appearance.
pd.unique(df['C2'])

array([111, 222, 333, 444, 555], dtype=int64)

In [4]:
# Count the distinct values by taking the length of the unique-values array.
len(df.loc[:, 'C2'].unique())

5

In [7]:
# nunique() counts distinct values directly. It matches len(unique()) here
# because C2 has no missing values: nunique() skips NaN by default, whereas
# unique() keeps it as a value.
df['C2'].nunique(dropna=True)

5

In [8]:
# How often each distinct C2 value occurs, most frequent first;
# missing values are left out of the tally.
df['C2'].value_counts(sort=True, dropna=True)

111    1
222    1
333    1
444    1
555    1
Name: C2, dtype: int64

#### Selecting data

In [10]:
# Boolean-mask selection: keep only the rows whose C1 value exceeds 2.
df.loc[df['C1'].gt(2)]

Unnamed: 0,C1,C2,C3
2,3,333,ghi
3,4,444,jkl
4,5,555,mno


In [12]:
# Several conditions are combined with & (element-wise AND); each side
# produces its own boolean mask over the rows.
df.loc[df['C1'].gt(2) & df['C2'].eq(444)]

Unnamed: 0,C1,C2,C3
3,4,444,jkl


##### Apply method
###### For applying built-in and user-defined functions

In [13]:
def expo2(x):
    """Return ``x`` raised to the second power."""
    return pow(x, 2)

In [14]:
# Run the user-defined expo2 on every element of C1.
df['C1'].apply(func=expo2)

0     1
1     4
2     9
3    16
4    25
Name: C1, dtype: int64

In [15]:
# Built-in functions work too: len is called on each string in C3.
df['C3'].apply(func=len)

0    3
1    3
2    3
3    3
4    3
Name: C3, dtype: int64

###### Lambda expression

In [16]:
# An inline lambda avoids defining a named function for a one-off transform.
df['C2'].apply(lambda value: value ** 2)

0     12321
1     49284
2    110889
3    197136
4    308025
Name: C2, dtype: int64

In [17]:
# Column labels of the frame, returned as a pandas Index.
df.columns

Index(['C1', 'C2', 'C3'], dtype='object')

In [18]:
# Row labels of the frame; here the default RangeIndex created with the frame.
df.index

RangeIndex(start=0, stop=5, step=1)

##### Sorting and ordering

In [20]:
# Order the rows by the values in C2; ascending is the default direction.
df.sort_values(by='C2', ascending=True)

Unnamed: 0,C1,C2,C3
0,1,111,abc
1,2,222,def
2,3,333,ghi
3,4,444,jkl
4,5,555,mno


In [22]:
# Boolean frame marking missing entries; isna() is the same method as isnull().
df.isna()

Unnamed: 0,C1,C2,C3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False


##### pivot_table

In [24]:
# Sample records for the pivot-table demo: two label columns (A, B), a column
# whose values become the pivoted headers (C), and a numeric value column (D).
# Note: this rebinds `df`, replacing the C1/C2/C3 frame used in earlier cells.
data = dict(
    A=['KKR', 'KKR', 'KKR', 'CSK', 'CSK', 'CSK'],
    B=['one', 'one', 'two', 'two', 'one', 'one'],
    C=['x', 'y', 'x', 'y', 'x', 'y'],
    D=[1, 3, 2, 5, 4, 1],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B,C,D
0,KKR,one,x,1
1,KKR,one,y,3
2,KKR,two,x,2
3,CSK,two,y,5
4,CSK,one,x,4
5,CSK,one,y,1


In [25]:
# Rows are keyed by (A, B), the distinct values of C become columns, and each
# cell holds the mean of D for that combination (mean is the default
# aggregation). Combinations with no rows show up as NaN.
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'], aggfunc='mean')

Unnamed: 0_level_0,C,x,y
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
CSK,one,4.0,1.0
CSK,two,,5.0
KKR,one,1.0,3.0
KKR,two,2.0,
