# Pandas DataFrame

In [1]:
import numpy as np
import pandas as pd
# Seed NumPy's global generator so the pseudo-random values are identical on every run.
np.random.seed(101)

# 5x4 DataFrame of standard-normal samples with row labels A-E and column labels W-Z.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [2]:
# Display the whole frame (only 5 rows, so no need for .head()).
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [3]:
# Conditional selection: the boolean Series df['W'] > 0 keeps only the rows where it is True.
df.loc[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [4]:
# Rows where W > 0, restricted to column 'X'. A single .loc[row_mask, column] call does the
# filtering and the column selection in one step instead of chaining two [] lookups.
df.loc[df['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [5]:
# Rows where W > 0, restricted to columns 'X' and 'Y'. Passing a list of labels as the column
# selector returns a DataFrame; .loc[row_mask, columns] avoids chaining two [] lookups.
df.loc[df['W'] > 0, ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
D,-0.758872,-0.933237
E,1.978757,2.605967


In [6]:
# Combine several conditions element-wise with & (and) or | (or), wrapping each comparison in
# parentheses. Python's plain `and` / `or` cannot be used here: they need a single True/False,
# and the truth value of a whole Series is ambiguous.
df.loc[(df['W'] > 0) & (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [7]:
# Build a list of strings by splitting on whitespace (sep=None is the default separator rule).
newind = 'AB CD EF GH IJ'.split(sep=None)
newind

['AB', 'CD', 'EF', 'GH', 'IJ']

In [8]:
# Add a column named 'Newcolm' to df in place, one entry of newind per row.
df.loc[:, 'Newcolm'] = newind

In [9]:
# Display df again; it now carries the extra 'Newcolm' column.
df

Unnamed: 0,W,X,Y,Z,Newcolm
A,2.70685,0.628133,0.907969,0.503826,AB
B,0.651118,-0.319318,-0.848077,0.605965,CD
C,-2.018168,0.740122,0.528813,-0.589001,EF
D,0.188695,-0.758872,-0.933237,0.955057,GH
E,0.190794,1.978757,2.605967,0.683509,IJ


In [10]:
# reset_index() replaces the row labels with a 0..n-1 integer index. With drop=False (the
# default) the old index is kept as a new column called 'index'. The result is a new frame;
# df itself is not modified.
df.reset_index(drop=False)

Unnamed: 0,index,W,X,Y,Z,Newcolm
0,A,2.70685,0.628133,0.907969,0.503826,AB
1,B,0.651118,-0.319318,-0.848077,0.605965,CD
2,C,-2.018168,0.740122,0.528813,-0.589001,EF
3,D,0.188695,-0.758872,-0.933237,0.955057,GH
4,E,0.190794,1.978757,2.605967,0.683509,IJ


In [11]:
# set_index() promotes an existing column to be the row index; here 'Newcolm' replaces A-E.
# It returns a new frame, so assign the result if you want to keep it.
df.set_index(keys='Newcolm')

Unnamed: 0_level_0,W,X,Y,Z
Newcolm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AB,2.70685,0.628133,0.907969,0.503826
CD,0.651118,-0.319318,-0.848077,0.605965
EF,-2.018168,0.740122,0.528813,-0.589001
GH,0.188695,-0.758872,-0.933237,0.955057
IJ,0.190794,1.978757,2.605967,0.683509


# Index Levels

In [12]:
# Two parallel lists: the outer (group) label and the inner (serial number) label of each row.
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]

# zip pairs the lists element-wise into tuples: ('G1', 1), ('G1', 2), ..., ('G2', 3).
# MultiIndex.from_tuples turns those pairs into a two-level (hierarchical) index.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [13]:
# Rebind df to a new 6x2 frame of standard-normal samples whose rows are labelled by the
# two-level hier_index; this replaces the 5x4 frame used in the cells above.
df = pd.DataFrame(
    np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [14]:
# Select every row under the outer-level label 'G1'; the result is indexed by the inner level (1, 2, 3).
df.loc['G1']

Unnamed: 0,A,B
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [15]:
# Two lookups in a row: first take the 'G1' group, then the row labelled 2 inside that group.
# The result is a Series named 2 holding that row's A and B values.
df.loc['G1'].loc[2]

A   -1.706086
B   -1.159119
Name: 2, dtype: float64

In [16]:
# Names of the index levels; both are None here because no names have been assigned yet.
df.index.names

FrozenList([None, None])

In [17]:
# Name the two index levels: the outer level becomes 'Groups', the inner level 'S/n'.
df.index.names = ['Groups', 'S/n']

In [18]:
# Display df; the index header now shows the level names 'Groups' and 'S/n'.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,S/n,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [19]:
# Scalar lookup in one step: the tuple ('G2', 1) addresses both index levels (group 'G2',
# serial number 1) and 'A' picks the column, instead of chaining three separate lookups.
df.loc[('G2', 1), 'A']

0.16690463609281317

In [20]:
# Cross-section: from every group, take the rows whose 'S/n' level equals 1.
# The matched level is dropped, so the result is indexed by 'Groups' only.
df.xs(key=1, level='S/n')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502
