In [30]:
import numpy as np

In [31]:
import pandas as pd

In [32]:
from numpy.random import randn

In [33]:
# Fix the global NumPy seed so that every run draws the same random numbers.
np.random.seed(seed=101)

In [34]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [35]:
# Alternative construction: the row and column labels are given as explicit lists.
# (Note that the last column is labelled 'K' here.)
df1 = pd.DataFrame(
    randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'K'],
)

In [36]:
# Display the frame that was just built.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


Selection and Indexing
The various methods to grab data from a DataFrame.

In [37]:
# Single brackets with one column name return that column as a Series,
# keeping every row label.
df['W']


A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [38]:
# To select several columns at once, pass a list of column names inside the
# brackets (hence the double brackets); the result is a DataFrame.
df[['W','X']]

Unnamed: 0,W,X
A,2.70685,0.628133
B,0.651118,-0.319318
C,-2.018168,0.740122
D,0.188695,-0.758872
E,0.190794,1.978757


In [39]:
# Same pattern with three columns: the inner list can hold any number of column names.
df[['W','X','Y']]

Unnamed: 0,W,X,Y
A,2.70685,0.628133,0.907969
B,0.651118,-0.319318,-0.848077
C,-2.018168,0.740122,0.528813
D,0.188695,-0.758872,-0.933237
E,0.190794,1.978757,2.605967


In [40]:
# .loc selects by label: the first list names the rows, the second names the columns.
df.loc[['A'],['W','X']]

Unnamed: 0,W,X
A,2.70685,0.628133


In [41]:
# The order is always rows first, then columns.
df.loc[['A','B'],['W','X']]

Unnamed: 0,W,X
A,2.70685,0.628133
B,0.651118,-0.319318


In [42]:
# Add a column by assigning a list with one value per row to a new column name.
# Note: split() produces strings, so column N holds '4', '5', ... rather than integers.
df['N'] = '4 5 6 8 9'.split()

In [43]:
# Display the frame to confirm that column N was added.
df

Unnamed: 0,W,X,Y,Z,N
A,2.70685,0.628133,0.907969,0.503826,4
B,0.651118,-0.319318,-0.848077,0.605965,5
C,-2.018168,0.740122,0.528813,-0.589001,6
D,0.188695,-0.758872,-0.933237,0.955057,8
E,0.190794,1.978757,2.605967,0.683509,9


# To drop the column we just added, we can use df.drop. Let's see how.
df.drop('N') # passing only the column name raises a KeyError (the label 'N' is not found in the axis),
# because df.drop defaults to axis=0, which refers to the rows (the index labels 'A B C D E').
# To remove a column we must pass axis=1 together with the column name. To make the change permanent we must also set inplace=True
# (or assign the result back to df); pandas returns a modified copy by default so that you do not lose information accidentally.
# Let's look at examples of each case.

In [44]:
# With axis=1 the label is looked up among the columns; this returns a copy
# without N and leaves df itself untouched.
df.drop('N',axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [45]:
# df still contains column N because the previous drop was not done in place.
df

Unnamed: 0,W,X,Y,Z,N
A,2.70685,0.628133,0.907969,0.503826,4
B,0.651118,-0.319318,-0.848077,0.605965,5
C,-2.018168,0.740122,0.528813,-0.589001,6
D,0.188695,-0.758872,-0.933237,0.955057,8
E,0.190794,1.978757,2.605967,0.683509,9


In [46]:
# Drop column N from df itself: inplace=True mutates df and returns None.
# errors='ignore' keeps the cell re-runnable: once N is gone, running it again
# is a no-op instead of raising KeyError.
df.drop('N', axis=1, inplace=True, errors='ignore')

In [47]:
# Column N has now been removed from df itself.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [48]:
# Dropping a row needs no axis argument: the default axis=0 refers to the
# row labels, so this returns a copy without row E.
df.drop('E')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [49]:
# Row E is still present because the drop above was not done with inplace=True.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [50]:
# Rows can be selected in two ways: df.loc takes row labels ('A', 'B', ...),
# while df.iloc takes integer positions (0, 1, 2, ...).
# With one row label and one column label, .loc returns the single value in that cell.
df.loc['A','Y']

0.9079694464765431

In [51]:
# Passing lists instead of single labels returns a sub-DataFrame rather than one value.
df.loc[['A','B'],['X','Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077


In [52]:
# iloc with a list of integer positions: positions 2 and 3 are rows C and D.
df.iloc[[2,3]]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [53]:
# iloc with a single integer position returns that row as a Series (position 3 is row D).
df.iloc[3]

W    0.188695
X   -0.758872
Y   -0.933237
Z    0.955057
Name: D, dtype: float64

# part one ends here 

In [66]:
# Reorder the columns to W, Y, Z, X. reindex returns a new object by default,
# so the redundant copy=True keyword is omitted.
df = df.reindex(columns=['W', 'Y', 'Z', 'X'])

In [67]:
# Display the frame with its columns in the new order.
# (The original cell read `df.`, a dangling attribute access that is a SyntaxError.)
df

Unnamed: 0,W,Y,Z,X
A,2.70685,0.907969,0.503826,0.628133
B,0.651118,-0.848077,0.605965,-0.319318
C,-2.018168,0.528813,-0.589001,0.740122
D,0.188695,-0.933237,0.955057,-0.758872
E,0.190794,2.605967,0.683509,1.978757


In [70]:
# A second, smaller frame: 4x3 standard-normal draws, rows j-m, integer column
# labels 1-3. Note that this rebinds df1, replacing the frame created earlier.
df1 = pd.DataFrame(
    data=np.random.randn(4, 3),
    index=list('jklm'),
    columns=[1, 2, 3],
)

In [71]:
# Display the new 4x3 frame.
df1

Unnamed: 0,1,2,3
j,0.147027,-0.479448,0.558769
k,1.02481,-0.925874,1.862864
l,-1.133817,0.610478,0.38603
m,2.084019,-0.376519,0.230336


In [75]:
# iloc is purely positional: position 1 is the second row (label k),
# regardless of the integer column labels 1-3.
df1.iloc[1]

1    1.024810
2   -0.925874
3    1.862864
Name: k, dtype: float64