In [1]:
import numpy as np
import pandas as pd

In [2]:
# Chained indexing, as in arr[m][n], vs. multidimensional indexing, as in arr[m, n]

In [3]:
# Build a 4x5 grid of the odd numbers 1..39 to use in the indexing examples.
arr = np.reshape(np.arange(1, 40, 2), (4, 5))

In [4]:
# Show the full array for reference before indexing into it.
print(arr)

[[ 1  3  5  7  9]
 [11 13 15 17 19]
 [21 23 25 27 29]
 [31 33 35 37 39]]


In [5]:
# Direct extraction of the element at row 1, column 2 -- multidimensional indexing
print(arr[1,2])

15


In [6]:
# Prints an empty line only; this cell has no other effect.
print()




In [7]:
# Chained indexing: arr[1] first produces a temporary 1-D array (row 1),
# then [2] picks the element out of that row.
print(arr[1][2])

15


In [8]:
# While the above is fine for accessing individual elements of a 2D array,
# chained indexing can produce surprising results when working with 2D slices, as seen below.


In [9]:
# Recreate the 4x5 grid of odd numbers 1..39 for the slicing examples.
arr = np.arange(1, 40, 2).reshape((4, 5))

In [10]:
# A bare expression on the last line of a cell displays the array's repr.
arr

array([[ 1,  3,  5,  7,  9],
       [11, 13, 15, 17, 19],
       [21, 23, 25, 27, 29],
       [31, 33, 35, 37, 39]])

In [11]:
# print() shows the str() form of the same array (no "array(...)" wrapper).
print(arr)

[[ 1  3  5  7  9]
 [11 13 15 17 19]
 [21 23 25 27 29]
 [31 33 35 37 39]]


In [12]:

##### Multidimensional indexing ###########

# Both slices are applied in one step: rows 0-2 and columns 0-1 (a 3x2 block).
print(arr[0:3,0:2])

[[ 1  3]
 [11 13]
 [21 23]]


In [13]:
##### Chained indexing ###########

# arr[0:3] first creates a temporary 2D array holding rows 0-2;
# [0:2] is then applied to the rows of that temporary array again, so the result
# is rows 0-1 with every column -- not the intended 3x2 block.
print(arr[0:3][0:2])

[[ 1  3  5  7  9]
 [11 13 15 17 19]]


In [None]:
# Slices of arrays do not copy the underlying array data; they only produce
# new views of the original data.
# An explicit copy() is therefore recommended so you don't end up tampering
# with the original data.

In [14]:
# Source array for the view-vs-copy demonstration: odd numbers 1..39 as a 4x5 grid.
a = np.arange(1, 40, 2).reshape((4, 5))

In [15]:
# Show the array before any slicing or modification.
print(a)

[[ 1  3  5  7  9]
 [11 13 15 17 19]
 [21 23 25 27 29]
 [31 33 35 37 39]]


In [17]:
# Basic slicing hands back a view onto a's data rather than an independent copy.
b = a[:3, :2]
print(b)

[[ 1  3]
 [11 13]
 [21 23]]


In [18]:
# Modify the original array in place after the slice b was taken from it.
a[0,0]=-1000

In [19]:
# Display a after the in-place assignment.
print(a)

[[-1000     3     5     7     9]
 [   11    13    15    17    19]
 [   21    23    25    27    29]
 [   31    33    35    37    39]]


In [20]:
# b was taken as a slice of a (a view), so display it again after a was modified.
print(b)

[[-1000     3]
 [   11    13]
 [   21    23]]


In [27]:
# Rebuild the 4x5 array of odd numbers so the copy example starts from clean data.
a = np.arange(1, 40, 2).reshape((4, 5))
print(a)

[[ 1  3  5  7  9]
 [11 13 15 17 19]
 [21 23 25 27 29]
 [31 33 35 37 39]]


In [28]:
# Calling .copy() on the slice gives b its own data, detached from a.
b = a[:3, :2].copy()
print(b)

[[ 1  3]
 [11 13]
 [21 23]]


In [30]:
# Modify the original array in place, then display it.
a[0,0]=-11111
print(a)

[[-11111      3      5      7      9]
 [    11     13     15     17     19]
 [    21     23     25     27     29]
 [    31     33     35     37     39]]


In [31]:
print(b) # b is unchanged because it was created with .copy()

[[ 1  3]
 [11 13]
 [21 23]]


In [32]:
###########################################################

In [33]:
# SettingWithCopyWarning

In [34]:
#### Note on how to clear cache in Notebook: Kernel-->Restart & Clear Output

In [None]:
### You may see SettingWithCopyWarning if you are using chained indexing. You should use
# multidimensional indexing to avoid this.

In [36]:
# Seed the global NumPy RNG so the sample values are the same on every run.
np.random.seed(0)
# 4 labelled rows x 6 labelled columns of standard-normal draws.
df = pd.DataFrame(
    np.random.normal(size=(4, 6)),
    index=list('wxyz'),
    columns=list('abcdef'),
)
print(df)

          a         b         c         d         e         f
w  1.764052  0.400157  0.978738  2.240893  1.867558 -0.977278
x  0.950088 -0.151357 -0.103219  0.410599  0.144044  1.454274
y  0.761038  0.121675  0.443863  0.333674  1.494079 -0.205158
z  0.313068 -0.854096 -2.552990  0.653619  0.864436 -0.742165


In [None]:
# Assigning to the result of chained indexing (as in df[m][n]) has inherently unpredictable results.
# You cannot be sure whether the first indexing step returns a view of the DataFrame or a temporary copy.
# SettingWithCopyWarning highlights this for you.

In [37]:
## The following is incorrect (kept on purpose to demonstrate the warning):
## df[df['f']>0] builds a filtered DataFrame first, and ['b'] = ... then assigns into that intermediate result
df[df['f']>0]['b']=-1111

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[df['f']>0]['b']=-1111


In [38]:
# The following is correct: the row mask and the column label go into a single .loc call
df.loc[df['f']>0,'b'] = -1111

In [39]:
# Display df: only row 'x' (the one row with f > 0) now has b == -1111
df

Unnamed: 0,a,b,c,d,e,f
w,1.764052,0.400157,0.978738,2.240893,1.867558,-0.977278
x,0.950088,-1111.0,-0.103219,0.410599,0.144044,1.454274
y,0.761038,0.121675,0.443863,0.333674,1.494079,-0.205158
z,0.313068,-0.854096,-2.55299,0.653619,0.864436,-0.742165


In [40]:
# You should use the df.loc[m, n] form in general to avoid the warning, especially when making assignments

In [41]:
#########################################################################################
#########################################################################################
#########################################################################################

In [42]:
#Examples of multidimensional-indexing

In [44]:
# Reseed and rebuild the frame so these examples start from the original values.
np.random.seed(0)
df = pd.DataFrame(
    data=np.random.normal(size=(4, 6)),
    index=['w', 'x', 'y', 'z'],
    columns=['a', 'b', 'c', 'd', 'e', 'f'],
)
print(df)

          a         b         c         d         e         f
w  1.764052  0.400157  0.978738  2.240893  1.867558 -0.977278
x  0.950088 -0.151357 -0.103219  0.410599  0.144044  1.454274
y  0.761038  0.121675  0.443863  0.333674  1.494079 -0.205158
z  0.313068 -0.854096 -2.552990  0.653619  0.864436 -0.742165


In [45]:
# Return row 'w' and all columns
df.loc[['w'],:] # the list ['w'] keeps the result a DataFrame; df.loc['w',:] would return a Series

Unnamed: 0,a,b,c,d,e,f
w,1.764052,0.400157,0.978738,2.240893,1.867558,-0.977278


In [47]:
# Return rows 'w' and 'y' (all columns)
df.loc[['w','y'],:]

Unnamed: 0,a,b,c,d,e,f
w,1.764052,0.400157,0.978738,2.240893,1.867558,-0.977278
y,0.761038,0.121675,0.443863,0.333674,1.494079,-0.205158


In [49]:
# Return a slice of rows, from row 'w' through row 'y' (the end label is included)
df.loc['w':'y',:]

Unnamed: 0,a,b,c,d,e,f
w,1.764052,0.400157,0.978738,2.240893,1.867558,-0.977278
x,0.950088,-0.151357,-0.103219,0.410599,0.144044,1.454274
y,0.761038,0.121675,0.443863,0.333674,1.494079,-0.205158


In [51]:
# Return column 'a'
df.loc[:,['a']] # the list ['a'] keeps the result a DataFrame; df.loc[:,'a'] would return a Series

Unnamed: 0,a
w,1.764052
x,0.950088
y,0.761038
z,0.313068


In [52]:
# Return columns 'a' and 'c'
df.loc[:,['a','c']]

Unnamed: 0,a,c
w,1.764052,0.978738
x,0.950088,-0.103219
y,0.761038,0.443863
z,0.313068,-2.55299


In [53]:
# Return columns 'a' through 'c' (the end label is included)
df.loc[:,'a':'c']

Unnamed: 0,a,b,c
w,1.764052,0.400157,0.978738
x,0.950088,-0.151357,-0.103219
y,0.761038,0.121675,0.443863
z,0.313068,-0.854096,-2.55299


In [54]:
# Cross-section of the data: rows 'w' and 'y', columns 'a' and 'b'
df.loc[['w','y'],['a','b']]

Unnamed: 0,a,b
w,1.764052,0.400157
y,0.761038,0.121675


In [59]:
# Conditional selection
# Row condition ---> df.loc[:,'b']>0 (a boolean mask over the rows)
df.loc[df.loc[:,'b']>0,:]

Unnamed: 0,a,b,c,d,e,f
w,1.764052,0.400157,0.978738,2.240893,1.867558,-0.977278
y,0.761038,0.121675,0.443863,0.333674,1.494079,-0.205158
