### Indexing and Selecting Data in Pandas  
**Author:** Taskeen Hussain  

#### Pandas provides powerful tools for indexing and selecting data in Series and DataFrames. These capabilities allow for efficient data manipulation and extraction based on labels, positions, or conditions.

In [16]:
# ### .loc()
## Pandas provide various methods in order to get purely label-location based indexing. When slicing, 
# the start bound is also included. Integers are valid labels, but they refer to the label and not the position.

#import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4),
index = ['a','b','c','d','e','f','g','h'], columns = ['A', 'B', 'C', 'D'])


In [2]:
#select all rows for a specific column
print (df.loc[:,'A'])



# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4),
index = ['a','b','c','d','e','f','g','h'], columns = ['A', 'B', 'C', 'D'])


a   -0.049177
b   -1.253712
c   -0.223615
d    0.494638
e    0.002327
f    3.153442
g   -0.637233
h    0.346875
Name: A, dtype: float64


In [3]:
# Select all rows for multiple columns, say list[]
print (df.loc[:,['A','C']])



          A         C
a  0.876225  0.818642
b  1.787640  0.481568
c -1.425584 -1.436990
d  0.182522  0.142021
e  0.448474 -0.297705
f  0.670024 -0.916298
g -0.317001  0.696608
h  0.438156  1.172570


In [4]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4),
index = ['a','b','c','d','e','f','g','h'], columns = ['A', 'B', 'C', 'D'])

# Select few rows for multiple columns, say list[]
print (df.loc[['a','b','f','h'],['A','C']])


          A         C
a -0.546576 -1.920782
b -2.518808  0.066317
f -0.030905 -1.351596
h -1.329044 -1.330258


In [5]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4),
index = ['a','b','c','d','e','f','g','h'], columns = ['A', 'B', 'C', 'D'])

In [6]:
# Select range of rows for all columns
print (df.loc['a':'h'])

          A         B         C         D
a -0.045174  1.037441  0.833452  0.937949
b  0.591001  0.890791 -0.360237  0.874845
c  1.458524 -0.760742 -0.026624 -0.881787
d  0.049604  0.627389 -1.636056  0.860902
e -0.949744 -1.058138  0.284911  0.821904
f -0.599039 -0.508382  1.409918  1.782932
g -0.766988 -0.179092  0.820745  0.373197
h  0.750584 -0.821707 -1.723493  1.437627


In [7]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4),
index = ['a','b','c','d','e','f','g','h'], columns = ['A', 'B', 'C', 'D'])

In [8]:
# for getting values with a boolean array
print (df.loc['a']>0)

A    False
B     True
C     True
D    False
Name: a, dtype: bool


In [9]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])

# select all rows for a specific column
print (df.iloc[:4])

          A         B         C         D
0  1.054542  0.163990  0.577654  0.088692
1 -0.031826  0.698213  0.462507  0.077089
2 -0.763654 -0.795877 -0.856336 -0.787240
3 -0.408231  0.939831  0.312838  1.728401


In [10]:
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])

# Integer slicing
print (df.iloc[:4])
print (df.iloc[1:5, 2:4])


          A         B         C         D
0 -0.131230 -0.655680  1.051439 -1.355403
1  0.634255 -0.960295  0.878482  0.527476
2 -0.312977  0.247895 -0.020591  1.082616
3  1.125324 -1.224064 -0.080451 -0.567372
          C         D
1  0.878482  0.527476
2 -0.020591  1.082616
3 -0.080451 -0.567372
4 -0.900104 -0.883426


In [11]:
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])

# Slicing through list of values
print (df.iloc[[1, 3, 5], [1, 3]])
print (df.iloc[1:3, :])
print (df.iloc[:,1:3])


          B         D
1  1.676198 -1.136831
3  0.502104  1.015073
5 -1.681207 -0.044198
          A         B         C         D
1  0.607060  1.676198 -0.915453 -1.136831
2 -0.215823 -0.713756 -0.989951 -0.169976
          B         C
0 -0.904126 -1.477769
1  1.676198 -0.915453
2 -0.713756 -0.989951
3  0.502104  0.439766
4  0.880567 -0.777759
5 -1.681207 -1.239048
6 -0.735057  0.088473
7  0.473846 -0.295581


In [12]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])
print (df['A'])


0    0.435410
1   -1.536373
2   -0.738991
3    0.499210
4   -1.229502
5   -1.111364
6   -0.144216
7   -0.983759
Name: A, dtype: float64


In [13]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])

print (df[['A','B']])

          A         B
0 -1.050237  0.885514
1 -0.749723  0.102322
2  1.583710  1.511573
3 -1.189151 -0.177588
4  0.469450 -1.268152
5 -2.058323  0.250829
6  0.402082 -0.838654
7  0.581484 -0.318354


In [14]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])
print (df[2:2])

Empty DataFrame
Columns: [A, B, C, D]
Index: []


In [15]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(8, 4), columns = ['A', 'B', 'C', 'D'])

print (df.A)

0   -0.116110
1    0.248947
2   -0.591744
3   -1.445159
4   -2.934350
5    0.193104
6   -1.011759
7    1.322058
Name: A, dtype: float64
