There are multiple ways to select rows and columns from Pandas DataFrames.

#Selecting a single column
#Selection by label
#Selection by position

In [60]:
import numpy as np
import pandas as pd

In [61]:
dates=pd.date_range('20190101',periods=8)

In [62]:
df = pd.DataFrame(np.random.randn(8,4),index=dates,columns=list('PQRS'))

In [63]:
df

Unnamed: 0,P,Q,R,S
2019-01-01,1.008235,0.067111,0.20284,0.791163
2019-01-02,0.106733,-0.642281,-1.050975,1.567915
2019-01-03,-1.012137,0.492158,0.584488,1.333525
2019-01-04,0.977191,1.729427,0.76661,0.156009
2019-01-05,-0.15679,-1.824143,-0.331837,-0.209881
2019-01-06,-1.098343,-0.728234,0.828643,0.619616
2019-01-07,-1.227163,-1.134469,-1.904183,0.606606
2019-01-08,0.902574,1.26354,0.913225,1.55836


In [64]:
df['P']

2019-01-01    1.008235
2019-01-02    0.106733
2019-01-03   -1.012137
2019-01-04    0.977191
2019-01-05   -0.156790
2019-01-06   -1.098343
2019-01-07   -1.227163
2019-01-08    0.902574
Freq: D, Name: P, dtype: float64

In [65]:
#Selecting via [], which slices the rows.
df[0:3]

Unnamed: 0,P,Q,R,S
2019-01-01,1.008235,0.067111,0.20284,0.791163
2019-01-02,0.106733,-0.642281,-1.050975,1.567915
2019-01-03,-1.012137,0.492158,0.584488,1.333525


In [66]:
df['20190102':'20190104']

Unnamed: 0,P,Q,R,S
2019-01-02,0.106733,-0.642281,-1.050975,1.567915
2019-01-03,-1.012137,0.492158,0.584488,1.333525
2019-01-04,0.977191,1.729427,0.76661,0.156009


In [67]:
#Selection by label
#For getting a cross section using a label:
df.loc[dates[0]]

P    1.008235
Q    0.067111
R    0.202840
S    0.791163
Name: 2019-01-01 00:00:00, dtype: float64

In [68]:
#Selecting on a multi-axis by label:
df.loc[:,['P','Q']]

Unnamed: 0,P,Q
2019-01-01,1.008235,0.067111
2019-01-02,0.106733,-0.642281
2019-01-03,-1.012137,0.492158
2019-01-04,0.977191,1.729427
2019-01-05,-0.15679,-1.824143
2019-01-06,-1.098343,-0.728234
2019-01-07,-1.227163,-1.134469
2019-01-08,0.902574,1.26354


In [69]:
#Show label slicing, both endpoints are included:
df.loc['20190102':'20190104',['P','Q']]

Unnamed: 0,P,Q
2019-01-02,0.106733,-0.642281
2019-01-03,-1.012137,0.492158
2019-01-04,0.977191,1.729427


In [70]:
#Reduction in the dimensions of the returned object:
df.loc['20190102',['P','Q']]

P    0.106733
Q   -0.642281
Name: 2019-01-02 00:00:00, dtype: float64

In [71]:
#For getting a scalar value:
df.loc[dates[0],'P']

1.0082353715488257

In [72]:
#For getting fast access to a scalar (equivalent to the prior method):
df.at[dates[0],'P']

1.0082353715488257

In [73]:
#Selection by position
df.iloc[3]

P    0.977191
Q    1.729427
R    0.766610
S    0.156009
Name: 2019-01-04 00:00:00, dtype: float64

In [74]:
#By integer slices, acting similar to numpy/python:
df.iloc[3:5,0:2]

Unnamed: 0,P,Q
2019-01-04,0.977191,1.729427
2019-01-05,-0.15679,-1.824143


In [75]:
#By lists of integer position locations, similar to the numpy/python style:
df.iloc[[1,2,4],[0,2]]

Unnamed: 0,P,R
2019-01-02,0.106733,-1.050975
2019-01-03,-1.012137,0.584488
2019-01-05,-0.15679,-0.331837


In [76]:
#Slice rows explicitly:
df.iloc[1:3,:]

Unnamed: 0,P,Q,R,S
2019-01-02,0.106733,-0.642281,-1.050975,1.567915
2019-01-03,-1.012137,0.492158,0.584488,1.333525


In [77]:
#Slice columns explicitly:
df.iloc[:,1:3]

Unnamed: 0,Q,R
2019-01-01,0.067111,0.20284
2019-01-02,-0.642281,-1.050975
2019-01-03,0.492158,0.584488
2019-01-04,1.729427,0.76661
2019-01-05,-1.824143,-0.331837
2019-01-06,-0.728234,0.828643
2019-01-07,-1.134469,-1.904183
2019-01-08,1.26354,0.913225


In [79]:
#Get a value explicitly:
df.iloc[1,1]

-0.6422811649845148

In [80]:
#Get fast access to a scalar:
df.iat[1,1]

-0.6422811649845148

In [82]:
#Boolean indexing:
#Using a single column’s values to select data.
df[df.P>0]

Unnamed: 0,P,Q,R,S
2019-01-01,1.008235,0.067111,0.20284,0.791163
2019-01-02,0.106733,-0.642281,-1.050975,1.567915
2019-01-04,0.977191,1.729427,0.76661,0.156009
2019-01-08,0.902574,1.26354,0.913225,1.55836


In [84]:
#Select values from a DataFrame where a boolean condition is met.
df[df>0]

Unnamed: 0,P,Q,R,S
2019-01-01,1.008235,0.067111,0.20284,0.791163
2019-01-02,0.106733,,,1.567915
2019-01-03,,0.492158,0.584488,1.333525
2019-01-04,0.977191,1.729427,0.76661,0.156009
2019-01-05,,,,
2019-01-06,,,0.828643,0.619616
2019-01-07,,,,0.606606
2019-01-08,0.902574,1.26354,0.913225,1.55836


In [85]:
#Using the isin() method for filtering:
df2 = pd.DataFrame({'A': 1.,
                        'B': pd.Timestamp('20190102'),
                        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                        'D': np.array([3] * 4, dtype='int32'),
                        'E': pd.Categorical(["test", "train", "test", "train"]),
                        'F': 'foo'})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2019-01-02,1.0,3,test,foo
1,1.0,2019-01-02,1.0,3,train,foo
2,1.0,2019-01-02,1.0,3,test,foo
3,1.0,2019-01-02,1.0,3,train,foo


In [33]:
df2=df.copy()
df2

Unnamed: 0,P,Q,R,S
2019-01-01,-1.001579,-0.507693,-2.070227,-1.745844
2019-01-02,-0.170597,1.802023,-0.052192,-0.027481
2019-01-03,0.177143,-1.689638,-0.072257,0.43568
2019-01-04,-0.115981,-1.735017,-0.195699,-0.488697


In [32]:
import pandas as pd
dates = pd.date_range('20190101', periods=4)
df = pd.DataFrame(np.random.randn(4, 4), index=dates, columns=list('PQRS'))
df2 = df.copy()
df2['E'] = ['test', 'train', 'test', 'train']
df2

Unnamed: 0,P,Q,R,S,E
2019-01-01,-1.001579,-0.507693,-2.070227,-1.745844,test
2019-01-02,-0.170597,1.802023,-0.052192,-0.027481,train
2019-01-03,0.177143,-1.689638,-0.072257,0.43568,test
2019-01-04,-0.115981,-1.735017,-0.195699,-0.488697,train


In [35]:
df2[df2['E'].isin(['test','train'])]

Unnamed: 0,A,B,C,D,E,F
0,1.0,2019-01-02,1.0,3,test,foo
1,1.0,2019-01-02,1.0,3,train,foo
2,1.0,2019-01-02,1.0,3,test,foo
3,1.0,2019-01-02,1.0,3,train,foo


In [86]:
#Setting
#Setting a new column automatically aligns the data by the indexes.
s=pd.Series([1,4,np.nan,6,8])

In [38]:
s1=pd.Series([1,2,3,4,5,6],index=pd.date_range('20130102',periods=6))

In [39]:
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [87]:
#Setting values by label:
df.at[dates[0],'P']=0

In [88]:
#Setting values by position:
In [49]: df.iat[0,1]=0

In [89]:
#Setting by assigning with a NumPy array:
In [50]: df.loc[:, 'S'] = np.array([5] * len(df))

  df.loc[:, 'S'] = np.array([5] * len(df))


In [90]:
#The result of the prior setting operations.
df

Unnamed: 0,P,Q,R,S
2019-01-01,0.0,0.0,0.20284,5
2019-01-02,0.106733,-0.642281,-1.050975,5
2019-01-03,-1.012137,0.492158,0.584488,5
2019-01-04,0.977191,1.729427,0.76661,5
2019-01-05,-0.15679,-1.824143,-0.331837,5
2019-01-06,-1.098343,-0.728234,0.828643,5
2019-01-07,-1.227163,-1.134469,-1.904183,5
2019-01-08,0.902574,1.26354,0.913225,5


In [91]:
#A where operation with setting.
df2=df.copy()

In [92]:
df2[df2>0]=-df2
df2

Unnamed: 0,P,Q,R,S
2019-01-01,0.0,0.0,-0.20284,-5
2019-01-02,-0.106733,-0.642281,-1.050975,-5
2019-01-03,-1.012137,-0.492158,-0.584488,-5
2019-01-04,-0.977191,-1.729427,-0.76661,-5
2019-01-05,-0.15679,-1.824143,-0.331837,-5
2019-01-06,-1.098343,-0.728234,-0.828643,-5
2019-01-07,-1.227163,-1.134469,-1.904183,-5
2019-01-08,-0.902574,-1.26354,-0.913225,-5
