In [2]:
import pandas as pd
import numpy as np

In [16]:
# randn draws samples from the standard normal distribution.
from numpy.random import randn
# Fix the global NumPy seed so the random frame built below is reproducible.
np.random.seed(10)

In [17]:
# Build a 5x4 frame of random draws: rows labelled A-E, columns x1-x4.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list("ABCDE"),
    columns=["x1", "x2", "x3", "x4"],
)

In [18]:
# Display the freshly created frame (5 rows x 4 columns).
df

Unnamed: 0,x1,x2,x3,x4
A,1.331587,0.715279,-1.5454,-0.008384
B,0.621336,-0.720086,0.265512,0.108549
C,0.004291,-0.1746,0.433026,1.203037
D,-0.965066,1.028274,0.22863,0.445138
E,-1.136602,0.135137,1.484537,-1.079805


In [29]:
# Column labels of the frame, returned as a pandas Index.
df.columns

Index(['x1', 'x2', 'x3', 'x4'], dtype='object')

In [30]:
# Underlying data as a NumPy array (row/column labels are dropped).
# to_numpy() is the accessor the pandas docs recommend over the older .values.
df.to_numpy()

array([[ 1.3315865 ,  0.71527897, -1.54540029, -0.00838385],
       [ 0.62133597, -0.72008556,  0.26551159,  0.10854853],
       [ 0.00429143, -0.17460021,  0.43302619,  1.20303737],
       [-0.96506567,  1.02827408,  0.22863013,  0.44513761],
       [-1.13660221,  0.13513688,  1.484537  , -1.07980489]])

Now, let's discuss how we can do the following operations:
- Selecting Columns
- Creating new columns 
- Removing columns 
- Selecting rows on the basis of index 
- Selecting rows on the basis of some condition

### Selecting Columns

In [28]:
# Select one column: a single label inside [] returns a Series
df['x1']

A    1.331587
B    0.621336
C    0.004291
D   -0.965066
E   -1.136602
Name: x1, dtype: float64

In [23]:
# Select multiple columns in any order: a list of labels returns a DataFrame
# whose columns follow the order of the list
df[['x3' , 'x1']]

Unnamed: 0,x3,x1
A,-1.5454,1.331587
B,0.265512,0.621336
C,0.433026,0.004291
D,0.22863,-0.965066
E,1.484537,-1.136602


In [24]:
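# Another syntax: attribute access (this is not recommended, why?)
# It only works when the column name is a valid Python identifier that does not
# clash with an existing DataFrame attribute or method, and it cannot be used
# to create a new column.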

In [25]:
# Attribute-style access: gives the same Series as df['x1']
df.x1

A    1.331587
B    0.621336
C    0.004291
D   -0.965066
E   -1.136602
Name: x1, dtype: float64

### Creating new columns: 

In [26]:
# Assigning to a new label adds a column; x5 is the element-wise sum of x1 and x2.
# Note: this modifies df itself.
df['x5']  = df['x1']  + df['x2']

In [27]:
# Display the frame again to confirm the new x5 column.
df

Unnamed: 0,x1,x2,x3,x4,x5
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


### Dropping columns & rows: 

In [31]:
# Drop / remove columns:

# axis=1 targets columns; drop returns a new frame, so df itself still has 'x5'
df.drop('x5' , axis = 1)

Unnamed: 0,x1,x2,x3,x4
A,1.331587,0.715279,-1.5454,-0.008384
B,0.621336,-0.720086,0.265512,0.108549
C,0.004291,-0.1746,0.433026,1.203037
D,-0.965066,1.028274,0.22863,0.445138
E,-1.136602,0.135137,1.484537,-1.079805


In [32]:
# Drop / remove rows:

# axis=0 targets rows; this returns a new frame and leaves df unchanged
df.drop('A' , axis = 0)

Unnamed: 0,x1,x2,x3,x4,x5
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


Use `inplace = True` to modify the original dataframe instead of returning a new one

In [33]:
# inplace=True mutates df itself and returns None, so the cell shows no output.
# Note: re-running this cell raises a KeyError, because 'A' is already gone.
df.drop('A' , axis = 0  , inplace = True)

In [34]:
# Display the frame to confirm row 'A' has been removed.
df

Unnamed: 0,x1,x2,x3,x4,x5
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


### Using `iloc` and `loc` for indexing

The alternate way of indexing is using: 
- `iloc` : used for implicit (integer-position) indexing. Positions always start from 0 and always exist, whatever the row labels are.
- `loc` : used for explicit (label-based) indexing. It uses the index labels, such as 'A', 'B', etc. in our example above; if no index is specified, pandas assigns default integer labels 0, 1, 2, ...

#### Implicit Indexing: 

In [36]:
# Use iloc for indexing rows by integer position

# Implicit indexing (single row): position 0 is row 'B' here, since 'A' was dropped
df.iloc[0]


x1    0.621336
x2   -0.720086
x3    0.265512
x4    0.108549
x5   -0.098750
Name: B, dtype: float64

In [37]:
# Implicit indexing (multiple rows): positions 0 and 2 are rows 'B' and 'D'
df.iloc[[0,2]]


Unnamed: 0,x1,x2,x3,x4,x5
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
D,-0.965066,1.028274,0.22863,0.445138,0.063208


#### Explicit Indexing: 

In [38]:
# Explicit indexing (single row): select by label; the row comes back as a Series
df.loc['B' ]

x1    0.621336
x2   -0.720086
x3    0.265512
x4    0.108549
x5   -0.098750
Name: B, dtype: float64

In [39]:
# Explicit indexing (multiple rows): a list of labels returns a DataFrame
df.loc[['B' , 'C']]

Unnamed: 0,x1,x2,x3,x4,x5
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309


### Indexing both rows and columns using `loc` and `iloc`

In [42]:
# Implicit indexing on rows and columns: the first list gives row positions,
# the second gives column positions
df.iloc[[0,2] , [0,2]]

Unnamed: 0,x1,x3
B,0.621336,0.265512
D,-0.965066,0.22863


In [43]:
# Explicit indexing on rows and columns: the first list gives row labels,
# the second gives column labels
df.loc[['B' , 'C'] , ['x1' , 'x2']]

Unnamed: 0,x1,x2
B,0.621336,-0.720086
C,0.004291,-0.1746
