In [1]:
import numpy as np
import pandas as pd

# DataFrames

A DataFrame is a collection of Series objects that share a common index.

In [16]:
# randn is used by the next cell to fill the example DataFrame.
from numpy.random import randn
# Seed NumPy's global random state so the randn draws are repeatable
# when the notebook is run top to bottom on a fresh kernel.
np.random.seed(50) 

In [25]:
# Example frame: 5 rows labelled A-E and 4 columns labelled W-Z,
# filled with draws from randn.
df = pd.DataFrame(data=randn(5, 4), index=list('ABCDE'), columns=list('WXYZ'))
df

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


# Selection and Indexing

### Selecting Columns

In [23]:
# Bracket indexing with a single column label selects that column.
df['Y']

A   -0.031111
B   -0.052551
C    0.137515
D    1.521662
E    0.866425
Name: Y, dtype: float64

In [24]:
# A single selected column is a pandas Series.
type(df['Y'])

pandas.core.series.Series

In [26]:
# Attribute (dot) access to a column. Not recommended: it only works when the
# label is a valid Python identifier and does not clash with an existing
# DataFrame attribute or method; prefer df['Y'].

df.Y

A   -0.031111
B   -0.052551
C    0.137515
D    1.521662
E    0.866425
Name: Y, dtype: float64

In [27]:
# Passing a list of labels selects several columns and returns a DataFrame.
df[['W','Z']]

Unnamed: 0,W,Z
A,1.230885,-3.80989
B,-0.19905,-0.634281
C,-0.362931,-0.719296
D,0.728523,-1.306773
E,-0.889768,-0.535536


In [40]:
# Create a column 'new' holding the element-wise sum of columns W and Y.
df['new'] = df['W']+df['Y']

In [41]:
# Display df, which now includes the 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,1.230885,-2.373453,-0.031111,-3.80989,1.199775
B,-0.19905,0.350431,-0.052551,-0.634281,-0.2516
C,-0.362931,-2.57092,0.137515,-0.719296,-0.225416
D,0.728523,1.621745,1.521662,-1.306773,2.250185
E,-0.889768,0.005268,0.866425,-0.535536,-0.023343


In [42]:
# Removing columns: axis=1 makes drop() look up 'new' among the column labels.
# The result is returned as a new DataFrame; df itself is not modified.
df.drop('new',axis = 1)

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [43]:
# df still contains 'new': the drop() above returned a new frame rather than modifying df.
df

Unnamed: 0,W,X,Y,Z,new
A,1.230885,-2.373453,-0.031111,-3.80989,1.199775
B,-0.19905,0.350431,-0.052551,-0.634281,-0.2516
C,-0.362931,-2.57092,0.137515,-0.719296,-0.225416
D,0.728523,1.621745,1.521662,-1.306773,2.250185
E,-0.889768,0.005268,0.866425,-0.535536,-0.023343


In [44]:
# Method 1 for keeping the result of a column removal: assign the frame
# returned by drop() to a variable. df itself is left untouched.
df1 = df.drop('new',axis = 1)

In [45]:
# df1 holds the frame without the 'new' column.
df1

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [46]:
# The original df still has the 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,1.230885,-2.373453,-0.031111,-3.80989,1.199775
B,-0.19905,0.350431,-0.052551,-0.634281,-0.2516
C,-0.362931,-2.57092,0.137515,-0.719296,-0.225416
D,0.728523,1.621745,1.521662,-1.306773,2.250185
E,-0.889768,0.005268,0.866425,-0.535536,-0.023343


In [47]:
# Method 2: inplace=True makes drop() modify df directly instead of returning a new frame.
# NOTE: this cell is not re-runnable; a second run raises KeyError because 'new' is already gone.
df.drop('new',axis = 1, inplace=True)

In [48]:
# df no longer has the 'new' column.
df

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [51]:
# axis=0 drops by row label: returns a new frame without row 'E'.
df.drop('E',axis=0)

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773


In [52]:
# Row 'E' is still present in df because the drop above was not in place.
df

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


### Selecting Rows

In [54]:
df.loc['B']  # .loc selects by label; a single row comes back as a Series named 'B'

W   -0.199050
X    0.350431
Y   -0.052551
Z   -0.634281
Name: B, dtype: float64

In [55]:
df.iloc[0] # .iloc selects by integer position; position 0 is row 'A'

W    1.230885
X   -2.373453
Y   -0.031111
Z   -3.809890
Name: A, dtype: float64

In [56]:
# Selecting a subset of rows and columns:
# .loc[row_label, column_label] with two single labels returns one value.

df.loc['D','Y']

1.5216622497713628

In [58]:
# Lists of row labels and column labels select a sub-DataFrame.
df.loc[['B','C'],['X','Z']]

Unnamed: 0,X,Z
B,0.350431,-0.634281
C,-2.57092,-0.719296


In [61]:
# Positional selection of rows and columns. Negative positions count from the end,
# so with five rows -5 is the first row ('A'); columns 2 and 3 are 'Y' and 'Z'.
df.iloc[[-5,2],[2,3]]

Unnamed: 0,Y,Z
A,-0.031111,-3.80989
C,0.137515,-0.719296


# Conditional Selection

In [62]:
# Display df as the reference for the comparisons below.
df

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [63]:
# Element-wise comparison: produces a DataFrame of booleans with the same labels as df.
df>0

Unnamed: 0,W,X,Y,Z
A,True,False,False,False
B,False,True,False,False
C,False,False,True,False
D,True,True,True,False
E,False,True,True,False


In [64]:
df[df>0]  # boolean-DataFrame indexing keeps values where the mask is True and NaN elsewhere; rarely used

Unnamed: 0,W,X,Y,Z
A,1.230885,,,
B,,0.350431,,
C,,,0.137515,
D,0.728523,1.621745,1.521662,
E,,0.005268,0.866425,


In [65]:
# Comparing a single column gives a boolean Series with one entry per row.
df['X']>0

A    False
B     True
C    False
D     True
E     True
Name: X, dtype: bool

In [72]:
# Display df again before filtering its rows.
df

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
B,-0.19905,0.350431,-0.052551,-0.634281
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [81]:
# A boolean Series used as the indexer keeps only the rows where it is True (here X > 0).
df[df['X']>0]

Unnamed: 0,W,X,Y,Z
B,-0.19905,0.350431,-0.052551,-0.634281
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [78]:
# & is the element-wise AND: rows where both W > 0 and Y > 0.
# Each condition needs its own parentheses because & binds tighter than >;
# Python's `and` cannot be used on Series (it raises ValueError).
df[(df['W']>0) & (df['Y']>0) ]

Unnamed: 0,W,X,Y,Z
D,0.728523,1.621745,1.521662,-1.306773


In [79]:
# | is the element-wise OR: rows where W > 0 or Y > 0 (or both).
df[(df['W']>0) | (df['Y']>0) ]

Unnamed: 0,W,X,Y,Z
A,1.230885,-2.373453,-0.031111,-3.80989
C,-0.362931,-2.57092,0.137515,-0.719296
D,0.728523,1.621745,1.521662,-1.306773
E,-0.889768,0.005268,0.866425,-0.535536


In [None]:
# Scratch cell intentionally left without code: the incomplete expression `0-`
# that used to be here was a SyntaxError and stopped Restart & Run All.