In [1]:
# Import NumPy package and load pandas

# Reference:  https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html
import numpy as np
import pandas as pd

In [4]:
# Nested mapping used to build the demo DataFrame: each outer key is a column
# name and each inner dict maps a row label to that column's value.
# The inner dicts cover different sets of row labels, so the frame built from
# this mapping contains missing values (NaN) wherever a column has no entry.
# NOTE(review): the name `dict` shadows Python's built-in `dict` type for the
# rest of the session; it is kept here because later cells refer to it by name.
# NOTE(review): the labels 'R4:', 'R6:' and 'R5:' carry a trailing colon inside
# the string — presumably a typo for 'R4'/'R6'/'R5'; confirm before changing,
# because the displayed outputs show these labels verbatim.
dict = {
    'col1' : {'R1':1.2,'R2':2.2}, 
    'col2':{'R1':3.2,'R2':4.2, 'R3':5.5},
    'col3' : {'R1':'A', 'R2':'B','R3':'C','R4:':'D'},
    'col4' : {'R1':'X', 'R2':'Y','R3':'Z','R6:':'D'},
    'col5' : {'R1':1, 'R2':2,'R3':3,'R5:':4}
}


In [8]:
# Walk the outer mapping and print each column name next to its
# {row label: value} dictionary.
for column_name, row_values in dict.items():
    print(column_name, row_values)

col1 {'R1': 1.2, 'R2': 2.2}
col2 {'R1': 3.2, 'R2': 4.2, 'R3': 5.5}
col3 {'R1': 'A', 'R2': 'B', 'R3': 'C', 'R4:': 'D'}
col4 {'R1': 'X', 'R2': 'Y', 'R3': 'Z', 'R6:': 'D'}
col5 {'R1': 1, 'R2': 2, 'R3': 3, 'R5:': 4}


In [9]:
# Build the DataFrame from the nested mapping: outer keys become the columns,
# inner keys become the row index, and missing combinations are filled with NaN.
df = pd.DataFrame(data=dict)

In [10]:
df

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0
R4:,,,D,,
R5:,,,,,4.0
R6:,,,,D,


In [11]:
# Get the column labels of the frame (returned as an Index object)
df.columns

Index(['col1', 'col2', 'col3', 'col4', 'col5'], dtype='object')

In [12]:
# Get the row labels of the frame (its index) — this returns the labels only, not the row data
df.index

Index(['R1', 'R2', 'R3', 'R4:', 'R5:', 'R6:'], dtype='object')

In [13]:
df.head()  # head() with no argument returns the first five rows by default (row R6: is left out)

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0
R4:,,,D,,
R5:,,,,,4.0


# Accessing Columns

In [None]:
# Columns can be accessed with dict-like notation (df['col1']) or as attributes (df.col1)

In [15]:
print(type(df['col1']))

<class 'pandas.core.series.Series'>


In [16]:
print(df['col3'])

R1       A
R2       B
R3       C
R4:      D
R5:    NaN
R6:    NaN
Name: col3, dtype: object


In [17]:
print(type(df.col1))

<class 'pandas.core.series.Series'>


In [18]:
print(df.col3)

R1       A
R2       B
R3       C
R4:      D
R5:    NaN
R6:    NaN
Name: col3, dtype: object


In [20]:
df.col3

R1       A
R2       B
R3       C
R4:      D
R5:    NaN
R6:    NaN
Name: col3, dtype: object

# Access a subset of the columns in a DataFrame by passing a list of column names inside the indexing brackets, e.g. df[['col1', 'col4']]

In [21]:
df.col3.head(3)

R1    A
R2    B
R3    C
Name: col3, dtype: object

In [23]:
df[['col1','col4']].head(4)

Unnamed: 0,col1,col4
R1,1.2,X
R2,2.2,Y
R3,,Z
R4:,,


# Slicing

In [24]:
df['col2'].iloc[:2]  # first two values of col2, sliced by integer position

R1    3.2
R2    4.2
Name: col2, dtype: float64

# Accessing Rows

In [None]:
# Rows can be accessed by label with the .loc[] indexer or by integer position with the .iloc[] indexer (both use square brackets, not call parentheses)

In [25]:
df.loc['R2']

col1    2.2
col2    4.2
col3      B
col4      Y
col5      2
Name: R2, dtype: object

In [26]:
type(df.loc['R2']) # Accessed by label

pandas.core.series.Series

In [27]:
df

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0
R4:,,,D,,
R5:,,,,,4.0
R6:,,,,D,


In [29]:
df.iloc[0] # Access by integer position: 0 is the first row (R1), regardless of its label

col1    1.2
col2    3.2
col3      A
col4      X
col5      1
Name: R1, dtype: object

In [31]:
# Multiple rows can be selected by passing a list of row labels to .loc
# (or a list of integer positions to .iloc); the result is a DataFrame.
df.loc[['R2','R3']]

Unnamed: 0,col1,col2,col3,col4,col5
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0


In [36]:
df.iloc[[1,2,3]]

Unnamed: 0,col1,col2,col3,col4,col5
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0
R4:,,,D,,


In [None]:
# Select specific rows and columns at once by passing two lists of integer positions to .iloc[rows, columns]

In [37]:
df.iloc[[1,3],[1,3]]

Unnamed: 0,col2,col4
R2,4.2,Y
R4:,,


# Slicing Row

In [38]:
df.iloc[:2]  # first two rows, sliced by integer position

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R2,2.2,4.2,B,Y,2.0


In [39]:
df.iloc[-2:]  # last two rows, sliced by integer position

Unnamed: 0,col1,col2,col3,col4,col5
R5:,,,,,4.0
R6:,,,,D,


# Accessing Value and Filtering with rows and cols

In [40]:
df

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0
R4:,,,D,,
R5:,,,,,4.0
R6:,,,,D,


In [41]:
df.col3.iloc[2]

'C'

In [47]:
df.loc['R1'] # Single label: the row comes back as a Series whose index is the column names

col1    1.2
col2    3.2
col3      A
col4      X
col5      1
Name: R1, dtype: object

In [48]:
df.loc[['R1']] # List of labels: the result stays a DataFrame (here with the single row R1)

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0


In [50]:
df.iloc[1]

col1    2.2
col2    4.2
col3      B
col4      Y
col5      2
Name: R2, dtype: object

In [51]:
df.col1.iloc[1]

2.2

In [55]:
type(df.col3.iloc[2])

str

In [56]:
df.col3.iloc[[2]]

R3    C
Name: col3, dtype: object

In [57]:
df.col2>4

R1     False
R2      True
R3      True
R4:    False
R5:    False
R6:    False
Name: col2, dtype: bool

In [59]:
df.col2.isnull()

R1     False
R2     False
R3     False
R4:     True
R5:     True
R6:     True
Name: col2, dtype: bool

In [64]:
df.iloc[0].isnull()

col1    False
col2    False
col3    False
col4    False
col5    False
Name: R1, dtype: bool

In [66]:
df.iloc[[2]].isnull()

Unnamed: 0,col1,col2,col3,col4,col5
R3,True,False,False,False,False


In [69]:
df.loc['R3'].isnull()

col1     True
col2    False
col3    False
col4    False
col5    False
Name: R3, dtype: bool

In [74]:
df.col2  # Select the single column col2 as a Series (all rows are kept, nothing is filtered)

R1     3.2
R2     4.2
R3     5.5
R4:    NaN
R5:    NaN
R6:    NaN
Name: col2, dtype: float64

In [73]:
df[['col2']]>4 # Element-wise comparison: yields a boolean DataFrame (a mask) with every row kept; NaN compares as False

Unnamed: 0,col2
R1,False
R2,True
R3,True
R4:,False
R5:,False
R6:,False


In [70]:
df.loc[df['col2'] > 4]  # keep only the rows whose col2 value exceeds 4

Unnamed: 0,col1,col2,col3,col4,col5
R2,2.2,4.2,B,Y,2.0
R3,,5.5,C,Z,3.0


In [80]:
df.loc[df['col5'] > 2]  # keep only the rows whose col5 value exceeds 2

Unnamed: 0,col1,col2,col3,col4,col5
R3,,5.5,C,Z,3.0
R5:,,,,,4.0


In [82]:
df.loc[df['col1'].notna()]  # keep only the rows where col1 is not missing

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R2,2.2,4.2,B,Y,2.0


In [83]:
df.loc[df['col3'].isin(['A', 'D'])]  # membership test, like SQL's IN clause

Unnamed: 0,col1,col2,col3,col4,col5
R1,1.2,3.2,A,X,1.0
R4:,,,D,,
