[back](./10D-slicing-dataframes.ipynb)

---
### `Slicing DataFrame`


In [1]:
# Importing pandas

import pandas as pd


In [2]:
# Creating data-set
#
# Each column is described by a {row label: value} mapping. The three columns
# do not share the same row labels, so the resulting frame spans row1..row4
# and the cells a column has no value for show up as NaN in the printout.

df = pd.DataFrame(dict(
    col1=dict(row1=1, row2=4, row3=7),
    col2=dict(row1=2, row3=5, row4=8),
    col3=dict(row1=3, row2=6, row4=9),
))


def divider():
    """Print a line of 80 hyphens to visually separate printed results."""
    rule = '-' * 80
    print(rule)


def print_df():
    """Print the notebook-level ``df`` and then a divider line.

    Takes no arguments: it reads the global ``df`` and calls ``divider()``.
    """
    print(df)
    divider()


# Show the complete frame once so the selections below can be compared to it
print_df()


      col1  col2  col3
row1   1.0   2.0   3.0
row2   4.0   NaN   6.0
row3   7.0   5.0   NaN
row4   NaN   8.0   9.0
--------------------------------------------------------------------------------


In [3]:
# Selecting columns with a list of labels (rather than a `:` slice as previously)

# A list of column labels inside [] returns just those columns
cols = df[['col1', 'col2']]
print('First two columns:', cols, sep='\n')
divider()

# The order of the labels in the list sets the column order of the result
cols = df[['col3', 'col1', 'col2']]
print('Rearranged columns:', cols, sep='\n')
divider()

First two columns:
      col1  col2
row1   1.0   2.0
row2   4.0   NaN
row3   7.0   5.0
row4   NaN   8.0
--------------------------------------------------------------------------------
Rearranged columns:
      col3  col1  col2
row1   3.0   1.0   2.0
row2   6.0   4.0   NaN
row3   NaN   7.0   5.0
row4   9.0   NaN   8.0
--------------------------------------------------------------------------------


In [4]:
# Row selection with a list of positions or labels

# Method 1: integer positions with .iloc
rows = df.iloc[[0, 1]]
# Selecting with a list keeps the result 2D: a DataFrame, not a Series
print('First two rows:', rows, sep='\n')
divider()

# Method 2: index labels with .loc
rows = df.loc[['row1', 'row2']]
print('First two rows based on .loc:', rows, sep='\n')
divider()

First two rows:
      col1  col2  col3
row1   1.0   2.0   3.0
row2   4.0   NaN   6.0
--------------------------------------------------------------------------------
First two rows based on .loc:
      col1  col2  col3
row1   1.0   2.0   3.0
row2   4.0   NaN   6.0
--------------------------------------------------------------------------------


In [5]:
# Reordering rows: the result follows the order of the list that is passed in

# By integer position (.iloc)
rows = df.iloc[[2, 3, 1, 0]]
print('DataFrame with shuffled rows using .iloc:', rows, sep='\n')
divider()

# By index label (.loc)
rows = df.loc[['row2', 'row4', 'row1', 'row3']]
print('DataFrame with shuffled rows using .loc:', rows, sep='\n')
divider()


DataFrame with shuffled rows using .iloc:
      col1  col2  col3
row3   7.0   5.0   NaN
row4   NaN   8.0   9.0
row2   4.0   NaN   6.0
row1   1.0   2.0   3.0
--------------------------------------------------------------------------------
DataFrame with shuffled rows using .loc:
      col1  col2  col3
row2   4.0   NaN   6.0
row4   NaN   8.0   9.0
row1   1.0   2.0   3.0
row3   7.0   5.0   NaN
--------------------------------------------------------------------------------


In [6]:
# Python-style start:stop slicing on row positions with .iloc

# `:2` -> positions 0 and 1 (the stop position is excluded)
rows = df.iloc[:2]
print('Slicing first two rows from DataFrame with : using .iloc', rows, sep='\n')
divider()

# `1:3` -> positions 1 and 2
rows = df.iloc[1:3]
print('Slicing middle two rows from DataFrame with : using .iloc', rows, sep='\n')
divider()

# `-2:` -> a negative start counts from the end, giving the last two rows
rows = df.iloc[-2:]
print('Slicing last two rows from DataFrame with : using .iloc', rows, sep='\n')
divider()


Slicing first two rows from DataFrame with : using .iloc
      col1  col2  col3
row1   1.0   2.0   3.0
row2   4.0   NaN   6.0
--------------------------------------------------------------------------------
Slicing middle two rows from DataFrame with : using .iloc
      col1  col2  col3
row2   4.0   NaN   6.0
row3   7.0   5.0   NaN
--------------------------------------------------------------------------------
Slicing last two rows from DataFrame with : using .iloc
      col1  col2  col3
row3   7.0   5.0   NaN
row4   NaN   8.0   9.0
--------------------------------------------------------------------------------


In [7]:
# Skipping rows with a slice step

# `::2` keeps every second row, starting from the first one
rows = df.iloc[::2]
print('Print all columns, but stepping over rows by 2:', rows, sep='\n')
divider()

Print all columns, but stepping over rows by 2:
      col1  col2  col3
row1   1.0   2.0   3.0
row3   7.0   5.0   NaN
--------------------------------------------------------------------------------


In [8]:
# Slicing columns - every row, first two columns
# (this is a start:stop slice of the columns; no step is used, nothing is skipped)

# .iloc takes [row_indexer, column_indexer]: a bare `:` keeps all rows and
# `:2` keeps the columns at positions 0 and 1.
# The result is a column selection, so it is named accordingly instead of `rows`.
first_two_cols = df.iloc[:, :2]
print('Print all rows, but first two columns:')
print(first_two_cols)
divider()


Print all rows, but first two columns:
      col1  col2
row1   1.0   2.0
row2   4.0   NaN
row3   7.0   5.0
row4   NaN   8.0
--------------------------------------------------------------------------------


In [9]:
# Slicing rows and columns together
# (both indexers are start:stop slices; no step is used, nothing is skipped)

# `:2` on both axes keeps the first two rows and the first two columns.
# The result is a sub-frame, so it is not named `rows`.
top_left = df.iloc[:2, :2]
print('Print first two rows and columns:')
print(top_left)
divider()


Print first two rows and columns:
      col1  col2
row1   1.0   2.0
row2   4.0   NaN
--------------------------------------------------------------------------------



---
[next]()