# Data Preparation Basics

## 1. Filtering and selecting data

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

### Selecting and retrieving data

A Series can be indexed in two ways:
- by label, or
- by integer position

In [2]:
# Label index: eight consecutive integers, each addressed by a 'row N' label
series_obj = Series(
    np.arange(8),
    index=['row {}'.format(n) for n in range(1, 9)],
)
series_obj

row 1    0
row 2    1
row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
row 8    7
dtype: int32

In [3]:
# Look up a single element by its label
series_obj.loc['row 5']

4

In [4]:
# Integer (positional) index: .iloc always selects by position. Plain []
# with integer keys on a label-indexed Series is deprecated in recent pandas,
# where integer keys are going to be interpreted as labels instead.
series_obj.iloc[[0, 7]]

row 1    0
row 8    7
dtype: int32

In [5]:
# Preparing the 2D matrix: seed the global NumPy RNG so the 6x6 frame of
# random values is the same on every run
np.random.seed(24)
DF_obj = DataFrame(
    np.random.rand(6, 6),
    index=['row {}'.format(n) for n in range(1, 7)],
    columns=['column {}'.format(n) for n in range(1, 7)],
)
DF_obj

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.960017,0.699512,0.999867,0.220067,0.361056,0.739841
row 2,0.996456,0.316347,0.136545,0.38398,0.320519,0.366415
row 3,0.709652,0.900142,0.534115,0.247294,0.671807,0.561729
row 4,0.54256,0.893448,0.84278,0.306013,0.63117,0.680239
row 5,0.970428,0.893567,0.942426,0.642225,0.614648,0.227683
row 6,0.486032,0.807219,0.84422,0.534681,0.757798,0.499677


In [6]:
# Select rows and columns by label with .loc; the result keeps the order in
# which the labels are requested (here 'column 5' comes before 'column 2')
DF_obj.loc[['row 2', 'row 5'], ['column 5', 'column 2']]

Unnamed: 0,column 5,column 2
row 2,0.320519,0.316347
row 5,0.614648,0.893567


### Data slicing

In [7]:
# Label-based slices include both endpoints, so 'row 7' is part of the result
series_obj.loc['row 3':'row 7']

row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
dtype: int32

### Comparing with scalars

In [9]:
# Element-wise "less than" against a scalar; yields a boolean frame of the same shape
DF_obj.lt(0.2)

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,False,False,False,False,False,False
row 2,False,False,True,False,False,False
row 3,False,False,False,False,False,False
row 4,False,False,False,False,False,False
row 5,False,False,False,False,False,False
row 6,False,False,False,False,False,False


### Filtering with scalars

In [10]:
# Keep only the elements whose value is greater than 6 (boolean-mask selection)
series_obj.loc[series_obj.gt(6)]

row 8    7
dtype: int32

### Setting values with scalars

In [11]:
# Assign one scalar to several labels at once. The labels must be passed as a
# list: a bare comma-separated key is a tuple, which pandas treats as a single
# key rather than as three separate labels.
# Note: this modifies series_obj in place.
series_obj.loc[['row 1', 'row 3', 'row 5']] = 8
series_obj

row 1    8
row 2    1
row 3    8
row 4    3
row 5    8
row 6    5
row 7    6
row 8    7
dtype: int32