# Filters & Multivalue Columns

In [3]:
import numpy as np
import pandas as pd

In [4]:
# Load the order records; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="data.csv")
# Preview the first five rows only.
data.head(n=5)

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total
0,1/6/18,East,Jones,Pencil,95,1.99,189.05
1,1/23/18,Central,Kivell,Binder,50,19.99,999.5
2,2/9/18,Central,Jardine,Pencil,36,4.99,179.64
3,2/26/18,Central,Gill,Pen,27,19.99,539.73
4,3/15/18,West,Sorvino,Pencil,56,2.99,167.44


### Filtering by row or column index

In [6]:
# Select columns by exact label.
# NOTE: "Unit" is not a column label in this frame (it has "Units" and "Unit Cost").
# filter(items=...) keeps only the labels that exist and does not raise for the
# others, which is why the result below contains just "Item".
data.filter(items=["Item", "Unit"]).head()

Unnamed: 0,Item
0,Pencil
1,Binder
2,Pencil
3,Pen
4,Pencil


In [7]:
# Keep the columns whose label matches the regular expression, i.e. starts with "R".
data.filter(regex='^R', axis="columns").head(n=5)

Unnamed: 0,Region,Rep
0,East,Jones
1,Central,Kivell
2,Central,Jardine
3,Central,Gill
4,West,Sorvino


In [8]:
# Keep the columns whose label contains the substring "it" (case-sensitive).
data.filter(like='it', axis="columns").head(n=5)

Unnamed: 0,Units,Unit Cost
0,95,1.99
1,50,19.99
2,36,4.99
3,27,19.99
4,56,2.99


### The same rules apply to the row index when `axis=0` is specified

In [9]:
# Filter along the row index instead of the columns: keep rows labelled 0, 3 and 5.
data.filter(items=[0, 3, 5], axis="index")

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total
0,1/6/18,East,Jones,Pencil,95,1.99,189.05
3,2/26/18,Central,Gill,Pen,27,19.99,539.73
5,4/1/18,East,Jones,Binder,60,4.99,299.4


### Dealing with multivalue columns

In [10]:
import random

# Build 10 rows of 5 random integers each, every value in 1..9 inclusive.
# No seed is set, so the values differ on every run.
x = [
    [random.randint(1, 9) for _col in range(5)]
    for _row in range(10)
]
x

[[1, 9, 3, 2, 9],
 [7, 5, 9, 9, 2],
 [1, 4, 9, 2, 5],
 [6, 1, 6, 1, 1],
 [4, 9, 4, 7, 2],
 [8, 6, 3, 8, 4],
 [9, 5, 1, 9, 8],
 [9, 8, 9, 3, 3],
 [3, 9, 1, 6, 8],
 [3, 1, 8, 3, 3]]

In [13]:
# Ten whitespace-separated names, one per row of `x`.
names = "John Mary Ed Bill Frank Mark Sally Lix Wendy Henry"

# "Sales" is a multivalue column: each cell holds an entire list from `x`.
df = pd.DataFrame(dict(Name=names.split(), Sales=x))
df

Unnamed: 0,Name,Sales
0,John,"[1, 9, 3, 2, 9]"
1,Mary,"[7, 5, 9, 9, 2]"
2,Ed,"[1, 4, 9, 2, 5]"
3,Bill,"[6, 1, 6, 1, 1]"
4,Frank,"[4, 9, 4, 7, 2]"
5,Mark,"[8, 6, 3, 8, 4]"
6,Sally,"[9, 5, 1, 9, 8]"
7,Lix,"[9, 8, 9, 3, 3]"
8,Wendy,"[3, 9, 1, 6, 8]"
9,Henry,"[3, 1, 8, 3, 3]"


### Simple split: one column per list element

In [14]:
# Turn the list stored in each "Sales" cell into one row of values,
# labelling the five positions with month names.
df2 = pd.DataFrame(
    df["Sales"].tolist(),
    columns=["May", "June", "July", "August", "Sept"],
)

In [15]:
# Display the split frame: one column per month, one row per original "Sales" list.
df2

Unnamed: 0,May,June,July,August,Sept
0,1,9,3,2,9
1,7,5,9,9,2
2,1,4,9,2,5
3,6,1,6,1,1
4,4,9,4,7,2
5,8,6,3,8,4
6,9,5,1,9,8
7,9,8,9,3,3
8,3,9,1,6,8
9,3,1,8,3,3


### Alternative method

In [20]:
# Expand each "Sales" list into its own columns (labelled by position 0..4)
# and place the "Name" column alongside, aligned on the shared row index.
pd.concat(
    objs=[df["Sales"].apply(pd.Series), df["Name"]],
    axis="columns",
)

Unnamed: 0,0,1,2,3,4,Name
0,1,9,3,2,9,John
1,7,5,9,9,2,Mary
2,1,4,9,2,5,Ed
3,6,1,6,1,1,Bill
4,4,9,4,7,2,Frank
5,8,6,3,8,4,Mark
6,9,5,1,9,8,Sally
7,9,8,9,3,3,Lix
8,3,9,1,6,8,Wendy
9,3,1,8,3,3,Henry
