## Manipulation
#### Import package

In [1]:
import numpy as np
import pandas as pd

#### Concatenate Series and DataFrame
`pd.concat([a, b, c...], axis)`

In [2]:
# Two integer Series whose index labels do not overlap (1..3 and 4..6).
a = pd.Series(data=[1, 2, 3], index=[1, 2, 3])
b = pd.Series(data=[4, 5, 6], index=[4, 5, 6])

# Stack them end to end; each element keeps its original index label.
pd.concat(objs=[a, b])

1    1
2    2
3    3
4    4
5    5
6    6
dtype: int64

In [3]:
# Two 2x3 frames that share the same column labels A, B, C.
a = pd.DataFrame(
    data=[[1, 2, 3], [7, 8, 9]],
    columns=['A', 'B', 'C'],
)
b = pd.DataFrame(
    data=[[4, 5, 6], [10, 11, 12]],
    columns=['A', 'B', 'C'],
)

# Row-wise concatenation (the default axis); each frame's own 0/1 row labels
# are carried over, so the result has duplicate index values.
pd.concat(objs=[a, b])

Unnamed: 0,A,B,C
0,1,2,3
1,7,8,9
0,4,5,6
1,10,11,12


<b>warning</b>: the original index of each input is preserved by default; to discard it and renumber the rows, use `ignore_index=True`

In [4]:
# Same row-wise stack, but drop the source row labels and renumber from 0.
pd.concat(objs=[a, b], ignore_index=True)

Unnamed: 0,A,B,C
0,1,2,3
1,7,8,9
2,4,5,6
3,10,11,12


In [5]:
# axis=1 places the frames side by side, so the column labels A, B, C
# appear twice in the result.
pd.concat(objs=[a, b], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,1,2,3,4,5,6
1,7,8,9,10,11,12


#### Merge DataFrame with different columns
`pd.concat([a, b, c...], join='inner')`

In [6]:
# The two frames overlap only on column 'C'.
a = pd.DataFrame(
    data=[[1, 2, 3], [7, 8, 9]],
    columns=['A', 'B', 'C'],
)
b = pd.DataFrame(
    data=[[4, 5, 6], [10, 11, 12]],
    columns=['C', 'D', 'E'],
)

# With the default join, every column from either frame is kept and cells
# that have no source value are filled with NaN.
pd.concat(objs=[a, b])

Unnamed: 0,A,B,C,D,E
0,1.0,2.0,3,,
1,7.0,8.0,9,,
0,,,4,5.0,6.0
1,,,10,11.0,12.0


In [7]:
# join='inner' keeps only the columns present in every input.
pd.concat(objs=[a, b], join='inner')

Unnamed: 0,C
0,3
1,9
0,4
1,10


`pd.concat([a, b, c...], join='outer')` (the default: keeps the union of all columns and fills missing cells with `NaN`)

In [8]:
# join='outer' keeps the union of all columns; cells with no source value
# become NaN.
pd.concat(objs=[a, b], join='outer')

Unnamed: 0,A,B,C,D,E
0,1.0,2.0,3,,
1,7.0,8.0,9,,
0,,,4,5.0,6.0
1,,,10,11.0,12.0


#### Another way to concatenate Series or DataFrame
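`a.append(b)`<br>
<b>warning</b>: `Series.append` and `DataFrame.append` were deprecated in pandas 1.4 and removed in pandas 2.0; on current versions use `pd.concat([a, b])` instead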

In [9]:
# Two integer Series whose index labels do not overlap.
a = pd.Series([1, 2, 3], index=[1, 2, 3])
b = pd.Series([4, 5, 6], index=[4, 5, 6])

# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# a.append(b) raises AttributeError on current versions. pd.concat([a, b])
# gives the same result: b's elements follow a's, index labels preserved.
pd.concat([a, b])

1    1
2    2
3    3
4    4
5    5
6    6
dtype: int64

In [10]:
# Two 2x3 frames that share the same column labels.
a = pd.DataFrame([[1, 2, 3], [7, 8, 9]], columns=['A', 'B', 'C'])
b = pd.DataFrame([[4, 5, 6], [10, 11, 12]], columns=['A', 'B', 'C'])

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# a.append(b) raises AttributeError on current versions. pd.concat([a, b])
# gives the same result: b's rows follow a's, original row labels preserved.
pd.concat([a, b])

Unnamed: 0,A,B,C
0,1,2,3
1,7,8,9
0,4,5,6
1,10,11,12


#### Merge DataFrame with same attributes
`pd.merge(a, b)`

In [11]:
# Both frames describe the same three people; 'name' is the only column they
# have in common.
a = pd.DataFrame({
    'name': ['Kevin', 'Jason', 'Zoe'],
    'height': [175, 168, 170],
})
b = pd.DataFrame({
    'name': ['Kevin', 'Jason', 'Zoe'],
    'weight': [70, 65, 58],
})

# No key is given, so merge joins on the column(s) the two frames share.
pd.merge(left=a, right=b)

Unnamed: 0,height,name,weight
0,175,Kevin,70
1,168,Jason,65
2,170,Zoe,58


In [12]:
# 'class' repeats in a (two students in class A) but is unique in b, so each
# student row picks up the supervisor of its class.
a = pd.DataFrame({
    'name': ['Kevin', 'Jason', 'Zoe'],
    'class': ['A', 'A', 'B'],
})
b = pd.DataFrame({
    'class': ['A', 'B'],
    'supervisor': ['Lynn', 'Lydia'],
})

# 'class' is the only shared column, so it is used as the join key.
pd.merge(left=a, right=b)

Unnamed: 0,class,name,supervisor
0,A,Kevin,Lynn
1,A,Jason,Lynn
2,B,Zoe,Lydia


#### Merge DataFrame on specific attributes
`pd.merge(a, b, on=attributes)`

In [13]:
# Student roster and a class -> supervisor lookup table.
a = pd.DataFrame({
    'name': ['Kevin', 'Jason', 'Zoe'],
    'class': ['A', 'A', 'B'],
})
b = pd.DataFrame({
    'class': ['A', 'B'],
    'supervisor': ['Lynn', 'Lydia'],
})

# Name the join key explicitly rather than relying on the shared-column default.
pd.merge(left=a, right=b, on='class')

Unnamed: 0,class,name,supervisor
0,A,Kevin,Lynn
1,A,Jason,Lynn
2,B,Zoe,Lydia


<b>warning</b>: if the key columns have different names in the two DataFrames, name each one explicitly<br>
`pd.merge(a, b, left_on=attributes, right_on=attributes)`

In [14]:
# The join key is called 'class' in a but 'school_class' in b.
a = pd.DataFrame({
    'name': ['Kevin', 'Jason', 'Zoe'],
    'class': ['A', 'A', 'B'],
})
b = pd.DataFrame({
    'school_class': ['A', 'B'],
    'supervisor': ['Lynn', 'Lydia'],
})

# Name the key column on each side; both key columns appear in the result.
pd.merge(left=a, right=b, left_on='class', right_on='school_class')

Unnamed: 0,class,name,school_class,supervisor
0,A,Kevin,A,Lynn
1,A,Jason,A,Lynn
2,B,Zoe,B,Lydia
