In [0]:
import pandas as pd
import numpy as np

In [0]:
pd.set_option('display.float_format', '{:.2f}'.format)
np.set_printoptions(precision=1)

## Concatenate Numpy
---

In [8]:
x1 = np.random.rand(2, 5) * 10
x1

array([[5.5, 8.2, 8.4, 7.2, 7.6],
       [4.4, 5. , 5.6, 6.5, 5.5]])

In [9]:
x2 = np.random.rand(2, 5) * -1
x2 

array([[-0.6, -0.7, -0.9, -0.2, -0.9],
       [-0.9, -0.3, -0.1, -0.6, -0.1]])

In [10]:
np.concatenate([x1, x2])

array([[ 5.5,  8.2,  8.4,  7.2,  7.6],
       [ 4.4,  5. ,  5.6,  6.5,  5.5],
       [-0.6, -0.7, -0.9, -0.2, -0.9],
       [-0.9, -0.3, -0.1, -0.6, -0.1]])

In [11]:
np.concatenate([x1, x2], axis=1)

array([[ 5.5,  8.2,  8.4,  7.2,  7.6, -0.6, -0.7, -0.9, -0.2, -0.9],
       [ 4.4,  5. ,  5.6,  6.5,  5.5, -0.9, -0.3, -0.1, -0.6, -0.1]])

## Concat Pandas (Series)
---

In [15]:
s1 = pd.Series(x1[0], index=['a', 'b', 'c', 'd', 'e'])
s1

a   5.49
b   8.22
c   8.42
d   7.21
e   7.62
dtype: float64

In [16]:
s2 = pd.Series(x2[0], index=['c', 'd', 'e', 'f', 'g'])
s2

c   -0.55
d   -0.66
e   -0.91
f   -0.23
g   -0.94
dtype: float64

In [19]:
pd.concat([s1, s2]) # Index keys repeated

a    5.49
b    8.22
c    8.42
d    7.21
e    7.62
c   -0.55
d   -0.66
e   -0.91
f   -0.23
g   -0.94
dtype: float64

In [21]:
pd.concat([s1, s2], axis=1) # Concat using index keys

Unnamed: 0,0,1
a,5.49,
b,8.22,
c,8.42,-0.55
d,7.21,-0.66
e,7.62,-0.91
f,,-0.23
g,,-0.94


In [27]:
s1.reset_index() # Resets index. Adds column "index"

Unnamed: 0,index,0
0,a,5.49
1,b,8.22
2,c,8.42
3,d,7.21
4,e,7.62


In [28]:
s1.reset_index(drop=True) # The same but without "index" column

0   5.49
1   8.22
2   8.42
3   7.21
4   7.62
dtype: float64

## Concat pandas (DataFrames)
---

In [30]:
df1 = pd.DataFrame(np.random.rand(3, 2)*10, columns=['a', 'b'])
df1

Unnamed: 0,a,b
0,8.97,0.09
1,2.43,2.84
2,0.82,5.63


In [31]:
df2 = pd.DataFrame(np.random.rand(3, 2)*-1, columns=['a', 'b'], index=[2, 3, 4])
df2

Unnamed: 0,a,b
2,-0.36,-0.16
3,-0.5,-0.09
4,-0.47,-0.01


In [35]:
pd.concat([df1, df2]) # Index keys repeated

Unnamed: 0,a,b
0,8.97,0.09
1,2.43,2.84
2,0.82,5.63
2,-0.36,-0.16
3,-0.5,-0.09
4,-0.47,-0.01


In [42]:
pd.concat([df1, df2], axis=1) # Concat using index keys.

Unnamed: 0,a,b,a.1,b.1
0,8.97,0.09,,
1,2.43,2.84,,
2,0.82,5.63,-0.36,-0.16
3,,,-0.5,-0.09
4,,,-0.47,-0.01


In [48]:
# Join Inner concats just common elements between dataframes
pd.concat([df1, df2], axis=1, join='inner')

Unnamed: 0,a,b,a.1,b.1
2,0.82,5.63,-0.36,-0.16


In [51]:
df2.reset_index() # Reset index. Added "index" column.

Unnamed: 0,index,a,b
0,2,-0.36,-0.16
1,3,-0.5,-0.09
2,4,-0.47,-0.01


In [52]:
df2.reset_index(drop=True) # The same but without "index" column.

Unnamed: 0,a,b
0,-0.36,-0.16
1,-0.5,-0.09
2,-0.47,-0.01


## Append Pandas (DataFrames)
---

In [53]:
df1.append(df2)

Unnamed: 0,a,b
0,8.97,0.09
1,2.43,2.84
2,0.82,5.63
2,-0.36,-0.16
3,-0.5,-0.09
4,-0.47,-0.01


In [57]:
df1.T.append(df2.T).T

Unnamed: 0,a,b,a.1,b.1
0,8.97,0.09,,
1,2.43,2.84,,
2,0.82,5.63,-0.36,-0.16
3,,,-0.5,-0.09
4,,,-0.47,-0.01


## Merge Pandas (DataFrames)
---

In [60]:
df_left = pd.DataFrame(
                      {'X': ['x0', 'x1', 'x2', 'x3'],
                        'W': ['w0', 'w1', 'w2', 'w3'], 
                        'Y': ['y0', 'y1', 'y2', 'y3'],
                        'Mix': ['y2', 'y3', 'a2', 'a3']},
                       index=[0,1,2,3])
df_left

Unnamed: 0,X,W,Y,Mix
0,x0,w0,y0,y2
1,x1,w1,y1,y3
2,x2,w2,y2,a2
3,x3,w3,y3,a3


In [61]:
df_right = pd.DataFrame(
                       {'Z': ['z2', 'z3', 'z4', 'z5'],
                         'A': ['a2', 'a3', 'a4', 'a5'], 
                         'Y': ['y2', 'y3', 'y4', 'y5']},
                        index=[2,3,4,5])
df_right

Unnamed: 0,Z,A,Y
2,z2,a2,y2
3,z3,a3,y3
4,z4,a4,y4
5,z5,a5,y5


In [63]:
pd.merge(df_left, df_right) # Inner Join using common column as key

Unnamed: 0,X,W,Y,Mix,Z,A
0,x2,w2,y2,a2,z2,a2
1,x3,w3,y3,a3,z3,a3


In [64]:
pd.merge(df_left, df_right, how='inner', on='Y') # The same that above

Unnamed: 0,X,W,Y,Mix,Z,A
0,x2,w2,y2,a2,z2,a2
1,x3,w3,y3,a3,z3,a3


In [66]:
# Using Mix as key for left DataFrame and Y as key for right DataFrame
pd.merge(df_left, df_right, how='inner', left_on='Mix', right_on='Y')

Unnamed: 0,X,W,Y_x,Mix,Z,A,Y_y
0,x0,w0,y0,y2,z2,a2,y2
1,x1,w1,y1,y3,z3,a3,y3


**Merge types**:

![](http://www.datasciencemadesimple.com/wp-content/uploads/2017/09/join-or-merge-in-python-pandas-1.png)

## Merge Pandas (DataFrame)

Multiple common columns

---

In [76]:
df_left = pd.DataFrame(
                      {'X': ['x0', 'x1', 'x2', 'x3'],
                        'W': ['w0', 'w1', 'w2', 'w3'], 
                        'Y': ['y0', 'y1', 'y2', 'y3'],
                        'A': ['a0', 'a1', 'a2', 'a3']},
                       index=[0,1,2,3])
df_left

Unnamed: 0,X,W,Y,A
0,x0,w0,y0,a0
1,x1,w1,y1,a1
2,x2,w2,y2,a2
3,x3,w3,y3,a3


In [77]:
df_right

Unnamed: 0,Z,A,Y
2,z2,a2,y2
3,z3,a3,y3
4,z4,a4,y4
5,z5,a5,y5


In [78]:
pd.merge(df_left, df_right, how='outer', on=['Y', 'A'])

Unnamed: 0,X,W,Y,A,Z
0,x0,w0,y0,a0,
1,x1,w1,y1,a1,
2,x2,w2,y2,a2,z2
3,x3,w3,y3,a3,z3
4,,,y4,a4,z4
5,,,y5,a5,z5


In [82]:
# Suffixes for repeated columns. Y in this example.
pd.merge(df_left, df_right, how='inner', on='A', suffixes=['_left', '_right'])

Unnamed: 0,X,W,Y_left,A,Z,Y_right
0,x2,w2,y2,a2,z2,y2
1,x3,w3,y3,a3,z3,y3
