In [1]:
import pandas as pd

### A continuación creamos dos DataFrames con alguna columna en común

In [2]:
# Left-hand example table. Column 'Y' is shared by name with df_right, and
# 'Mix' holds a blend of values that also occur in df_right's 'Y' and 'A'
# columns, so it can serve as a join key against either of them.
df_left = pd.DataFrame(
    data=dict(
        X=['x0', 'x1', 'x2', 'x3'],
        W=['w0', 'w1', 'w2', 'w3'],
        Y=['y0', 'y1', 'y2', 'y3'],
        Mix=['y2', 'y3', 'a2', 'a3'],
    ),
    index=[0, 1, 2, 3],
)
df_left

Unnamed: 0,X,W,Y,Mix
0,x0,w0,y0,y2
1,x1,w1,y1,y3
2,x2,w2,y2,a2
3,x3,w3,y3,a3


In [3]:
# Right-hand example table. Its index labels (2..5) and its 'Y' values
# (y2..y5) only partially overlap with df_left, so the different join
# types produce visibly different results.
df_right = pd.DataFrame(
    data=dict(
        Z=['z2', 'z3', 'z4', 'z5'],
        A=['a2', 'a3', 'a4', 'a5'],
        Y=['y2', 'y3', 'y4', 'y5'],
    ),
    index=[2, 3, 4, 5],
)
df_right

Unnamed: 0,Z,A,Y
2,z2,a2,y2
3,z3,a3,y3
4,z4,a4,y4
5,z5,a5,y5


### A continuación vemos ejemplos con `merge`, que permite unificar la información de distintas tablas que comparten valores en común. Es equivalente a la unión de tablas mediante JOIN en SQL.

In [6]:
# Inner join on the shared column 'Y' (marked '=' below): only rows whose
# 'Y' value is present in both frames are kept, and the key column appears
# once in the result.
#
#    left         right
#  |c c c c|    |y y =|
#  |c c c c|    |y y =|
#  |x x = x|    |d d d|   --inner-->  |x x = x y y|
#  |x x = x|    |d d d|               |x x = x y y|
df_left.merge(df_right, how='inner', on='Y')

Unnamed: 0,X,W,Y,Mix,Z,A
0,x2,w2,y2,a2,z2,a2
1,x3,w3,y3,a3,z3,a3


In [8]:
# Inner join matching the left 'Mix' column against the right 'Y' column.
# Since the key columns have different names, both are kept in the result;
# the two same-named 'Y' columns are disambiguated as 'Y_x' (from the left
# frame) and 'Y_y' (from the right frame).
#
#    left         right
#  |x x x =|    |y y =|
#  |x x x =|    |y y =|
#  |c c c c|    |d d d|   --inner-->  |x x x = y y =|
#  |c c c c|    |d d d|               |x x x = y y =|
df_left.merge(df_right, how='inner', left_on='Mix', right_on='Y')

Unnamed: 0,X,W,Y_x,Mix,Z,A,Y_y
0,x0,w0,y0,y2,z2,a2,y2
1,x1,w1,y1,y3,z3,a3,y3


In [9]:
# Inner join matching the left 'Mix' column against the right 'A' column.
# Both key columns ('Mix' and 'A') are kept, and because each frame also
# has its own 'Y' column, those come out as 'Y_x' (left) and 'Y_y' (right).
#
#    left         right
#  |c c c c|    |y = y|
#  |c c c c|    |y = y|
#  |x x x =|    |d d d|   --inner-->  |x x x = y = y|
#  |x x x =|    |d d d|               |x x x = y = y|
df_left.merge(df_right, how='inner', left_on='Mix', right_on='A')

Unnamed: 0,X,W,Y_x,Mix,Z,A,Y_y
0,x2,w2,y2,a2,z2,a2,y2
1,x3,w3,y3,a3,z3,a3,y3


In [10]:
# Equivalent to SQL's LEFT JOIN: every row of df_left is kept; the columns
# coming from df_right are left missing where no matching 'Y' exists.
df_left.merge(df_right, how='left', on='Y')

Unnamed: 0,X,W,Y,Mix,Z,A
0,x0,w0,y0,y2,,
1,x1,w1,y1,y3,,
2,x2,w2,y2,a2,z2,a2
3,x3,w3,y3,a3,z3,a3


In [11]:
# Equivalent to SQL's RIGHT JOIN: every row of df_right is kept; the columns
# coming from df_left are left missing where no matching 'Y' exists.
df_left.merge(df_right, how='right', on='Y')

Unnamed: 0,X,W,Y,Mix,Z,A
0,x2,w2,y2,a2,z2,a2
1,x3,w3,y3,a3,z3,a3
2,,,y4,,z4,a4
3,,,y5,,z5,a5


In [12]:
# Equivalent to SQL's FULL OUTER JOIN: rows from both frames are kept, with
# missing values wherever a 'Y' key exists on only one side.
df_left.merge(df_right, how='outer', on='Y')

Unnamed: 0,X,W,Y,Mix,Z,A
0,x0,w0,y0,y2,,
1,x1,w1,y1,y3,,
2,x2,w2,y2,a2,z2,a2
3,x3,w3,y3,a3,z3,a3
4,,,y4,,z4,a4
5,,,y5,,z5,a5
