# Join

## 상하 결합

In [2]:
# Vertical (row-wise) concatenation: two frames that share the columns A, B, C.
import pandas as pd

df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
    }
)
df2 = pd.DataFrame(
    {
        'A': [4, 5, 6],
        'B': [14, 15, 16],
        'C': [24, 25, 26],
    }
)
# Show both frames, separated by one blank line.
print(df1, '', df2, sep='\n')

   A   B   C
0  1  11  21
1  2  12  22
2  3  13  23

   A   B   C
0  4  14  24
1  5  15  25
2  6  16  26


In [3]:
pd.concat([df1, df2])  # original row labels are kept, so the index repeats (0, 1, 2, 0, 1, 2)

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
0,4,14,24
1,5,15,25
2,6,16,26


In [4]:
pd.concat([df1, df2], ignore_index = True)  # ignore_index=True renumbers the rows sequentially (0..5)

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
3,4,14,24
4,5,15,25
5,6,16,26


In [5]:
# What happens when the two frames list the same columns in a different order?
df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
    }
)
# Same column names as df1, but declared in the order B, A, C.
df2 = pd.DataFrame(
    {
        'B': [14, 15, 16],
        'A': [4, 5, 6],
        'C': [24, 25, 26],
    }
)
# Show both frames, separated by one blank line.
print(df1, '', df2, sep='\n')

   A   B   C
0  1  11  21
1  2  12  22
2  3  13  23

    B  A   C
0  14  4  24
1  15  5  25
2  16  6  26


In [6]:
pd.concat([df1, df2])  # columns are matched by name, not position, so each value lands in the right column

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
0,4,14,24
1,5,15,25
2,6,16,26


In [7]:
# Concatenating frames whose column sets differ (D exists only in df1, E only in df2).
df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
        'D': [31, 32, 33],
    }
)
df2 = pd.DataFrame(
    {
        'A': [3, 4, 5],
        'B': [13, 14, 15],
        'C': [23, 24, 25],
        'E': [41, 42, 43],
    }
)
# Show both frames, separated by one blank line.
print(df1, '', df2, sep='\n')

   A   B   C   D
0  1  11  21  31
1  2  12  22  32
2  3  13  23  33

   A   B   C   E
0  3  13  23  41
1  4  14  24  42
2  5  15  25  43


In [8]:
pd.concat([df1, df2])  # with no join argument the result is outer-style: all columns kept, gaps filled with NaN

Unnamed: 0,A,B,C,D,E
0,1,11,21,31.0,
1,2,12,22,32.0,
2,3,13,23,33.0,
0,3,13,23,,41.0
1,4,14,24,,42.0
2,5,15,25,,43.0


In [9]:
# Passing join='outer' explicitly gives the same result as the default call above.
pd.concat([df1, df2], join = 'outer')

Unnamed: 0,A,B,C,D,E
0,1,11,21,31.0,
1,2,12,22,32.0,
2,3,13,23,33.0,
0,3,13,23,,41.0
1,4,14,24,,42.0
2,5,15,25,,43.0


In [10]:
# Inner join
pd.concat([df1, df2], join = 'inner')  # intersection: only the columns present in both frames are kept

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
0,3,13,23
1,4,14,24
2,5,15,25


## 좌우 결합

In [11]:
# Horizontal (column-wise) concatenation: two frames with disjoint column names.
import pandas as pd

df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
        'D': [31, 32, 33],
    }
)
df2 = pd.DataFrame(
    {
        'E': [3, 4, 5],
        'F': [13, 14, 15],
        'G': [23, 24, 25],
        'H': [41, 42, 43],
    }
)
# Show both frames, separated by one blank line.
print(df1, '', df2, sep='\n')

   A   B   C   D
0  1  11  21  31
1  2  12  22  32
2  3  13  23  33

   E   F   G   H
0  3  13  23  41
1  4  14  24  42
2  5  15  25  43


In [12]:
pd.concat([df1, df2] , axis = 1)  # axis=1 places the frames side by side, matching rows by index label

Unnamed: 0,A,B,C,D,E,F,G,H
0,1,11,21,31,3,13,23,41
1,2,12,22,32,4,14,24,42
2,3,13,23,33,5,15,25,43


In [13]:
# Exercise: for the users whose gender and age are known, attach their
# height and weight information.
# Column keys: 성별 = gender, 나이 = age, 키 = height, 몸무게 = weight.
df1 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 4, 5],
        '성별': ['F', 'M', 'F', 'M', 'F'],
        '나이': [20, 30, 40, 25, 42],
    }
)
df2 = pd.DataFrame(
    {
        'ID': [3, 4, 5, 6, 7],
        '키': [160.5, 170.3, 180.1, 142.3, 153.7],
        '몸무게': [45.1, 50.3, 72.1, 38, 42],
    }
)
# A left merge keeps every row of df1; IDs missing from df2 get NaN.
pd.merge(
    df1,
    df2,
    how='left',
    on='ID',
)

Unnamed: 0,ID,성별,나이,키,몸무게
0,1,F,20,,
1,2,M,30,,
2,3,F,40,160.5,45.1
3,4,M,25,170.3,50.3
4,5,F,42,180.1,72.1


In [15]:
# Exercise: show the users for whom height, weight, gender and age are all known.
# how='inner' keeps only the IDs present in both df1 and df2.
pd.merge(df1, df2, how = 'inner' , on ='ID')

Unnamed: 0,ID,성별,나이,키,몸무게
0,3,F,40,160.5,45.1
1,4,M,25,170.3,50.3
2,5,F,42,180.1,72.1


In [16]:
# Exercise: show the information of every user.
# how='outer' keeps the IDs from either frame; fields missing on one side become NaN.
pd.merge(df1, df2, how = 'outer', on ='ID')

Unnamed: 0,ID,성별,나이,키,몸무게
0,1,F,20.0,,
1,2,M,30.0,,
2,3,F,40.0,160.5,45.1
3,4,M,25.0,170.3,50.3
4,5,F,42.0,180.1,72.1
5,6,,,142.3,38.0
6,7,,,153.7,42.0
