## Combining DataFrames

### Concatenating

In [2]:
import pandas as pd

In [4]:
# Base table for the concatenation examples: four rows, five columns,
# labelled with an explicit (non-sorted) integer index.
data = [
    ["Mark", 55, "Italy", 4.5, "Europe"],
    ["John", 33, "USA", 6.7, "America"],
    ["Tim", 41, "USA", 3.9, "America"],
    ["Jenny", 12, "Germany", 9.0, "Europe"],
]
df = pd.DataFrame(
    data,
    index=[1001, 1000, 1002, 1003],
    columns=["name", "age", "country", "score", "continent"],
)
df

Unnamed: 0,name,age,country,score,continent
1001,Mark,55,Italy,4.5,Europe
1000,John,33,USA,6.7,America
1002,Tim,41,USA,3.9,America
1003,Jenny,12,Germany,9.0,Europe


In [3]:
# Two additional rows to append to `df`. Compared with `df`, the columns
# are in a different order, there is no "continent" column, and the
# index label 1000 is used again.
data = [
    [15, "France", 4.1, "Becky"],
    [44, "Canada", 6.1, "Leanne"],
]
more_users = pd.DataFrame(
    data,
    index=[1000, 1011],
    columns=["age", "country", "score", "name"],
)
more_users

Unnamed: 0,age,country,score,name
1000,15,France,4.1,Becky
1011,44,Canada,6.1,Leanne


In [5]:
# Stack the rows of more_users underneath df (axis="index" is the same as
# axis=0). Columns are matched by name, so "continent" is missing (NaN)
# for the appended rows, and the index label 1000 appears twice.
pd.concat([df, more_users], axis="index")

Unnamed: 0,name,age,country,score,continent
1001,Mark,55,Italy,4.5,Europe
1000,John,33,USA,6.7,America
1002,Tim,41,USA,3.9,America
1003,Jenny,12,Germany,9.0,Europe
1000,Becky,15,France,4.1,
1011,Leanne,44,Canada,6.1,


In [6]:
# To glue two DataFrames together side by side (along the columns),
# pass axis=1 to pd.concat. This frame supplies the extra columns:
# label 1000 also exists in `df`, label 2000 does not.
data = [
    [3, 4],
    [5, 6],
]
more_categories = pd.DataFrame(
    data,
    index=[1000, 2000],
    columns=["quizzes", "logins"],
)
more_categories

Unnamed: 0,quizzes,logins
1000,3,4
2000,5,6


In [7]:
# Place the columns of more_categories next to those of df
# (axis="columns" is the same as axis=1). Rows are matched on the index
# label; labels present in only one frame get NaN in the other's columns.
pd.concat([df, more_categories], axis="columns")

Unnamed: 0,name,age,country,score,continent,quizzes,logins
1001,Mark,55.0,Italy,4.5,Europe,,
1000,John,33.0,USA,6.7,America,3.0,4.0
1002,Tim,41.0,USA,3.9,America,,
1003,Jenny,12.0,Germany,9.0,Europe,,
2000,,,,,,5.0,6.0


### Joining and Merging

In [9]:
# Left-hand frame for the join examples: columns A and B, three rows on
# the default integer index 0, 1, 2.
df1 = pd.DataFrame({"A": [1, 3, 5], "B": [2, 4, 6]})
df1

Unnamed: 0,A,B
0,1,2
1,3,4
2,5,6


In [10]:
# Right-hand frame for the join examples: columns C and D on index
# labels 1 and 3, so only label 1 overlaps with df1.
df2 = pd.DataFrame({"C": [10, 30], "D": [20, 40]}, index=[1, 3])
df2

Unnamed: 0,C,D
1,10,20
3,30,40


In [12]:
# Inner join on the index: only labels present in BOTH frames survive
# (here just label 1).
df1.join(other=df2, how="inner")

Unnamed: 0,A,B,C,D
1,3,4,10,20


In [13]:
# Left join on the index: every row of df1 is kept; C and D are NaN
# wherever df2 has no row with the same label.
df1.join(other=df2, how="left")

Unnamed: 0,A,B,C,D
0,1,2,,
1,3,4,10.0,20.0
2,5,6,,


In [14]:
# Right join on the index: every row of df2 is kept; A and B are NaN
# for label 3, which does not exist in df1.
df1.join(other=df2, how="right")

Unnamed: 0,A,B,C,D
1,3.0,4.0,10,20
3,,,30,40


In [15]:
# Outer join on the index: the union of both indexes is kept, with NaN
# filling whatever is missing on either side.
df1.join(other=df2, how="outer")

Unnamed: 0,A,B,C,D
0,1.0,2.0,,
1,3.0,4.0,10.0,20.0
2,5.0,6.0,,
3,,,30.0,40.0


In [16]:
# Give both DataFrames a "category" column to merge on. The values are
# assigned to the rows in order: a, b, c for df1 and c, b for df2.
df1["category"] = list("abc")
df2["category"] = list("cb")

In [17]:
# Display df1, which now includes the "category" column.
df1

Unnamed: 0,A,B,category
0,1,2,a
1,3,4,b
2,5,6,c


In [18]:
# Display df2, which now includes the "category" column.
df2

Unnamed: 0,C,D,category
1,10,20,c
3,30,40,b


In [20]:
# Inner merge on the "category" column instead of the index: only
# categories found in both frames ("b" and "c") are kept, and the result
# is renumbered from 0.
pd.merge(df1, df2, on="category", how="inner")

Unnamed: 0,A,B,category,C,D
0,3,4,b,30,40
1,5,6,c,10,20


In [22]:
# Left merge on the "category" column: every row of df1 is kept; C and D
# are NaN for category "a", which has no match in df2.
pd.merge(df1, df2, on="category", how="left")

Unnamed: 0,A,B,category,C,D
0,1,2,a,,
1,3,4,b,30.0,40.0
2,5,6,c,10.0,20.0
