# Concatenation (Stacking Tables)
- Concatenation puts tables together either vertically (one below another, like adding rows) or horizontally (side by side, like adding columns).
- When it is needed:
1. Collecting multiple months of data -> stack vertically.
2. Splitting large data into parts -> stack the parts back together later.
3. Combining different feature sets -> stack horizontally.

In [2]:
import pandas as pd

# Table 1: two employees with their departments.
df1 = pd.DataFrame(dict(Name=['Alice', 'Bob'], Department=['HR', 'IT']))

# Table 2: two more employees, same columns as Table 1.
df2 = pd.DataFrame(dict(Name=['Charlie', 'David'], Department=['Finance', 'IT']))

# Show both frames (a notebook displays the value of the last expression).
df1, df2

(    Name Department
 0  Alice         HR
 1    Bob         IT,
       Name Department
 0  Charlie    Finance
 1    David         IT)

In [None]:
# Vertical concat: rows of df2 are appended below df1.
# Each frame keeps its own row labels, so the resulting index repeats (0, 1, 0, 1).

combined = pd.concat([df1, df2], axis='index')
combined

Unnamed: 0,Name,Department
0,Alice,HR
1,Bob,IT
0,Charlie,Finance
1,David,IT


In [5]:
# ignore_index=True discards the original row labels and renumbers
# the stacked rows sequentially: 0, 1, 2, 3.

combined = pd.concat([df1, df2], axis='index', ignore_index=True)
combined

Unnamed: 0,Name,Department
0,Alice,HR
1,Bob,IT
2,Charlie,Finance
3,David,IT


In [6]:
# Additional single-column frames set up for later examples.

# Table 3: Age data
df3 = pd.DataFrame(dict(Age=[25, 30]))

# Table 4: Salary data
df4 = pd.DataFrame(dict(Salary=[50000, 60000]))

# Show both frames.
df3, df4


(   Age
 0   25
 1   30,
    Salary
 0   50000
 1   60000)

In [None]:
# Horizontal concat: frames are placed side by side, matched on row index.
# df1 and df2 share the same column names, so 'Name' and 'Department'
# each appear twice in the result (duplicate columns are not merged).

combined = pd.concat([df1, df2], axis='columns')
combined

Unnamed: 0,Name,Department,Name.1,Department.1
0,Alice,HR,Charlie,Finance
1,Bob,IT,David,IT


In [None]:
# Vertical concat of frames whose columns differ: the result has the union
# of all columns, and any cell a source frame has no value for is NaN.

df5 = pd.DataFrame(dict(Name=['Eve'], Location=['Mumbai']))

combined = pd.concat([df1, df5], axis='index', ignore_index=True)
combined


Unnamed: 0,Name,Department,Location
0,Alice,HR,
1,Bob,IT,
2,Eve,,Mumbai


In [None]:
# Horizontal concat of frames with different row counts: the shorter
# frame is padded with NaN, which also turns its column into floats
# (Y shows as 4.0, 5.0, NaN).

df6 = pd.DataFrame(dict(X=[1, 2, 3]))
df7 = pd.DataFrame(dict(Y=[4, 5]))

combined = pd.concat([df6, df7], axis='columns')
combined

Unnamed: 0,X,Y
0,1,4.0
1,2,5.0
2,3,


In [11]:
# keys= labels each input frame in a vertical stack: the result gets an
# extra outer index level ('First' / 'Second') showing which table each
# row came from, with the original row labels kept underneath.

combined = pd.concat([df1, df2], axis='index', keys=['First', 'Second'])
combined

Unnamed: 0,Unnamed: 1,Name,Department
First,0,Alice,HR
First,1,Bob,IT
Second,0,Charlie,Finance
Second,1,David,IT
