# <font color="purple"><h3 align="center">Pandas Concat() Tutorial</h3></font>

<img src="concat_pandas.JPG" width="600" height="300">

In [1]:
"""
All Parameter for concat():

        axis = 0, [0/’index’, 1/’columns’] 
        
        join = ‘outer’ [‘inner’, ‘outer’] 
        
        ignore_index = False [True, False]
        
        keys = None, [sequence]         
        
        levels = None, [list of sequences] 
        
        names = None, [list] 
        
        verify_integrity = False  [True, False]
        
        sort = False, [True, False]
        
        copy = True [True, False]
        
"""
print('All parameter are shown above.')

All parameter are shown above.


## <font color="maganta"><h3 align="center">Combine Series</h3></font> 

In [1]:
import pandas as pd
import numpy as np

In [3]:
s1 = pd.Series(['a', 'b'])
s2 = pd.Series(['c', 'd'])
pd.concat([s1, s2])

0    a
1    b
0    c
1    d
dtype: object

In [4]:
pd.concat([s1, s2], ignore_index=True)

0    a
1    b
2    c
3    d
dtype: object

In [5]:
#A hierarchical index at the outermost level of the data is added with the 'keys' option.
pd.concat([s1, s2], keys=['s1', 's2'])

s1  0    a
    1    b
s2  0    c
    1    d
dtype: object

In [6]:
#use 'names' option
pd.concat([s1, s2], keys=['s1', 's2'],
          names=['Series name', 'Row ID'])

Series name  Row ID
s1           0         a
             1         b
s2           0         c
             1         d
dtype: object

## <font color="maganta"><h3 align="center">Combine DataFrames</h3></font> 

### <font color='green'>1. Combine rows / Concatenation Using Column / Vertically concat w.r.t column</font>

#### <font color="blue">1.1 All columns are identical</font> 

In [7]:
bd_weather = pd.DataFrame({
    "city": ["dhaka","rajshahi","sylhet"],
    "temperature": [32,45,30],
    "humidity": [80, 60, 78]
})
bd_weather

Unnamed: 0,city,temperature,humidity
0,dhaka,32,80
1,rajshahi,45,60
2,sylhet,30,78


In [8]:
us_weather = pd.DataFrame({
    "city": ["new york","chicago","orlando"],
    "temperature": [21,14,35],
    "humidity": [68, 65, 75]
})
us_weather

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [9]:
d1 = pd.concat([bd_weather, us_weather])
#df = bd_weather.append(us_weather)
d1

Unnamed: 0,city,temperature,humidity
0,dhaka,32,80
1,rajshahi,45,60
2,sylhet,30,78
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


#### <font color="blue">1.2 All columns are not identical or different<font>

In [10]:
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df1

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [11]:
df2 = pd.DataFrame({'C': [7, 8], 'D': [10, 11]})
df2

Unnamed: 0,C,D
0,7,10
1,8,11


In [12]:
#combine df
pd.concat([df1,df2])

Unnamed: 0,A,B,C,D
0,1.0,4.0,,
1,2.0,5.0,,
2,3.0,6.0,,
0,,,7.0,10.0
1,,,8.0,11.0


### <font color='green'>4. Combine Columns / Concatenation Using Index / Horizontally concat w.r.t index</font>

#### exercise 1

In [13]:
f1 = pd.DataFrame({
    "city": ["dhaka","rajshahi","sylhet"],
    "famous": ['biriyani','mango','tea'],
    
})
#set 'city' as index
f1 = f1.set_index('city')
f1

Unnamed: 0_level_0,famous
city,Unnamed: 1_level_1
dhaka,biriyani
rajshahi,mango
sylhet,tea


In [14]:
f2 = pd.DataFrame({
    "city": ["rajshahi","sylhet","dhaka"],
    "tag": ['clean city','hill town','toxic city'],
    
})
f2 = f2.set_index('city')
f2

Unnamed: 0_level_0,tag
city,Unnamed: 1_level_1
rajshahi,clean city
sylhet,hill town
dhaka,toxic city


In [15]:
d4 = pd.concat([f1,f2],axis=1)
d4

Unnamed: 0_level_0,famous,tag
city,Unnamed: 1_level_1,Unnamed: 2_level_1
dhaka,biriyani,toxic city
rajshahi,mango,clean city
sylhet,tea,hill town


#### exercise 2

In [16]:
f3 = pd.DataFrame({
    "city": ["rajshahi","sylhet","dhaka","Cox-bazar"],
    "tag": ['clean city','hill town','toxic city','sea beach'],
})
f3 = f3.set_index('city')
f3

Unnamed: 0_level_0,tag
city,Unnamed: 1_level_1
rajshahi,clean city
sylhet,hill town
dhaka,toxic city
Cox-bazar,sea beach


In [17]:
d5 = pd.concat([f1,f3],axis=1)
d5

Unnamed: 0_level_0,famous,tag
city,Unnamed: 1_level_1,Unnamed: 2_level_1
dhaka,biriyani,toxic city
rajshahi,mango,clean city
sylhet,tea,hill town
Cox-bazar,,sea beach


### <font color='green'>Parameter : 'ignore_index'</font>

In [18]:
#row combine
pd.concat([bd_weather, us_weather], ignore_index=True)
#bd_weather.append(us_weather, ignore_index=True)

Unnamed: 0,city,temperature,humidity
0,dhaka,32,80
1,rajshahi,45,60
2,sylhet,30,78
3,new york,21,68
4,chicago,14,65
5,orlando,35,75


In [19]:
#column combine
pd.concat([bd_weather, us_weather], axis=1,ignore_index=True)

Unnamed: 0,0,1,2,3,4,5
0,dhaka,32,80,new york,21,68
1,rajshahi,45,60,chicago,14,65
2,sylhet,30,78,orlando,35,75


### <font color='green'> Parameter: 'Join'</font>

In [20]:
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df1

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [21]:
df2 = pd.DataFrame({'C': [7, 8], 'D': [10, 11]})
df2

Unnamed: 0,C,D
0,7,10
1,8,11


#### exercise 1: row combine

In [22]:
pd.concat([df1,df2])  #default: join='outher'

Unnamed: 0,A,B,C,D
0,1.0,4.0,,
1,2.0,5.0,,
2,3.0,6.0,,
0,,,7.0,10.0
1,,,8.0,11.0


In [23]:
df=pd.concat([df1,df2],join='inner')
df

0
1
2
0
1


#### exercise 2: column combine

In [24]:
pd.concat([df1,df2],axis=1)  #join ='outer'

Unnamed: 0,A,B,C,D
0,1,4,7.0,10.0
1,2,5,8.0,11.0
2,3,6,,


In [25]:
pd.concat([df1,df2],axis=1,join='inner')

Unnamed: 0,A,B,C,D
0,1,4,7,10
1,2,5,8,11


### <font color='green'>Parameter : 'Keys'</font>

#### exercise 1 : row combine

In [26]:
dd = pd.concat([bd_weather, us_weather], keys=["bd", "us"])
dd

Unnamed: 0,Unnamed: 1,city,temperature,humidity
bd,0,dhaka,32,80
bd,1,rajshahi,45,60
bd,2,sylhet,30,78
us,0,new york,21,68
us,1,chicago,14,65
us,2,orlando,35,75


In [27]:
dd.loc["us"]

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [28]:
dd.loc["bd"]

Unnamed: 0,city,temperature,humidity
0,dhaka,32,80
1,rajshahi,45,60
2,sylhet,30,78


#### exercise 2 : column combine

In [29]:
ff = pd.concat([bd_weather, us_weather], axis=1, keys=["bd", "us"])
ff

Unnamed: 0_level_0,bd,bd,bd,us,us,us
Unnamed: 0_level_1,city,temperature,humidity,city,temperature,humidity
0,dhaka,32,80,new york,21,68
1,rajshahi,45,60,chicago,14,65
2,sylhet,30,78,orlando,35,75


In [30]:
ff.loc[:,"us"]

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [31]:
ff.loc[:,"bd"]

Unnamed: 0,city,temperature,humidity
0,dhaka,32,80
1,rajshahi,45,60
2,sylhet,30,78


### <font color='green'>Parameter : 'Keys' (2nd Way: 'keys' = 'dict keys')</font>

You can also pass a dict to concat() in which case the dict keys will be used for the keys argument unless other keys argument is specified.

In [20]:
f1 = pd.DataFrame( 
    {
    "A": ["A0", "A1", "A2", "A3"],
    "B": ["B0", "B1", "B2", "B3"],
    "C": ["C0", "C1", "C2", "C3"],
    "D": ["D0", "D1", "D2", "D3"],
    }, 
    index=[0, 1, 2, 3], )
 
f2 = pd.DataFrame(
    {
    "A": ["A4", "A5", "A6", "A7"],
    "B": ["B4", "B5", "B6", "B7"],
    "C": ["C4", "C5", "C6", "C7"],
    "D": ["D4", "D5", "D6", "D7"],
    },
    index=[4, 5, 6, 7], )

f3 = pd.DataFrame(
    {
    "A": ["A8", "A9", "A10", "A11"],
    "B": ["B8", "B9", "B10", "B11"],
    "C": ["C8", "C9", "C10", "C11"],
    "D": ["D8", "D9", "D10", "D11"],
    },
     index=[8, 9, 10, 11], ) 
    
pieces = {"x": f1, "y": f2, "z": f3}

result = pd.concat(pieces)
print(result)

        A    B    C    D
x 0    A0   B0   C0   D0
  1    A1   B1   C1   D1
  2    A2   B2   C2   D2
  3    A3   B3   C3   D3
y 4    A4   B4   C4   D4
  5    A5   B5   C5   D5
  6    A6   B6   C6   D6
  7    A7   B7   C7   D7
z 8    A8   B8   C8   D8
  9    A9   B9   C9   D9
  10  A10  B10  C10  D10
  11  A11  B11  C11  D11


#### interesting feature

In [10]:
result = pd.concat(pieces, keys=['z','x'])
result

Unnamed: 0,Unnamed: 1,A,B,C,D
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9
z,10,A10,B10,C10,D10
z,11,A11,B11,C11,D11
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3


In [11]:
result.index.levels

FrozenList([['z', 'x'], [0, 1, 2, 3, 8, 9, 10, 11]])

In [13]:
result2 = pd.concat(pieces, levels=[["z", "y", "x", "w"]], names=["group_key"]) #alteady keys=['x','y','z']
result2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
group_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9


In [14]:
result2.index.levels

FrozenList([['z', 'y', 'x', 'w'], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]])

### <font color='green'>Parameter : 'names'</font>

#### exercise 1

In [32]:
dn = pd.concat([bd_weather, us_weather], keys=["bd", "us"],names=['country','city_id'])
dn

Unnamed: 0_level_0,Unnamed: 1_level_0,city,temperature,humidity
country,city_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bd,0,dhaka,32,80
bd,1,rajshahi,45,60
bd,2,sylhet,30,78
us,0,new york,21,68
us,1,chicago,14,65
us,2,orlando,35,75


#### exercise 2

In [33]:
dn = pd.concat([bd_weather, us_weather], axis=1, keys=["bd", "us"],names=['country','city_id'])
dn

country,bd,bd,bd,us,us,us
city_id,city,temperature,humidity,city,temperature,humidity
0,dhaka,32,80,new york,21,68
1,rajshahi,45,60,chicago,14,65
2,sylhet,30,78,orlando,35,75


#### exercise 3

In [34]:
d1 = pd.DataFrame(dict(A=.1, B=.2, C=.3), [2, 3])
d2 = pd.DataFrame(dict(B=.4, C=.5, D=.6), [1, 2])
d3 = pd.DataFrame(dict(A=.7, B=.8, D=.9), [1, 3])

# Concatenate the data frames with specified keys and levels
result = pd.concat([d1, d2, d3], keys=['d1', 'd2', 'd3'], names=['level1', 'level2'])

print(result)

                 A    B    C    D
level1 level2                    
d1     2       0.1  0.2  0.3  NaN
       3       0.1  0.2  0.3  NaN
d2     1       NaN  0.4  0.5  0.6
       2       NaN  0.4  0.5  0.6
d3     1       0.7  0.8  NaN  0.9
       3       0.7  0.8  NaN  0.9


#### exercise 4 (same work with different ways)

In [35]:
# Example DataFrames with hierarchical indexes
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, 
                   index=pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')], names=['Level1', 'Level2']))

df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]}, 
                   index=pd.MultiIndex.from_tuples([('Y', 'c'), ('Y', 'd')], names=['Level1', 'Level2']))

df3 = pd.DataFrame({'A': [9, 10], 'B': [11, 12]}, 
                   index=pd.MultiIndex.from_tuples([('Z', 'e'), ('Z', 'f')], names=['Level1', 'Level2']))

result = pd.concat([df1, df2, df3])
result

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Level1,Level2,Unnamed: 2_level_1,Unnamed: 3_level_1
X,a,1,3
X,b,2,4
Y,c,5,7
Y,d,6,8
Z,e,9,11
Z,f,10,12


#### exercise with problem, why names now shown??

In [36]:
# Example DataFrames with hierarchical indexes
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, 
                   index=pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]))

df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]}, 
                   index=pd.MultiIndex.from_tuples([('Y', 'c'), ('Y', 'd')]))

df3 = pd.DataFrame({'A': [9, 10], 'B': [11, 12]}, 
                   index=pd.MultiIndex.from_tuples([('Z', 'e'), ('Z', 'f')]))

result = pd.concat([df1, df2, df3],names=['Level1', 'Level2'])
result

Unnamed: 0,Unnamed: 1,A,B
X,a,1,3
X,b,2,4
Y,c,5,7
Y,d,6,8
Z,e,9,11
Z,f,10,12


## <font color="maganta"><h3 align="center">Combine DataFrames with Series</h3></font> 

#### exercise 1 : add series into column

In [37]:
s = pd.Series(["Humid","Dry","Rain"], name="event")
s

0    Humid
1      Dry
2     Rain
Name: event, dtype: object

In [38]:
df = pd.concat([bd_weather,s],axis=1)
df

Unnamed: 0,city,temperature,humidity,event
0,dhaka,32,80,Humid
1,rajshahi,45,60,Dry
2,sylhet,30,78,Rain


#### exercise 2 : add series into row

In [39]:
df = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
df

Unnamed: 0,a,b
0,1,2


In [40]:
new_row = pd.Series({'a': 3, 'b': 4})
new_row

a    3
b    4
dtype: int64

In [41]:
pd.concat([df, new_row.to_frame().T], ignore_index=True)

#convert series to dataframe:  series.to_frame()

Unnamed: 0,a,b
0,1,2
1,3,4


## <font color="maganta"><h3 align="center">Two Extra Combine function: append() & join()</h3></font> 

### <font color='green'> 1. Use append() for row combine</font>

In [42]:
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df2 = pd.DataFrame({'A': [7, 8, 9], 'B': [10, 11, 12]})

appended_rows = df1.append(df2,ignore_index=True)
appended_rows

  appended_rows = df1.append(df2,ignore_index=True)


Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6
3,7,10
4,8,11
5,9,12


### <font color='green'>2. Use join() for column combine</font>

In [43]:
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df2 = pd.DataFrame({'C': [7, 8, 9], 'D': [10, 11, 12]})
joined_df = df1.join(df2)
joined_df

Unnamed: 0,A,B,C,D
0,1,4,7,10
1,2,5,8,11
2,3,6,9,12


In [44]:
df1 = pd.DataFrame({'A': [1, 2, 3]}, index=['a', 'b', 'c'])
df2 = pd.DataFrame({'B': [4, 5, 6]}, index=['a', 'b', 'd'])

# Joining on index
joined_df = df1.join(df2)
joined_df

Unnamed: 0,A,B
a,1,4.0
b,2,5.0
c,3,


### <font color="green">Key Points:</font>

In [47]:
"""
Key Points:

1. df = pd.concat([df1, df2]) --> concat rows with previous index.
2. df = pd.concat([df1, df2],ignore_index=True)--> concat rows with new index.
3. df = pd.concat([df1, df2],axis=1) --> concat columns and it works w.r.t index.
4. df = df1.append(df2) and df = pd.concat([df1, df2]) are same.
5. df = df1.join(df2) and df = pd.concat([df1, df2],axis=1) are same.

"""
print('All key points are shown above.')

All key points are shown above.


In [48]:
#For more details follow:
#https://pandas.pydata.org/docs/reference/api/pandas.concat.html