## Combine Dataset in Pandas 

- Create two dataframes that have common columns, say ${\bf x}_1$

In [28]:
import pandas as pd

adf = pd.DataFrame(data={'x1': ['A', 'B', 'C'], 'x2' : [1, 2, 3]})

In [29]:
adf

Unnamed: 0,x1,x2
0,A,1
1,B,2
2,C,3


In [30]:
bdf = pd.DataFrame(data={'x1': ['A', 'B', 'D'], 'x3' : ['T', 'F', 'T']})

In [31]:
bdf

Unnamed: 0,x1,x3
0,A,T
1,B,F
2,D,T


### Merge adf and bdf based on adf

In [38]:
pd.merge(adf, bdf, how='left')

Unnamed: 0,x1,x2,x3
0,A,1,T
1,B,2,F
2,C,3,


### Merge adf and bdf based on bdf

In [42]:
pd.merge(adf, bdf, how= 'right')

Unnamed: 0,x1,x2,x3
0,A,1.0,T
1,B,2.0,F
2,D,,T


### Merge adf and bdf based on what they have in common

In [40]:
pd.merge(adf, bdf, how= 'inner')

Unnamed: 0,x1,x2,x3
0,A,1,T
1,B,2,F


### Another method for above task

In [21]:
pd.merge(adf, bdf)

Unnamed: 0,x1,x2,x3
0,A,1,T
1,B,2,F


### Union of merge for adf and bdf

In [45]:
pd.merge(adf, bdf, how= 'outer')

Unnamed: 0,x1,x2,x3
0,A,1.0,T
1,B,2.0,F
2,C,3.0,
3,D,,T


### Appending Pandas dataframe

In [48]:
# Append bdf after bdf (row-wise concatination)
pd.concat([adf, bdf], sort=True)

Unnamed: 0,x1,x2,x3
0,A,1.0,
1,B,2.0,
2,C,3.0,
0,A,,T
1,B,,F
2,D,,T


In [50]:
# Column-wise concatination 
pd.concat([adf, bdf], axis=1)

Unnamed: 0,x1,x2,x1.1,x3
0,A,1,A,T
1,B,2,B,F
2,C,3,D,T


In [51]:
cdf = pd.DataFrame(data={'x1': ['A', 'B', 'C'], 'x3' : ['T', 'F', 'T']})
# cdf
pd.concat([adf, cdf], axis=1)

Unnamed: 0,x1,x2,x1.1,x3
0,A,1,A,T
1,B,2,B,F
2,C,3,C,T


In [25]:
pd.concat([adf, cdf], sort=False)

Unnamed: 0,x1,x2,x3
0,A,1.0,
1,B,2.0,
2,C,3.0,
0,A,,T
1,B,,F
2,C,,T


In [52]:
# Correct the indexing after concatination
df = pd.concat([adf, cdf], ignore_index=True)
df

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  


Unnamed: 0,x1,x2,x3
0,A,1.0,
1,B,2.0,
2,C,3.0,
3,A,,T
4,B,,F
5,C,,T


In [53]:
pd.merge(adf, cdf, how='left')

Unnamed: 0,x1,x2,x3
0,A,1,T
1,B,2,F
2,C,3,T


## When two dataframes have exactly the same columns

In [57]:

df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])

df = df.append(df2, ignore_index=True)

In [55]:
df

Unnamed: 0,a,b
0,1,2
1,3,4
2,5,6
3,7,8


### Slicing dataframe based on largest value for an specific column

In [58]:
import numpy as np

df = pd.DataFrame({'a': [1, 10, 8, 11, -1],'b': list('abdce'), 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
df.nlargest(3, 'a')

Unnamed: 0,a,b,c
3,11,c,3.0
1,10,b,2.0
2,8,d,


In [71]:
df

Unnamed: 0,a,b,c
0,1,a,1.0
1,10,b,2.0
2,8,d,
3,11,c,3.0
4,-1,e,4.0


In [72]:
df['c'].isnull().sum()

1

In [73]:
df.isnull().sum()

a    0
b    0
c    1
dtype: int64

In [74]:
data = {'weekday': ["Monday", "Tuesday", "Wednesday", 
         "Thursday", "Friday", "Saturday", "Sunday"],
        'Person 1': [12, 6, 5, 8, 11, 6, 4],
        'Person 2': [10, 6, 11, 5, 8, 9, 12],
        'Person 3': [8, 5, 7, 3, 7, 11, 15]}
df = pd.DataFrame(data, columns=['weekday',
        'Person 1', 'Person 2', 'Person 3'])

In [75]:
df

Unnamed: 0,weekday,Person 1,Person 2,Person 3
0,Monday,12,10,8
1,Tuesday,6,6,5
2,Wednesday,5,11,7
3,Thursday,8,5,3
4,Friday,11,8,7
5,Saturday,6,9,11
6,Sunday,4,12,15


### Reshape Pandas dataframe with Melt

In [76]:
# Reshape Pandas Data With Melt
melted = pd.melt(df, id_vars=["weekday"], 
                 var_name="Person", value_name="Score")

In [77]:
melted

Unnamed: 0,weekday,Person,Score
0,Monday,Person 1,12
1,Tuesday,Person 1,6
2,Wednesday,Person 1,5
3,Thursday,Person 1,8
4,Friday,Person 1,11
5,Saturday,Person 1,6
6,Sunday,Person 1,4
7,Monday,Person 2,10
8,Tuesday,Person 2,6
9,Wednesday,Person 2,11


In [78]:
# https://deparkes.co.uk/2016/10/28/reshape-pandas-data-with-melt/

### More Expercise

- https://martin-thoma.com/pandas-merge-join-concatenate/

- https://github.com/codebasics/py/blob/master/pandas/11_melt/pandas_melt_tutorial.ipynb