<a href="https://colab.research.google.com/github/DataEtnos/estudos_python/blob/main/merge_concat_join_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import plotly.express as px

from sklearn import datasets
from sklearn.preprocessing import scale, minmax_scale, power_transform

from IPython.core.display import HTML
from ipywidgets import interact, widgets

In [11]:
# Notebook-wide plotting defaults: "talk" context, "ticks" style,
# font_scale of 0.8 and a 12x8 default figure size.
sns.set_theme(
    context="talk",
    style="ticks",
    font_scale=0.8,
    rc={"figure.figsize": (12, 8)},
)

In [12]:
# Helper that renders several tables on the same row (side by side).
def display_side_by_side(dataframes: list, titles: list):
    """Render several DataFrames next to each other in the notebook output.

    Each frame is converted with ``DataFrame.to_html`` and its opening
    ``<table>`` tag receives an inline display style, so consecutive tables
    are laid out on the same row instead of being stacked.

    Args:
        dataframes: DataFrames to render, in display order.
        titles: Caption shown before each DataFrame, in the same order.
            Frames and titles are paired with ``zip``, so surplus entries
            in the longer list are ignored.
    """
    spacer = "&nbsp;" * 10  # horizontal gap between consecutive tables
    fragments = []
    for df, title in zip(dataframes, titles):
        # Patch only the opening tag. Replacing every "table" substring would
        # also rewrite the closing </table> tag (invalid markup) and corrupt
        # any cell or column text that happens to contain the word "table".
        table_html = df.to_html().replace(
            "<table", '<table style="display:inline"', 1
        )
        fragments.append(f"<span>{title}:</span>{table_html}{spacer}")
    display(HTML("".join(fragments)))

In [13]:
# Three small example frames that share a "key" column.
# df1 holds keys K0..K3 (index 0..3); df2 and df3 hold keys K1..K4
# (index 1..4), so the key sets only partly overlap.
df1 = pd.DataFrame(
    {
        "key": [f"K{i}" for i in range(4)],
        "A": [f"A{i}" for i in range(4)],
        "B": [f"B{i}" for i in range(4)],
    },
    index=range(4),
)

df2 = pd.DataFrame(
    {
        "key": [f"K{i}" for i in range(1, 5)],
        "C": [f"C{i}" for i in range(1, 5)],
        "D": [f"D{i}" for i in range(1, 5)],
    },
    index=range(1, 5),
)

df3 = pd.DataFrame(
    {
        "key": [f"K{i}" for i in range(1, 5)],
        "E": [f"E{i}" for i in range(1, 5)],
        "F": [f"F{i}" for i in range(1, 5)],
    },
    index=range(1, 5),
)

In [14]:
# Show the three input tables next to each other before merging them.
display_side_by_side(
    dataframes=[df1, df2, df3],
    titles=['df1', 'df2', 'df3'],
)

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3

Unnamed: 0,key,C,D
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3
4,K4,C4,D4

Unnamed: 0,key,E,F
1,K1,E1,F1
2,K2,E2,F2
3,K3,E3,F3
4,K4,E4,F4


In [15]:
# Example 1: merge with every argument left at its default.
# By default merge performs an inner join on the columns the two frames
# have in common (here "key"), so only keys present in both are kept.
pd.merge(df1, df2)



Unnamed: 0,key,A,B,C,D
0,K1,A1,B1,C1,D1
1,K2,A2,B2,C2,D2
2,K3,A3,B3,C3,D3


In [16]:
# Example 2: the join type can be chosen explicitly through `how`
# ("inner", "left", "right" or "outer") and the join column through `on`.
pd.merge(df1, df3, how="inner", on="key")



Unnamed: 0,key,A,B,E,F
0,K1,A1,B1,E1,F1
1,K2,A2,B2,E2,F2
2,K3,A3,B3,E3,F3


In [17]:
# Build one merge result per join type so they can be compared:
#   inner - keeps only the keys present in both df1 and df2
#   left  - keeps every key of df1 (the left frame); keys that exist only
#           in df2 are dropped and unmatched rows get NaN in C/D
#   right - keeps every key of df2 (the right frame); keys that exist only
#           in df1 are dropped and unmatched rows get NaN in A/B
df_inner, df_left, df_right = (
    df1.merge(df2, on='key', how=join_type)
    for join_type in ('inner', 'left', 'right')
)

# display_side_by_side takes the list of frames and a matching list of titles.
display_side_by_side(
    [df1, df2, df_inner, df_left, df_right],
    ['df1', 'df2', 'inner_merge', 'left_merge', 'right_merge'],
)

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3

Unnamed: 0,key,C,D
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3
4,K4,C4,D4

Unnamed: 0,key,A,B,C,D
0,K1,A1,B1,C1,D1
1,K2,A2,B2,C2,D2
2,K3,A3,B3,C3,D3

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,,
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3

Unnamed: 0,key,A,B,C,D
0,K1,A1,B1,C1,D1
1,K2,A2,B2,C2,D2
2,K3,A3,B3,C3,D3
3,K4,,,C4,D4


In [18]:
# Interactive picker for the join type.
@interact(method=['inner', 'left', 'right', 'outer'])
def merge(method):
    """Merge df1 with df2 using the selected join type and show the result.

    Args:
        method: Value passed as ``how`` to the merge.
    """
    merged = pd.merge(df1, df2, how=method)
    display_side_by_side(
        dataframes=[df1, df2, merged],
        titles=['df1', 'df2', f'{method}_join'],
    )

interactive(children=(Dropdown(description='method', options=('inner', 'left', 'right', 'outer'), value='inner…

In [19]:
# Stack df3 below df1 (row-wise concatenation). Columns missing from one
# frame are NaN in its rows, and each frame keeps its own index labels.
pd.concat([df1, df3], axis="index")

Unnamed: 0,key,A,B,E,F
0,K0,A0,B0,,
1,K1,A1,B1,,
2,K2,A2,B2,,
3,K3,A3,B3,,
1,K1,,,E1,F1
2,K2,,,E2,F2
3,K3,,,E3,F3
4,K4,,,E4,F4


In [None]:
# Place df3 to the right of df1 (column-wise concatenation).
# concat aligns the rows on their index labels, not on their position.
pd.concat([df1, df3], axis="columns")

In [21]:
# Split df1 into two example frames: its first two rows and the remainder.
df_A, df_B = df1.iloc[:2], df1.iloc[2:]
display_side_by_side(dataframes=[df_A, df_B], titles=['df_A', 'df_B'])

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1

Unnamed: 0,key,A,B
2,K2,A2,B2
3,K3,A3,B3


In [23]:
# Renumber df_B from 0 so its rows line up with df_A's labels; concat aligns
# on index labels, so without the reset the two halves would not share rows.
pd.concat([df_A, df_B.reset_index(drop=True)], axis="columns")

Unnamed: 0,key,A,B,key.1,A.1,B.1
0,K0,A0,B0,K2,A2,B2
1,K1,A1,B1,K3,A3,B3


In [25]:
# Two example frames indexed by key: K0..K2 on the left, K0/K2/K3 on the right.
df_left = pd.DataFrame(
    {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]},
    index=["K0", "K1", "K2"],
)

df_right = pd.DataFrame(
    {"C": ["C0", "C1", "C2"], "D": ["D0", "D1", "D2"]},
    index=["K0", "K2", "K3"],
)

# join matches rows on the index; "left" is its default join type.
df_join = df_left.join(df_right, how="left")

# Show both inputs and the joined result next to each other.
display_side_by_side(
    dataframes=[df_left, df_right, df_join],
    titles=['df_left', 'df_right', 'df_join'],
)


Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2

Unnamed: 0,C,D
K0,C0,D0
K2,C1,D1
K3,C2,D2

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C1,D1
