# 6. Jointures
Les jointures sont une des raisons d'utiliser Pandas plutôt que des outils comme Excel. En effet, ce type d'opération est très simple à réaliser avec Pandas.

## merge
```python
# Fusionner deux dataframes sur la base d'une colonne du même nom
df.merge(dg, on='col_identique', how='inner')

# Fusionner deux dataframes sur la base de colonnes différentes entre les deux dataframes
df.merge(dg, left_on='col_df', right_on='col_dg', how='inner')

# Fusionner deux dataframes sur la base de l'index de df et d'une colonne de dg
df.merge(dg, left_index=True, right_on='col_dg', how='inner')

# Conserver toutes les lignes de df et ajouter des valeurs manquantes quand la ligne n'existe pas dans dg
df.merge(dg, on='col_identique', how='left')
```

## concat
```python
# Réunit deux dataframes. Les index ne doivent en principe pas avoir de clés communes
df = pd.concat((d_p1, d_p2))

# Réunit deux dataframes en ignorant l'index
df = pd.concat((d_p1, d_p2), ignore_index=True)
```

In [8]:
# Importer les bibliothèques essentielles
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Show every column when a DataFrame is displayed (no column limit)
pd.options.display.max_columns = None

In [9]:
# Load the two CSV datasets used by the join examples in this notebook
d_items = pd.read_csv(filepath_or_buffer='Data/items_2.csv', low_memory=False)
d_port = pd.read_csv(filepath_or_buffer='Data/portfolios.csv', low_memory=False)

In [10]:
# Preview the first five rows of the items table
d_items.head(n=5)

Unnamed: 0,MMS Id,Title,Num of Items (In Repository),Num of Loans (Not In House),Last Loan Date (calendar)
0,990000945010205516,Differential geometry of curves and surfaces,10,10,2018-03-05
1,990000945010205516,Differential geometry of curves and surfaces,10,540,2018-04-18
2,990000945010205516,Differential geometry of curves and surfaces,10,410,2020-09-24
3,990000945010205516,Differential geometry of curves and surfaces,0,320,2021-03-04
4,990000945010205516,Differential geometry of curves and surfaces,10,200,2021-09-22


In [11]:
# Preview the first five rows of the portfolios table
d_port.head(n=5)

Unnamed: 0,Electronic Collection Public Name,MMS Id,Title,Portfolio Id,Electronic Collection Id
0,IEEE Electronic Library (IEL),99116698588205516,IEEE guide for transformer loss measurement,5328982920005516,6128473920005516
1,IEEE Electronic Library (IEL),99116701948805516,IEEE Std 1671.1-2009: IEEE Trial-Use Standard ...,5329679310005516,6128473920005516
2,"Ebook Central Perpetual, DDA and Subscription ...",99116748315205516,Multicultural and diversity issues in student ...,5337607890005516,6128471720005516
3,American Mathematical Society eBooks,99116706470405516,Thirteen papers on algebra and analysis /,5330694700005516,6128473930005516
4,Springer Computer Science eBooks 2020 English/...,99116705449105516,Inductive Logic Programming 29th International...,5330449220005516,6128473820005516


## Merge

In [12]:
# Inner join of the two tables on their shared 'MMS Id' column:
# only keys present in both d_items and d_port are kept.
# Both tables have a 'Title' column, so the result carries Title_x and Title_y.
d_hybrid = pd.merge(d_items, d_port, how='inner', on='MMS Id')
d_hybrid

Unnamed: 0,MMS Id,Title_x,Num of Items (In Repository),Num of Loans (Not In House),Last Loan Date (calendar),Electronic Collection Public Name,Title_y,Portfolio Id,Electronic Collection Id
0,990017438570205516,Electrochemically induced luminescence at nano...,0,0,,,Electrochemically induced luminescence at nano...,5328365220005516,
1,990047728480205516,Introduction à l'électrotechnique,10,0,2011-11-03,,Introduction à l'électrotechnique,5328435630005516,
2,990047728480205516,Introduction à l'électrotechnique,10,110,2020-02-17,,Introduction à l'électrotechnique,5328435630005516,
3,990047728480205516,Introduction à l'électrotechnique,10,300,2021-11-11,,Introduction à l'électrotechnique,5328435630005516,
4,990047728480205516,Introduction à l'électrotechnique,10,260,2021-11-25,,Introduction à l'électrotechnique,5328435630005516,


In [13]:
# Join on key columns that have different names in the two tables.
# First reduce d_items to the record id and the loan count, under shorter names.
# rename() takes an explicit old -> new mapping and ignores names that are no
# longer present, so this cell can be re-run without a KeyError on 'MMS Id'.
d_items = d_items.rename(
    columns={'MMS Id': 'rec_id', 'Num of Loans (Not In House)': 'Nb_loans'}
)[['rec_id', 'Nb_loans']]

# The key is 'rec_id' on the left (d_items) and 'MMS Id' on the right (d_port);
# both key columns are kept in the result.
d_hybrid = d_items.merge(d_port, left_on='rec_id', right_on='MMS Id', how='inner')
d_hybrid
                  

Unnamed: 0,rec_id,Nb_loans,Electronic Collection Public Name,MMS Id,Title,Portfolio Id,Electronic Collection Id
0,990017438570205516,0,,990017438570205516,Electrochemically induced luminescence at nano...,5328365220005516,
1,990047728480205516,0,,990047728480205516,Introduction à l'électrotechnique,5328435630005516,
2,990047728480205516,110,,990047728480205516,Introduction à l'électrotechnique,5328435630005516,
3,990047728480205516,300,,990047728480205516,Introduction à l'électrotechnique,5328435630005516,
4,990047728480205516,260,,990047728480205516,Introduction à l'électrotechnique,5328435630005516,


## Concat

In [15]:
# Stack two non-overlapping row slices of d_items (positions 0-7 and 12-14);
# every row keeps the index label it had in d_items.
d_p1 = d_items.head(8)
d_p2 = d_items.iloc[12:15]
pd.concat([d_p1, d_p2])

Unnamed: 0,rec_id,Nb_loans
0,990000945010205516,10
1,990000945010205516,540
2,990000945010205516,410
3,990000945010205516,320
4,990000945010205516,200
5,990001122290205516,470
6,990001122290205516,850
7,990001122290205516,180
12,990001122290205516,50
13,990001122290205516,150


In [16]:
# Stack two overlapping row slices (positions 0-7 and 6-9): the shared rows
# appear twice and their index labels are repeated in the result.
d_p1 = d_items.head(8)
d_p2 = d_items.iloc[6:10]
pd.concat([d_p1, d_p2])

Unnamed: 0,rec_id,Nb_loans
0,990000945010205516,10
1,990000945010205516,540
2,990000945010205516,410
3,990000945010205516,320
4,990000945010205516,200
5,990001122290205516,470
6,990001122290205516,850
7,990001122290205516,180
6,990001122290205516,850
7,990001122290205516,180


In [17]:
# Same concatenation, but drop the original index labels and renumber rows from 0
pd.concat([d_p1, d_p2], ignore_index=True)

Unnamed: 0,rec_id,Nb_loans
0,990000945010205516,10
1,990000945010205516,540
2,990000945010205516,410
3,990000945010205516,320
4,990000945010205516,200
5,990001122290205516,470
6,990001122290205516,850
7,990001122290205516,180
8,990001122290205516,850
9,990001122290205516,180
