In [19]:
import pandas as pd
import numpy as np
import datetime as dt

import plotly.graph_objects as go

In [20]:
# `display` must come from the public IPython.display module: importing it from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import HTML, display

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Описание

# Виды join-ов

<img src="../../data/img/sql_joins.PNG" width="500">

# Практики

```python
pd.merge(df1, df2)
df1.merge(df2)
df1.join(df2)
```

## Объединение таблиц

#### Какими будут результаты различных видов join-ов при объединении таблицы с самой собой?

In [28]:
# Small table with a unique key column `id`; used below to join the table with itself.
df = pd.DataFrame(
    dict(
        id=[1, 2, 3],
        val=list('ABC'),
    )
)
df

Unnamed: 0,id,val
0,1,A
1,2,B
2,3,C


In [29]:
# No `how` given, so the default (inner) join is used; the shared non-key
# column `val` comes back as val_x / val_y.
df.merge(df, on='id')

Unnamed: 0,id,val_x,val_y
0,1,A,A
1,2,B,B
2,3,C,C


In [30]:
# Left join of the table with itself on the unique key `id`.
df.merge(df, how='left', on='id')

Unnamed: 0,id,val_x,val_y
0,1,A,A
1,2,B,B
2,3,C,C


In [31]:
# Right join of the table with itself on the unique key `id`.
df.merge(df, how='right', on='id')

Unnamed: 0,id,val_x,val_y
0,1,A,A
1,2,B,B
2,3,C,C


In [32]:
# Explicit inner join of the table with itself on the unique key `id`.
df.merge(df, how='inner', on='id')

Unnamed: 0,id,val_x,val_y
0,1,A,A
1,2,B,B
2,3,C,C


In [33]:
# Cross join (Cartesian product): every row of df paired with every row of df,
# i.e. 3 x 3 = 9 rows for the frame above. Requires pandas >= 1.2.
# NOTE(review): this call is disabled in the notebook; the reason is not recorded.
# pd.merge(df, df, how='cross')

#### Как объединятся эти датафреймы?

In [34]:
# Two single-column frames that share the same duplicated index label (1, 1).
df1 = pd.DataFrame(data={'val1': list('AB')}, index=[1, 1])
df2 = pd.DataFrame(data={'val2': list('CB')}, index=[1, 1])

# Show both inputs, one after the other.
display(df1)
display(df2)

Unnamed: 0,val1
1,A
1,B


Unnamed: 0,val2
1,C
1,B


In [35]:
# Index-on-index left join (the default for DataFrame.join). Every row labelled 1
# in df1 is matched with every row labelled 1 in df2, giving 2 x 2 = 4 rows.
df1.join(df2, how='left')

Unnamed: 0,val1,val2
1,A,C
1,A,B
1,B,C
1,B,B


## Перебор комбинаций - self join

```python
comb_df = pd.merge(df, df, how='cross', suffixes=('_a', '_b'))
comb_df = comb_df[(comb_df['id_a'] != comb_df['id_b']) & (comb_df['val_b'] > comb_df['val_a'])]
comb_df['val_diff'] = comb_df['val_b'] - comb_df['val_a']
```

#### Примеры задач

- [InterviewQuery: Closest SAT Scores - Medium](https://www.interviewquery.com/questions/closest-sat-scores)
- [Stratascratch: Finding User Purchases - Medium](https://platform.stratascratch.com/coding/10322-finding-user-purchases?code_type=1)
- [Stratascratch: Find matching hosts and guests in a way that they are both of the same gender and nationality - Medium](https://platform.stratascratch.com/coding/10078-find-matching-hosts-and-guests-in-a-way-that-they-are-both-of-the-same-gender-and-nationality?code_type=1)
- [Stratascratch: Employee and Manager Salaries - Medium](https://platform.stratascratch.com/coding/9894-employee-and-manager-salaries?tabname=question)

### Проставить значение по умолчанию для каждой незаполненной записи

In [5]:
# People and the location each of them is currently assigned to.
df = pd.DataFrame(dict(
    Name=['Alex Anderson', 'Rodrick Berton', 'Vera Eros', 'Anna Novikova', 'Edward Shtolz'],
    Location=['Block 1', 'Block 1', 'Dock 2', 'Dock 2', 'District 3'],
))

# Lookup table: the first five rows are location-specific records, the last five
# are the per-person default values (Location == 'Default').
add = pd.DataFrame({
    'Name': ['Alex Anderson', 'Rodrick Berton', 'Vera Eros', 'Anna Novikova', 'Edward Shtolz'] * 2,
    'Location': ['Block 1', 'Block 1', 'Dock 3', 'Dock 2', 'Block 1']
                + ['Default'] * 5,  # default values
    'Value': [18, 23, 37, 12, 28]
             + [20, 20, 35, 10, 25],  # default values
    'Status': ['Stable', 'Injured', 'Stable', 'Deceased', 'Stable']
              + ['Unknown'] * 5,  # default values
})

# Left join on the (Name, Location) pair: people whose exact pair is absent
# from `add` keep their row but get NaN in Value and Status.
df = pd.merge(df, add, how='left', on=['Name', 'Location'])
df

Unnamed: 0,Name,Location,Value,Status
0,Alex Anderson,Block 1,18.0,Stable
1,Rodrick Berton,Block 1,23.0,Injured
2,Vera Eros,Dock 2,,
3,Anna Novikova,Dock 2,12.0,Deceased
4,Edward Shtolz,District 3,,


In [6]:
# Fetch each person's 'Default' record by Name alone. The left merge keeps df's
# row order and `add` holds one 'Default' row per name, so the result lines up
# with df row by row.
names_only = df.drop(columns=['Location', 'Value', 'Status'])
default_rows = add[add['Location'] == 'Default']
default_values = names_only.merge(default_rows, how='left', on=['Name'])[['Value', 'Status']]
default_values

Unnamed: 0,Value,Status
0,20,Unknown
1,20,Unknown
2,35,Unknown
3,10,Unknown
4,25,Unknown


In [7]:
# fillna with a DataFrame aligns on index and column labels, so only the cells
# that are still missing are replaced by the matching default.
fill_cols = ['Value', 'Status']
df[fill_cols] = df[fill_cols].fillna(default_values)
df

Unnamed: 0,Name,Location,Value,Status
0,Alex Anderson,Block 1,18.0,Stable
1,Rodrick Berton,Block 1,23.0,Injured
2,Vera Eros,Dock 2,35.0,Unknown
3,Anna Novikova,Dock 2,12.0,Deceased
4,Edward Shtolz,District 3,25.0,Unknown
