In [None]:
import pandas as pd

In [None]:
import sqlalchemy

# Build an engine for the SQLite file `nautical.db` (a relative path in the
# URL) and open the single connection that every query cell below reuses.
engine = sqlalchemy.create_engine(
    "sqlite:///nautical.db"
)
conn = engine.connect()

For lecture I am also going to use a PostgreSQL database.

In [None]:
# Alternative connection used in lecture: point `engine`/`conn` at a local
# PostgreSQL database instead of the SQLite file. Uncomment to switch backends.
# NOTE(review): recent SQLAlchemy releases expect the "postgresql://" scheme
# rather than "postgres://" -- confirm against the installed version before use.
# import sqlalchemy
# engine = sqlalchemy.create_engine("postgres://jegonzal:@localhost:5432/data100")
# conn = engine.connect()

# Create Tables

In [None]:
# Schema and seed data for the three example tables (sailors, boats, reserves).
# The tables are dropped first so this cell can be re-run from a clean slate.
# Note that the last `reserves` row (bid 41, sid 38) references a boat and a
# sailor that are not present in the other tables, so it has no join partner.
script = """
DROP TABLE IF EXISTS sailors;
DROP TABLE IF EXISTS boats;
DROP TABLE IF EXISTS reserves;


CREATE TABLE sailors (
   sid INTEGER,   
   name CHAR(20), 
   PRIMARY KEY (sid));

CREATE TABLE boats (
   bid INTEGER,
   name CHAR (20), 
   PRIMARY KEY (bid));

 CREATE TABLE reserves (
   bid INTEGER, 
   sid INTEGER,      
   day DATE);

INSERT INTO sailors VALUES
(22, 'Alice'),
(31, 'Bob'),
(95, 'Nora');

INSERT INTO boats VALUES
(101, 'Titanic'),
(102, 'Beagle'),
(103, 'Bismarck'),
(104, 'Clipper');

INSERT INTO reserves VALUES 
(101, 22,  '10/10/96'),
(103, 95, '11/12/96'),
(41, 38,  '8/11/18');
"""

# Execute the script one statement at a time, splitting on ";" and skipping
# the empty fragments left by blank lines and the trailing terminator.
# Each statement is wrapped in `sqlalchemy.text(...)`: passing a plain string
# to `Connection.execute` is deprecated in SQLAlchemy 1.4 and rejected in 2.x,
# while `text()` is accepted by both.
for statement in script.split(";"):
    statement = statement.strip()
    if statement:
        conn.execute(sqlalchemy.text(statement))

# SQLAlchemy 2.x connections no longer autocommit, so persist the changes
# explicitly. Legacy 1.x connections have no `commit` method (they autocommit
# DDL/DML on their own), hence the guard.
if hasattr(conn, "commit"):
    conn.commit()

# View Tables

In [None]:
# Display every row of the `sailors` table as a DataFrame.
pd.read_sql(
    sql="""
    SELECT * FROM sailors
""",
    con=conn,
)

In [None]:
# Read the whole `boats` table into a DataFrame, keep it as `boats`, and show it.
boats = pd.read_sql(
    sql="""
    SELECT * FROM boats
""",
    con=conn,
)
boats

In [None]:
# Read the whole `reserves` table into a DataFrame, keep it as `reserves`, and show it.
reserves = pd.read_sql(
    sql="""
    SELECT * FROM reserves
""",
    con=conn,
)
reserves

# Inner Join

An inner join (the default kind of join) only keeps rows that match the join predicate.

<img src="inner_join.png" alt="Inner Join" width="150px">

<center>
  <div style="display: inline-block">
    <big>Boats</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>name</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>Titanic</td>
        </tr>
        <tr>
          <th>1</th>
          <td>102</td>
          <td>Beagle</td>
        </tr>
        <tr>
          <th>2</th>
          <td>103</td>
          <td>Bismarck</td>
        </tr>
        <tr>
          <th>3</th>
          <td>104</td>
          <td>Clipper</td>
        </tr>
      </tbody>
    </table>
  </div>
  <div class="horizontalgap" style="display: inline-block; width:20px"></div>
  <div style="display: inline-block">
    <big>Reserves</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>sid</th>
          <th>day</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>22</td>
          <td>1996-10-10</td>
        </tr>
        <tr>
          <th>1</th>
          <td>103</td>
          <td>95</td>
          <td>1996-11-12</td>
        </tr>
        <tr>
          <th>2</th>
          <td>41</td>
          <td>38</td>
          <td>2018-08-11</td>
        </tr>
      </tbody>
    </table>
  </div>
</center>




In [None]:
# Inner join written in the implicit style: list both tables in FROM and put
# the join predicate in WHERE.
pd.read_sql(
    sql="""
    SELECT * 
    FROM boats b, reserves r
    WHERE b.bid = r.bid
""",
    con=conn,
)

In [None]:
# The same inner join, written with the explicit INNER JOIN ... ON syntax.
pd.read_sql(
    sql="""
    SELECT * 
    FROM boats b INNER JOIN reserves r
        ON b.bid = r.bid
""",
    con=conn,
)

# Left Join

Same as an inner join, but if a row in the left table fails to match any row in the right table, it is still included, with null values in place of all the right table's columns.

<img src="left_join.png" alt="Left Join" width="150px">
<center>
  <div style="display: inline-block">
    <big>Boats</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>name</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>Titanic</td>
        </tr>
        <tr>
          <th>1</th>
          <td>102</td>
          <td>Beagle</td>
        </tr>
        <tr>
          <th>2</th>
          <td>103</td>
          <td>Bismarck</td>
        </tr>
        <tr>
          <th>3</th>
          <td>104</td>
          <td>Clipper</td>
        </tr>
      </tbody>
    </table>
  </div>
  <div class="horizontalgap" style="display: inline-block; width:20px"></div>
  <div style="display: inline-block">
    <big>Reserves</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>sid</th>
          <th>day</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>22</td>
          <td>1996-10-10</td>
        </tr>
        <tr>
          <th>1</th>
          <td>103</td>
          <td>95</td>
          <td>1996-11-12</td>
        </tr>
        <tr>
          <th>2</th>
          <td>41</td>
          <td>38</td>
          <td>2018-08-11</td>
        </tr>
      </tbody>
    </table>
  </div>
</center>

In [None]:
# Left join: every boat is kept; the reserves columns are NULL for boats
# that have no matching reservation.
pd.read_sql(
    sql="""
    SELECT * 
    FROM boats b LEFT JOIN reserves r
        ON b.bid = r.bid
""",
    con=conn,
)

# Right Join

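Just like a left join, but keeping every row from the right table instead: right-table rows with no match are included, with null values in place of the left table's columns.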

<img src="right_join.png" alt="Right Join" width="150px">

<details>
    <summary>SQLite Note</summary>    
    Right joins are not supported in SQLite versions before 3.39.0, but you can use a LEFT JOIN with the two tables swapped instead.
</details>

<center>
  <div style="display: inline-block">
    <big>Boats</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>name</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>Titanic</td>
        </tr>
        <tr>
          <th>1</th>
          <td>102</td>
          <td>Beagle</td>
        </tr>
        <tr>
          <th>2</th>
          <td>103</td>
          <td>Bismarck</td>
        </tr>
        <tr>
          <th>3</th>
          <td>104</td>
          <td>Clipper</td>
        </tr>
      </tbody>
    </table>
  </div>
  <div class="horizontalgap" style="display: inline-block; width:20px"></div>
  <div style="display: inline-block">
    <big>Reserves</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>sid</th>
          <th>day</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>22</td>
          <td>1996-10-10</td>
        </tr>
        <tr>
          <th>1</th>
          <td>103</td>
          <td>95</td>
          <td>1996-11-12</td>
        </tr>
        <tr>
          <th>2</th>
          <td>41</td>
          <td>38</td>
          <td>2018-08-11</td>
        </tr>
      </tbody>
    </table>
  </div>
</center>

In [None]:
# RIGHT JOIN form of the query. Left commented out for the SQLite connection,
# where RIGHT JOIN may not be available; uncomment when `conn` points at a
# database that supports it (e.g. the PostgreSQL database used in lecture).
# pd.read_sql("""
#     SELECT * 
#     FROM boats b RIGHT JOIN reserves r
#         ON b.bid = r.bid
# """, conn)

In [None]:
# Emulate the right join by swapping the table order and using LEFT JOIN:
# every reservation is kept, with NULL boat columns where no boat matches.
pd.read_sql(
    sql="""
    SELECT * 
    FROM reserves r LEFT JOIN boats b 
        ON b.bid = r.bid
""",
    con=conn,
)

# Full Join

The full outer join ensures that every row from both tables appears at least once in the final result, even if it has no match, by filling in null values for the other table's columns.

<img src="full_join.png" alt="Full Join" width="150px">

<details>
    <summary>SQLite Note</summary>    
    Full outer joins are not supported in SQLite versions before 3.39.0.
</details>
<center>
  <div style="display: inline-block">
    <big>Boats</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>name</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>Titanic</td>
        </tr>
        <tr>
          <th>1</th>
          <td>102</td>
          <td>Beagle</td>
        </tr>
        <tr>
          <th>2</th>
          <td>103</td>
          <td>Bismarck</td>
        </tr>
        <tr>
          <th>3</th>
          <td>104</td>
          <td>Clipper</td>
        </tr>
      </tbody>
    </table>
  </div>
  <div class="horizontalgap" style="display: inline-block; width:20px"></div>
  <div style="display: inline-block">
    <big>Reserves</big>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th></th>
          <th>bid</th>
          <th>sid</th>
          <th>day</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>0</th>
          <td>101</td>
          <td>22</td>
          <td>1996-10-10</td>
        </tr>
        <tr>
          <th>1</th>
          <td>103</td>
          <td>95</td>
          <td>1996-11-12</td>
        </tr>
        <tr>
          <th>2</th>
          <td>41</td>
          <td>38</td>
          <td>2018-08-11</td>
        </tr>
      </tbody>
    </table>
  </div>
</center>

In [None]:
# FULL JOIN form of the query. Left commented out for the SQLite connection,
# where FULL JOIN may not be available; uncomment when `conn` points at a
# database that supports it (e.g. the PostgreSQL database used in lecture).
# pd.read_sql("""
#     SELECT *
#     FROM boats b FULL JOIN reserves r
#         ON b.bid = r.bid
# """, conn)