# Multiple Tables

In [1]:
import pandas as pd 
import sqlite3


def create_df(cursor: sqlite3.Cursor) -> pd.DataFrame:
    data = cursor.fetchall()
    column_names = [description[0] for description in cursor.description]
    df = pd.DataFrame(data, columns=column_names)
    return df


conn = sqlite3.connect(":memory:")

#### Prepare Database

In [2]:
st = """ 
    CREATE TABLE trips (
        id INTEGER, 
        date TEXT,
        pickup TEXT,
        dropoff TEXT,
        rider_id INTEGER,
        car_id INTEGER,
        type TEXT,
        cost INTEGER
    );
"""
conn.execute(st)


st = """ 
    CREATE TABLE riders (
        id INTEGER,
        first TEXT,
        last TEXT,
        username TEXT,
        rating INTEGER,
        total_trips INTEGER,
        referred INTEGER
    );
"""
conn.execute(st)


st = """ 
    CREATE TABLE riders2 (
        id INTEGER,
        first TEXT,
        last TEXT,
        username TEXT,
        rating INTEGER,
        total_trips INTEGER,
        referred INTEGER
    );
"""
conn.execute(st)


st = """ 
    CREATE TABLE cars (
        id INTEGER,
        model TEXT,
        OS TEXT,
        status TEXT,
        trips_completed INTEGER
    );
"""
conn.execute(st)

<sqlite3.Cursor at 0x2899eebbb40>

In [3]:
trips = [
    (1001, "2017-12-05", "06:45", "07:10", 102, 1, "X", 28.66),
    (1002, "2017-12-05", "08:00", "08:15", 101, 3, "POOL", 9.11),
    (1003, "2017-12-05", "09:30", "09:50", 104, 4, "X", 24.98),
    (1004, "2017-12-05", "13:40", "14:05", 105, 1, "X", 31.27),
    (1005, "2017-12-05", "15:15", "16:00", 103, 2, "POOL", 18.95),
    (1006, "2017-12-05", "18:20", "18:55", 101, 3, "XL", 78.52),
]
conn.executemany(
    """INSERT INTO trips (id, date, pickup, dropoff, rider_id, car_id, type, cost)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """,
    trips,
)


riders = [
    (101, "Sonny", "Li", "@sonnynomnom", 4.66, 352, None),
    (102, "Laura", "Breiman", "@lauracle", 4.99, 687, 101),
    (103, "Kassa", "Korely", "@kassablanca", 4.63, 42, None),
    (104, "Yakov", "Kagan", "@yakovkagan", 4.52, 1910, 103),
]
conn.executemany(
    """INSERT INTO riders (id, first, last, username, rating, total_trips, referred)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """,
    riders,
)


riders2 = [
    (105, "Zach", "Sims", "@zsims", 4.85, 787, None),
    (106, "Eric", "Vaught", "@posturelol", 4.96, 54, 101),
    (107, "Jilly", "Beans", "@jillkuzmin", 4.7, 32, 101),
]
conn.executemany(
    """INSERT INTO riders (id, first, last, username, rating, total_trips, referred)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """,
    riders2,
)


cars = [
    (1, "Ada", "Ryzac", "active", 82),
    (2, "Ada", "Ryzac", "active", 30),
    (3, "Turing XL", "Ryzac", "active", 164),
    (4, "Akira", "Finux", "maintenance", 22),
]
conn.executemany(
    """INSERT INTO cars (id, model, OS, status, trips_completed)
        VALUES (?, ?, ?, ?, ?)
    """,
    cars,
)

<sqlite3.Cursor at 0x2898fcb0540>

#### 1.

Let’s examine the three tables.

```sql
SELECT * FROM trips;

SELECT * FROM riders;

SELECT * FROM cars;
```

What are the column names?

<details><summary style="display:list-item; font-size:16px; color:white;">Solution</summary>

**trips** table

- `id` - trip ID
- `date` - trip date
- `pickup` - pickup time stamp
- `dropoff` - drop-off time stamp
- `rider_id` - user ID
- `car_id` - car ID
- `type` - type of trip (X, POOL, XL)
- `cost` - trip cost

**riders** table

- `id` - user ID
- `first` - user first name
- `last` - user last name
- `username` - user handle
- `rating` - user average rating
- `total_trips` - total rides ridden
- `referred` - referred by (user ID)

**cars** table

- `id` - car ID
- `model` - car model
- `OS` - operating system
- `status` - active or maintenance
- `trips_completed` - total trips completed

In [4]:
st = """ 
    SELECT * 
    FROM trips;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,date,pickup,dropoff,rider_id,car_id,type,cost
0,1001,2017-12-05,06:45,07:10,102,1,X,28.66
1,1002,2017-12-05,08:00,08:15,101,3,POOL,9.11
2,1003,2017-12-05,09:30,09:50,104,4,X,24.98
3,1004,2017-12-05,13:40,14:05,105,1,X,31.27
4,1005,2017-12-05,15:15,16:00,103,2,POOL,18.95


In [5]:
st = """ 
    SELECT * 
    FROM riders;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,first,last,username,rating,total_trips,referred
0,101,Sonny,Li,@sonnynomnom,4.66,352,
1,102,Laura,Breiman,@lauracle,4.99,687,101.0
2,103,Kassa,Korely,@kassablanca,4.63,42,
3,104,Yakov,Kagan,@yakovkagan,4.52,1910,103.0
4,105,Zach,Sims,@zsims,4.85,787,


In [6]:
st = """ 
    SELECT * 
    FROM cars;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,model,OS,status,trips_completed
0,1,Ada,Ryzac,active,82
1,2,Ada,Ryzac,active,30
2,3,Turing XL,Ryzac,active,164
3,4,Akira,Finux,maintenance,22


#### 2.

What’s the primary key of `trips`?

What’s the primary key of `riders`?

What’s the primary key of `cars`?

<details><summary style="display:list-item; font-size:16px; color:white;">Solution</summary>

- The primary key of `trips` is `id`.
- The primary key of `riders` is `id`.
- The primary key of `cars` is `id`.

They have the same name, but they are very different.

In [7]:
st = """ 
    SELECT * 
    FROM trips;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,date,pickup,dropoff,rider_id,car_id,type,cost
0,1001,2017-12-05,06:45,07:10,102,1,X,28.66
1,1002,2017-12-05,08:00,08:15,101,3,POOL,9.11
2,1003,2017-12-05,09:30,09:50,104,4,X,24.98
3,1004,2017-12-05,13:40,14:05,105,1,X,31.27
4,1005,2017-12-05,15:15,16:00,103,2,POOL,18.95


In [8]:
st = """ 
    SELECT * 
    FROM riders;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,first,last,username,rating,total_trips,referred
0,101,Sonny,Li,@sonnynomnom,4.66,352,
1,102,Laura,Breiman,@lauracle,4.99,687,101.0
2,103,Kassa,Korely,@kassablanca,4.63,42,
3,104,Yakov,Kagan,@yakovkagan,4.52,1910,103.0
4,105,Zach,Sims,@zsims,4.85,787,


In [9]:
st = """ 
    SELECT * 
    FROM cars;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,model,OS,status,trips_completed
0,1,Ada,Ryzac,active,82
1,2,Ada,Ryzac,active,30
2,3,Turing XL,Ryzac,active,164
3,4,Akira,Finux,maintenance,22


#### 3.

Try out a simple cross join between `riders` and `cars`.

Is the result useful?

In [10]:
st = """ 
    SELECT * 
    FROM riders 
    CROSS JOIN cars;
"""

create_df(conn.execute(st)).head()

Unnamed: 0,id,first,last,username,rating,total_trips,referred,id.1,model,OS,status,trips_completed
0,101,Sonny,Li,@sonnynomnom,4.66,352,,1,Ada,Ryzac,active,82
1,101,Sonny,Li,@sonnynomnom,4.66,352,,2,Ada,Ryzac,active,30
2,101,Sonny,Li,@sonnynomnom,4.66,352,,3,Turing XL,Ryzac,active,164
3,101,Sonny,Li,@sonnynomnom,4.66,352,,4,Akira,Finux,maintenance,22
4,102,Laura,Breiman,@lauracle,4.99,687,101.0,1,Ada,Ryzac,active,82


#### 4.

Suppose we want to create a Trip Log with the trips and its users.

Find the columns to join between `trips` and `riders` and combine the two tables using a `LEFT JOIN`.

Let `trips` be the left table.

In [11]:
st = """ 
    SELECT * 
    FROM trips 
    LEFT JOIN riders
        ON trips.rider_id = riders.id;
"""

create_df(conn.execute(st))

Unnamed: 0,id,date,pickup,dropoff,rider_id,car_id,type,cost,id.1,first,last,username,rating,total_trips,referred
0,1001,2017-12-05,06:45,07:10,102,1,X,28.66,102,Laura,Breiman,@lauracle,4.99,687,101.0
1,1002,2017-12-05,08:00,08:15,101,3,POOL,9.11,101,Sonny,Li,@sonnynomnom,4.66,352,
2,1003,2017-12-05,09:30,09:50,104,4,X,24.98,104,Yakov,Kagan,@yakovkagan,4.52,1910,103.0
3,1004,2017-12-05,13:40,14:05,105,1,X,31.27,105,Zach,Sims,@zsims,4.85,787,
4,1005,2017-12-05,15:15,16:00,103,2,POOL,18.95,103,Kassa,Korely,@kassablanca,4.63,42,
5,1006,2017-12-05,18:20,18:55,101,3,XL,78.52,101,Sonny,Li,@sonnynomnom,4.66,352,


#### 5.

Suppose we want to create a link between the `trips` and the `cars` used during those trips.

Find the columns to join on and combine the `trips` and `cars` table using an `INNER JOIN`.

In [12]:
st = """ 
    SELECT * 
    FROM trips 
    JOIN cars
        ON trips.car_id = cars.id;
"""

create_df(conn.execute(st))

Unnamed: 0,id,date,pickup,dropoff,rider_id,car_id,type,cost,id.1,model,OS,status,trips_completed
0,1001,2017-12-05,06:45,07:10,102,1,X,28.66,1,Ada,Ryzac,active,82
1,1002,2017-12-05,08:00,08:15,101,3,POOL,9.11,3,Turing XL,Ryzac,active,164
2,1003,2017-12-05,09:30,09:50,104,4,X,24.98,4,Akira,Finux,maintenance,22
3,1004,2017-12-05,13:40,14:05,105,1,X,31.27,1,Ada,Ryzac,active,82
4,1005,2017-12-05,15:15,16:00,103,2,POOL,18.95,2,Ada,Ryzac,active,30
5,1006,2017-12-05,18:20,18:55,101,3,XL,78.52,3,Turing XL,Ryzac,active,164


#### 6.

The new riders data are in! There are three new users this month.

Stack the `riders` table on top of the new table named `riders2`.

In [13]:
st = """ 
    SELECT * 
    FROM riders 
    UNION
    SELECT *
    FROM riders2;
"""

create_df(conn.execute(st))

Unnamed: 0,id,first,last,username,rating,total_trips,referred
0,101,Sonny,Li,@sonnynomnom,4.66,352,
1,102,Laura,Breiman,@lauracle,4.99,687,101.0
2,103,Kassa,Korely,@kassablanca,4.63,42,
3,104,Yakov,Kagan,@yakovkagan,4.52,1910,103.0
4,105,Zach,Sims,@zsims,4.85,787,
5,106,Eric,Vaught,@posturelol,4.96,54,101.0
6,107,Jilly,Beans,@jillkuzmin,4.7,32,101.0


#### 7.

What is the average `cost` for a trip?

In [15]:
st = """ 
    SELECT AVG(cost) AS 'Average Cost'
    FROM trips;
"""

create_df(conn.execute(st))

Unnamed: 0,Average Cost
0,31.915


#### 8.

Lyft is looking to do an email campaign for all the irregular users.

Find all the `riders` who have used Lyft less than 500 times!

In [16]:
st = """ 
    SELECT *
    FROM riders
    WHERE total_trips < 500
    UNION
    SELECT *
    FROM riders2
    WHERE total_trips < 500;
"""

create_df(conn.execute(st))

Unnamed: 0,id,first,last,username,rating,total_trips,referred
0,101,Sonny,Li,@sonnynomnom,4.66,352,
1,103,Kassa,Korely,@kassablanca,4.63,42,
2,106,Eric,Vaught,@posturelol,4.96,54,101.0
3,107,Jilly,Beans,@jillkuzmin,4.7,32,101.0


#### 9.

Calculate the number of cars that are `active`.

In [18]:
st = """ 
    SELECT COUNT(*) AS 'Status Active'
    FROM cars
    WHERE status = 'active';
"""

create_df(conn.execute(st))

Unnamed: 0,Status Active
0,3


#### 10.

It’s safety recall time for cars that have been on the road for a while.

Write a query that finds the two cars that have the highest `trips_completed`.

In [21]:
st = """ 
    SELECT *
    FROM cars
    ORDER BY trips_completed DESC
    LIMIT 2;
"""

create_df(conn.execute(st))

Unnamed: 0,id,model,OS,status,trips_completed
0,3,Turing XL,Ryzac,active,164
1,1,Ada,Ryzac,active,82
