# Activity 2 - SELECT JOIN

In [None]:
import sqlite3
# Open the SQLite database file used by every query in this notebook.
# The path is relative, so it is resolved against the current working directory.
db = sqlite3.connect(database='database.sqlite')

### Basic Joins

In [None]:
# Pair every league with its country: the INNER JOIN keeps only the rows where
# League.country_id matches a Country.id. SELECT * returns all columns of both tables.
league_country_all_sql = """
    SELECT * FROM League INNER JOIN Country
    ON League.country_id = Country.id
"""
cursor = db.execute(league_country_all_sql)

In [None]:
# Drain the cursor into a list of row tuples and display it as the cell output.
league_country_all_rows = cursor.fetchall()
league_country_all_rows

In [None]:
# Same join as before, but only the two name columns are selected.
# Columns are qualified with their table name because both tables have a "name" column.
league_country_names_sql = """
    SELECT League.name, Country.name FROM League INNER JOIN Country
    ON League.country_id = Country.id
"""
cursor = db.execute(league_country_names_sql)

In [None]:
# Drain the cursor into a list of (league name, country name) tuples and display it.
league_country_name_rows = cursor.fetchall()
league_country_name_rows

**Aliases**

In [None]:
# Identical query, written with table aliases: "League L" lets the rest of the
# statement refer to the table as L, and "Country C" as C.
aliased_names_sql = """
    SELECT L.name, C.name FROM League L INNER JOIN Country C
    ON L.country_id = C.id
"""
cursor = db.execute(aliased_names_sql)

In [None]:
# Drain the cursor into a list of row tuples and display it as the cell output.
aliased_name_rows = cursor.fetchall()
aliased_name_rows

### Prettier Output using Pandas

In [None]:
import pandas as pd

In [None]:
# pandas runs the query on the open connection and renders the result as a DataFrame.
# Both selected columns are called "name" in their tables, so they are aliased here;
# otherwise the DataFrame would carry two identically-labelled columns.
pd.read_sql("""
    SELECT L.name AS league, C.name AS country
    FROM League L INNER JOIN Country C
    ON L.country_id = C.id
""", db)

We'll use "Real Madrid" in the following queries. Just in case, this is its basic info:

In [None]:
# SQL string literals take single quotes. Double quotes denote identifiers and are
# only accepted as strings by SQLite through a legacy compatibility quirk that can
# be disabled, so the literal is single-quoted here.
pd.read_sql("SELECT * FROM Team WHERE team_long_name = 'Real Madrid CF'", db)

### Advanced Join
_(3 or more tables)_

> **Warning:** Some of these queries are rather complex and will impose some load on sqlite. Don't worry if they take a couple of seconds; that shouldn't happen in a production environment.

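**The first match played by Real Madrid at home** _(i.e. the first row the query returns; without an `ORDER BY` this is not guaranteed to be the earliest match by date)_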

In [None]:
# Three-table join: each Match row is enriched with its country, its league and
# the name of the home team; the away team is still only an API id at this point.
# NOTE: there is no ORDER BY, so LIMIT 1 returns whichever matching row SQLite
# produces first, which is not necessarily the earliest match by date.
# The team name is a single-quoted SQL string literal (double quotes denote
# identifiers), and the two "name" columns are aliased to keep labels unique.
pd.read_sql("""
    SELECT
        C.name AS country, L.name AS league, M.id, M.date,
        T.team_long_name, M.away_team_api_id,
        M.home_team_goal, M.away_team_goal
    FROM Match M
    INNER JOIN Country C ON (M.country_id = C.id)
    INNER JOIN League L ON (M.league_id = L.id)
    INNER JOIN Team T ON (M.home_team_api_id = T.team_api_id)
    WHERE T.team_long_name = 'Real Madrid CF'
    LIMIT 1""", db)

Seems like they won 4-3. Who's the away team?

In [None]:
# Look up a single team by its API id.
# NOTE(review): 9864 is presumably the away_team_api_id shown by the previous
# query's output; outputs are not stored here, so confirm before relying on it.
away_team_sql = """
    SELECT *
    FROM Team T
    WHERE T.team_api_id = 9864
    LIMIT 1"""
pd.read_sql(away_team_sql, db)

![image](https://user-images.githubusercontent.com/872296/38050073-40be91a8-32a0-11e8-9726-8fac86cccac5.png)

This 2008 page seems to confirm it: https://resultados.as.com/resultados/futbol/primera/2008_2009/directo/regular_a_10_13538

**Could we have displayed both team names with only one query?**

Yes, you can join the same table multiple times, giving each occurrence its own alias:

In [None]:
# Team is joined twice under different aliases: T_Home resolves the home team id
# and T_Away the away team id, so both names come back in a single query.
# Every output column gets a unique label; without the aliases the result would
# contain two "name" and two "team_long_name" columns.
# NOTE(review): 43040 is presumably Real Madrid's Team.id (the filter is on
# Team.id, not team_api_id); confirm against the Team lookup earlier on.
pd.read_sql("""
    SELECT
        C.name AS country, L.name AS league, M.id, M.date,
        T_Home.team_long_name AS home_team, T_Away.team_long_name AS away_team,
        M.home_team_goal, M.away_team_goal
    FROM Match M
    INNER JOIN Country C ON (M.country_id = C.id)
    INNER JOIN League L ON (M.league_id = L.id)
    INNER JOIN Team T_Home ON (M.home_team_api_id = T_Home.team_api_id)
    INNER JOIN Team T_Away ON (M.away_team_api_id = T_Away.team_api_id)
    WHERE T_Home.id = 43040
    LIMIT 1""", db)

With the same technique we can also start retrieving the starting lineup:

In [None]:
# Adds one more join: Match.home_player_1 holds a player API id, which is
# resolved to a name through the Player table and labelled H1.
# The two team-name columns are aliased as well, so that every column of the
# result has a unique label (both would otherwise be "team_long_name").
pd.read_sql("""
    SELECT
        M.id, M.date,
        T_Home.team_long_name AS home_team, T_Away.team_long_name AS away_team,
        M.home_team_goal, M.away_team_goal,
        P_Home_1.player_name 'H1'
    FROM Match M
    INNER JOIN Country C ON (M.country_id = C.id)
    INNER JOIN League L ON (M.league_id = L.id)
    INNER JOIN Team T_Home ON (M.home_team_api_id = T_Home.team_api_id)
    INNER JOIN Team T_Away ON (M.away_team_api_id = T_Away.team_api_id)

    INNER JOIN Player P_Home_1 ON (M.home_player_1 = P_Home_1.player_api_id)

    WHERE T_Home.id = 43040
    LIMIT 1""", db)

According to [the report from 2008](https://resultados.as.com/resultados/futbol/primera/2008_2009/directo/regular_a_10_13538), it seems that Iker was Real Madrid's goalkeeper.

We could keep extending it:

In [None]:
# Same query extended with a second Player join: every additional lineup slot
# needs its own alias of the Player table (P_Home_1, P_Home_2, ...).
# The two team-name columns are aliased so that every column of the result has
# a unique label (both would otherwise be "team_long_name").
pd.read_sql("""
    SELECT
        M.id, M.date,
        T_Home.team_long_name AS home_team, T_Away.team_long_name AS away_team,
        M.home_team_goal, M.away_team_goal,
        P_Home_1.player_name 'H1',
        P_Home_2.player_name 'H2'
    FROM Match M
    INNER JOIN Country C ON (M.country_id = C.id)
    INNER JOIN League L ON (M.league_id = L.id)
    INNER JOIN Team T_Home ON (M.home_team_api_id = T_Home.team_api_id)
    INNER JOIN Team T_Away ON (M.away_team_api_id = T_Away.team_api_id)

    INNER JOIN Player P_Home_1 ON (M.home_player_1 = P_Home_1.player_api_id)
    INNER JOIN Player P_Home_2 ON (M.home_player_2 = P_Home_2.player_api_id)

    WHERE T_Home.id = 43040
    LIMIT 1""", db)

**But we hate repetition, so we can make Python do the hard work:**

In [None]:
# Template for the lineup query; str.format() fills two placeholders:
#   {player_columns} - the comma-separated player-name column expressions
#   {player_joins}   - the INNER JOIN clauses against the Player table
# The two team-name columns are aliased so the result does not contain two
# columns that are both labelled "team_long_name".
base_query = """
    SELECT
        M.id, M.date,
        T_Home.team_long_name AS home_team, T_Away.team_long_name AS away_team,
        M.home_team_goal, M.away_team_goal,

        {player_columns}

    FROM Match M
    INNER JOIN Country C ON (M.country_id = C.id)
    INNER JOIN League L ON (M.league_id = L.id)
    INNER JOIN Team T_Home ON (M.home_team_api_id = T_Home.team_api_id)
    INNER JOIN Team T_Away ON (M.away_team_api_id = T_Away.team_api_id)

    {player_joins}

    WHERE T_Home.id = 43040
    LIMIT 1"""

In [None]:
# Generate the repetitive SQL fragments for the 11 home lineup slots
# (home_player_1 .. home_player_11) instead of writing them by hand.
positions = range(1, 12)

# One JOIN clause per slot, each terminated by a newline.
joins = "".join(
    ('INNER JOIN Player P_Home_{pos} ON '
     '(M.home_player_{pos} = P_Home_{pos}.player_api_id)\n').format(pos=pos)
    for pos in positions
)

# One aliased column per slot. str.join puts the ",\n" separator only between
# items, so there is no trailing comma to strip afterwards.
columns = ",\n".join(
    "P_Home_{pos}.player_name 'H{pos}'".format(pos=pos)
    for pos in positions
)

In [None]:
# print() renders the embedded newlines, so each generated JOIN clause shows on its own line.
print(joins)

In [None]:
# print() renders the embedded newlines, so each generated column expression shows on its own line.
print(columns)

In [None]:
# Fill the template's two placeholders with the generated SQL fragments.
query = base_query.format(player_columns=columns, player_joins=joins)

In [None]:
# Inspect the fully assembled SQL text before running it.
print(query)

In [None]:
# Run the assembled query on the open connection and display the result as a DataFrame.
pd.read_sql(sql=query, con=db)