# Getting started with joins

In [53]:
# Optional one-time setup: uncomment the next line to install the pinned
# SQLAlchemy release into the kernel's environment.
# %pip install SQLAlchemy==1.4.22

In [1]:
import os

import pandas as pd
import sqlalchemy as sq

In [8]:
# Left-hand table for the join demos: columns "x" and "y", with the numbers
# stored using the pandas "string" dtype.
left_table = {
    column: pd.Series(values, dtype="string")
    for column, values in (("x", [1, 3]), ("y", [2, 4]))
}

left_table_DF = pd.DataFrame(data=left_table)
print(left_table_DF.dtypes)  # show the column dtypes
left_table_DF

x    string[python]
y    string[python]
dtype: object


Unnamed: 0,x,y
0,1,2
1,3,4


In [9]:
# Right-hand table for the join demos: columns "a" and "b", with the numbers
# stored using the pandas "string" dtype.
right_table = {
    column: pd.Series(values, dtype="string")
    for column, values in (("a", [1, 2]), ("b", [3, 4]))
}

right_table_DF = pd.DataFrame(data=right_table)
right_table_DF

Unnamed: 0,a,b
0,1,3
1,2,4


In [10]:

# Build the connection URL from environment variables so that no credentials
# are stored in the notebook. Required: DB_USER and DB_PASSWORD (a KeyError is
# raised if either is missing). Optional: DB_HOST (defaults to the course
# server) and DB_NAME (defaults to the user name, matching the original URL
# where the database was named after the user).
# URL.create escapes special characters in the password, which a hand-built
# URL string would not.
db_url = sq.engine.URL.create(
    drivername="mysql+mysqlconnector",
    username=os.environ["DB_USER"],
    password=os.environ["DB_PASSWORD"],
    host=os.environ.get("DB_HOST", "datasciencedb.ucalgary.ca"),
    database=os.environ.get("DB_NAME", os.environ["DB_USER"]),
)
engine = sq.create_engine(db_url)

In [11]:
# Write the left table to the database (replacing any existing "left_table",
# and without the DataFrame index), then read it straight back.
left_table_DF.to_sql(name="left_table", con=engine, index=False, if_exists="replace")
left_table_sql = pd.read_sql_table(table_name="left_table", con=engine)


In [12]:
# Display the table as read back from the database.
left_table_sql

Unnamed: 0,x,y
0,1,2
1,3,4


In [13]:
# Write the right table to the database as "right_table", replacing any
# existing copy and omitting the DataFrame index. The call is the cell's last
# expression, so its return value is shown as the cell output.
right_table_DF.to_sql(name="right_table", con=engine, index=False, if_exists="replace")

2

In [14]:
# Keep only the row pairs where left_table.x equals right_table.a.
# A join restricted by an ON condition is an INNER JOIN. MySQL accepts
# "CROSS JOIN ... ON" as a synonym, but standard SQL does not allow ON with
# CROSS JOIN, so the query names the join it actually performs.
join_query = '''
SELECT * FROM left_table INNER JOIN right_table ON (left_table.x=right_table.a);
'''
pd.read_sql_query(join_query, engine)

Unnamed: 0,x,y,a,b
0,1,2,1,3


In [16]:
# Show the left table again for reference before the outer-join examples.
left_table_DF

Unnamed: 0,x,y
0,1,2
1,3,4


In [17]:
# Show the right table again for reference before the outer-join examples.
right_table_DF

Unnamed: 0,a,b
0,1,3
1,2,4


In [18]:
# RIGHT JOIN keeps every row of right_table. Where no left_table.x matches
# right_table.a, the left_table columns (x, y) come back as NULL.
join_query = '''
SELECT * FROM left_table RIGHT JOIN right_table ON (left_table.x=right_table.a);
'''
pd.read_sql_query(join_query, engine)

Unnamed: 0,x,y,a,b
0,1.0,2.0,1,3
1,,,2,4


In [19]:
# LEFT JOIN keeps every row of left_table. Where no right_table.a matches
# left_table.x, the right_table columns (a, b) come back as NULL.
join_query = '''
SELECT * FROM left_table LEFT JOIN right_table ON (left_table.x=right_table.a);
'''
pd.read_sql_query(join_query, engine)

Unnamed: 0,x,y,a,b
0,1,2,1.0,3.0
1,3,4,,


### Example with 3 tables

Define sample data for movies, theaters, and showtimes

In [20]:
# Four movies and four theaters, but only three showtimes: movie 4 and
# theater 4 are never referenced by a showtime, which is what makes the
# outer joins further down return unmatched rows.
movies_data = dict(
    id=[1, 2, 3, 4],
    title=['Movie1', 'Movie2', 'Movie3', 'Movie4'],
)

theaters_data = dict(
    id=[1, 2, 3, 4],
    name=['TheaterA', 'TheaterB', 'TheaterC', 'TheaterD'],
)

# Each showtime links one movie to one theater through movie_id / theater_id.
showtimes_data = dict(
    id=[1, 2, 3],
    start_time=[
        '2023-01-01 18:00:00',
        '2023-01-02 19:30:00',
        '2023-01-03 15:45:00',
    ],
    movie_id=[1, 2, 3],
    theater_id=[1, 2, 3],
)

In [21]:
# Wrap each sample dictionary in a DataFrame.
movies_df, theaters_df, showtimes_df = (
    pd.DataFrame(data=records)
    for records in (movies_data, theaters_data, showtimes_data)
)

# Write each frame to its own table through the engine, replacing any table
# of the same name and leaving out the DataFrame index.
movies_df.to_sql(name='movies', con=engine, if_exists='replace', index=False)
theaters_df.to_sql(name='theaters', con=engine, if_exists='replace', index=False)
showtimes_df.to_sql(name='showtimes', con=engine, if_exists='replace', index=False)

-1

In [22]:
# Inner join across three tables using raw SQL with read_sql_query.
# showtimes is the linking table: it is matched to movies on movie_id and to
# theaters on theater_id, so only movies and theaters that share a showtime
# appear in the result.
sql = """
    SELECT movies.title, theaters.name
    FROM movies
    INNER JOIN showtimes ON movies.id = showtimes.movie_id
    INNER JOIN theaters ON showtimes.theater_id = theaters.id
"""

result_df = pd.read_sql_query(sql, engine)

print(result_df)

    title      name
0  Movie1  TheaterA
1  Movie2  TheaterB
2  Movie3  TheaterC


In [49]:
# Left outer join: every movie is kept. A movie with no matching showtime
# still appears, with NULL (shown as None) for the theater name and start_time.
sql = """
    SELECT movies.title, theaters.name, showtimes.start_time
    FROM movies
    LEFT JOIN showtimes ON movies.id = showtimes.movie_id
    LEFT JOIN theaters ON showtimes.theater_id = theaters.id
"""

result_df = pd.read_sql_query(sql, engine)

print(result_df)

    title      name           start_time
0  Movie1  TheaterA  2023-01-01 18:00:00
1  Movie2  TheaterB  2023-01-02 19:30:00
2  Movie3  TheaterC  2023-01-03 15:45:00
3  Movie4      None                 None


In [23]:
# Right outer join: every theater (the right-most table) is kept. A theater
# with no matching showtime still appears, with NULL (shown as None) for the
# movie title and start_time.
sql = """
    SELECT movies.title, theaters.name, showtimes.start_time
    FROM movies
    RIGHT JOIN showtimes ON movies.id = showtimes.movie_id
    RIGHT JOIN theaters ON showtimes.theater_id = theaters.id
"""

result_df = pd.read_sql_query(sql, engine)

print(result_df)

    title      name           start_time
0  Movie1  TheaterA  2023-01-01 18:00:00
1  Movie2  TheaterB  2023-01-02 19:30:00
2  Movie3  TheaterC  2023-01-03 15:45:00
3    None  TheaterD                 None


In [52]:
# How to do a cross join?
# CROSS JOIN with no ON condition pairs every movie with every theater
# (4 movies x 4 theaters = 16 rows).
# NOTE: SELECT * returns two columns that are both named "id" (one from each
# table), so the resulting DataFrame has duplicate column labels.
sql = """
    SELECT *
    FROM movies
    CROSS JOIN theaters;
"""

result_df = pd.read_sql_query(sql, engine)

print(result_df)

    id   title  id      name
0    1  Movie1   1  TheaterA
1    2  Movie2   1  TheaterA
2    3  Movie3   1  TheaterA
3    4  Movie4   1  TheaterA
4    1  Movie1   2  TheaterB
5    2  Movie2   2  TheaterB
6    3  Movie3   2  TheaterB
7    4  Movie4   2  TheaterB
8    1  Movie1   3  TheaterC
9    2  Movie2   3  TheaterC
10   3  Movie3   3  TheaterC
11   4  Movie4   3  TheaterC
12   1  Movie1   4  TheaterD
13   2  Movie2   4  TheaterD
14   3  Movie3   4  TheaterD
15   4  Movie4   4  TheaterD


In [None]:
# Exercise: try a plain JOIN without a LEFT, RIGHT, INNER, etc. keyword.
# What does the result look like?

In [18]:
# Finished with the database: dispose of the engine's connection pool.
engine.dispose()