# JOIN

In [0]:
schema_characters = "id INTEGER, name STRING, guild STRING, class STRING, level INTEGER, experience INTEGER, health DOUBLE, is_alive BOOLEAN, last_active TIMESTAMP, mentor_id INTEGER"

df_characters = spark.read.format("csv").option("header",True).option("sep",",").schema(schema_characters).load('dbfs:/FileStore/tables/fantasy/characters.csv')

df_characters.show()

+---+---------+----------+-------+-----+----------+------+--------+-------------------+---------+
| id|     name|     guild|  class|level|experience|health|is_alive|        last_active|mentor_id|
+---+---------+----------+-------+-----+----------+------+--------+-------------------+---------+
|  1|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true|2023-09-20 08:00:00|     null|
|  2|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|2023-09-21 09:00:00|     null|
|  3|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|2023-09-22 07:00:00|     null|
|  4|    Frodo| Shirefolk| Hobbit|   12|      2500|  50.5|   false|2023-09-23 10:00:00|        1|
|  5|      Sam| Shirefolk| Hobbit|   13|      2750|  52.5|    true|2023-09-23 10:05:00|        1|
|  6|  Gandalf|    Istari|   Mage|   30|     10000|  90.0|    true|2023-09-20 11:00:00|     null|
|  7|  Boromir|    Gondor|Warrior|   18|      5500| 115.5|   false|2023-09-21 12:00:00|     null|
|  8|    Merry| Shir

In [0]:
schema_inventory = "id INTEGER, character_id INTEGER, item_id INTEGER, quantity INTEGER, is_equipped BOOLEAN, purchase_date TIMESTAMP, expiry_date TIMESTAMP, value INTEGER"

df_inventory = spark.read.format("csv").options(header="True",sep=",").load('dbfs:/FileStore/tables/fantasy/inventory.csv')

df_inventory.show(truncate=False)

+---+------------+-------+--------+-----------+-----------------------+-----------------------+-----+
|id |character_id|item_id|quantity|is_equipped|purchase_date          |expiry_date            |value|
+---+------------+-------+--------+-----------+-----------------------+-----------------------+-----+
|1  |1           |99     |1       |TRUE       |2023-09-10 08:00:00 UTC|null                   |1500 |
|2  |1           |4      |2       |FALSE      |2023-09-12 09:00:00 UTC|2023-10-12 09:00:00 UTC|50   |
|3  |2           |2      |1       |TRUE       |2023-09-11 08:30:00 UTC|null                   |1100 |
|4  |2           |101    |1       |FALSE      |2023-09-12 10:00:00 UTC|2023-10-12 10:00:00 UTC|60   |
|5  |3           |3      |1       |TRUE       |2023-09-15 08:00:00 UTC|null                   |1300 |
|6  |3           |6      |1       |FALSE      |2023-09-16 09:00:00 UTC|null                   |200  |
|7  |4           |9      |2       |FALSE      |2023-09-20 08:00:00 UTC|2023-10-20 

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, TimestampType, BooleanType

In [0]:
schema_items = StructType([
    StructField("id",IntegerType(),True),
    StructField("name",StringType(),True),
    StructField("item_type",StringType(),True),
    StructField("power",IntegerType(),True),
    StructField("weight",DoubleType(),True),
    StructField("rarity",StringType(),True),
    StructField("date_added",TimestampType(),True),
])

df_items = spark.read.format("csv").schema(schema_items).option("header",True).option("sep",",").load('dbfs:/FileStore/tables/fantasy/items.csv')

df_items.show()

+---+--------------------+---------+-----+------+---------+-------------------+
| id|                name|item_type|power|weight|   rarity|         date_added|
+---+--------------------+---------+-----+------+---------+-------------------+
|  1|           Excalibur|   Weapon|  100|  10.5|Legendary|2023-09-01 09:00:00|
|  2|           Elven Bow|   Weapon|   85|   3.5|     Rare|2023-09-02 10:00:00|
|  3|         Dwarven Axe|   Weapon|   90|  12.0|     Rare|2023-09-03 11:00:00|
|  4|      Healing Potion|   Potion|   50|   1.0|   Common|2023-09-04 08:00:00|
|  5|         Mana Potion|   Potion|   60|   1.1|   Common|2023-09-05 08:30:00|
|  6|       Leather Armor|    Armor|   40|   8.0|   Common|2023-09-06 09:00:00|
|  7|     Chainmail Armor|    Armor|   70|  18.5|     Rare|2023-09-07 10:00:00|
|  8|  Dragon Scale Armor|    Armor|   90|  20.0|Legendary|2023-09-08 11:00:00|
|  9|     Fireball Scroll|   Weapon|   75|   0.5|   Common|2023-09-09 07:00:00|
| 10|     Teleport Scroll|   Weapon|   8

## Inner Join

In [0]:
df_characters \
    .join(df_inventory, df_characters.id == df_inventory.character_id,"inner") \
    .join(df_items,df_inventory.item_id == df_items.id,"inner") \
    .select(df_characters["name"],df_characters["guild"],df_characters["class"],df_characters["level"],df_inventory["quantity"],df_inventory["is_equipped"],df_inventory["value"],df_items["name"],df_items["item_type"],df_items["power"]) \
    .show()

+---------+----------+-------+-----+--------+-----------+-----+------------------+---------+-----+
|     name|     guild|  class|level|quantity|is_equipped|value|              name|item_type|power|
+---------+----------+-------+-----+--------+-----------+-----+------------------+---------+-----+
|  Aragorn|   Rangers|Warrior|   25|       2|      FALSE|   50|    Healing Potion|   Potion|   50|
|  Legolas|  Mirkwood| Archer|   22|       1|       TRUE| 1100|         Elven Bow|   Weapon|   85|
|    Gimli|    Erebor|Warrior|   20|       1|      FALSE|  200|     Leather Armor|    Armor|   40|
|    Gimli|    Erebor|Warrior|   20|       1|       TRUE| 1300|       Dwarven Axe|   Weapon|   90|
|    Frodo| Shirefolk| Hobbit|   12|       2|      FALSE|  150|   Fireball Scroll|   Weapon|   75|
|      Sam| Shirefolk| Hobbit|   13|       1|      FALSE|  160|   Teleport Scroll|   Weapon|   80|
|  Gandalf|    Istari|   Mage|   30|       1|      FALSE|   30|    Stamina Elixir|   Potion|   30|
|  Gandalf

## Outer Join

In [0]:
df_characters.join(df_inventory, df_characters.id == df_inventory.character_id,"inner").join(df_items,df_inventory.item_id == df_items.id,"outer").drop("id","expiry_date","purchase_date","date_added","last_active","mentor_id","item_id").show()

+---------+----------+-------+-----+----------+------+--------+------------+--------+-----------+-----+------------------+---------+-----+------+---------+
|     name|     guild|  class|level|experience|health|is_alive|character_id|quantity|is_equipped|value|              name|item_type|power|weight|   rarity|
+---------+----------+-------+-----+----------+------+--------+------------+--------+-----------+-----+------------------+---------+-----+------+---------+
|     null|      null|   null| null|      null|  null|    null|        null|    null|       null| null|         Excalibur|   Weapon|  100|  10.5|Legendary|
|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|           2|       1|       TRUE| 1100|         Elven Bow|   Weapon|   85|   3.5|     Rare|
|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|           3|       1|       TRUE| 1300|       Dwarven Axe|   Weapon|   90|  12.0|     Rare|
|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true| 

## Left Outer Join

In [0]:
#Can be use left, leftouter or left_outer
df_characters.join(df_inventory, df_characters.id == df_inventory.character_id,"left").drop("id","expiry_date","purchase_date","date_added","last_active","mentor_id","item_id").show()

+---------+----------+-------+-----+----------+------+--------+------------+--------+-----------+-----+
|     name|     guild|  class|level|experience|health|is_alive|character_id|quantity|is_equipped|value|
+---------+----------+-------+-----+----------+------+--------+------------+--------+-----------+-----+
|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true|           1|       2|      FALSE|   50|
|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true|           1|       1|       TRUE| 1500|
|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|           2|       1|      FALSE|   60|
|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|           2|       1|       TRUE| 1100|
|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|           3|       1|      FALSE|  200|
|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|           3|       1|       TRUE| 1300|
|    Frodo| Shirefolk| Hobbit|   12|      2500|  50.5|   false| 

## Right Outer Join

In [0]:
df_characters.join(df_inventory, df_characters.id == df_inventory.character_id,"right").drop("id","expiry_date","purchase_date","date_added","last_active","mentor_id","item_id").show()

+---------+----------+-------+-----+----------+------+--------+------------+--------+-----------+-----+
|     name|     guild|  class|level|experience|health|is_alive|character_id|quantity|is_equipped|value|
+---------+----------+-------+-----+----------+------+--------+------------+--------+-----------+-----+
|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true|           1|       1|       TRUE| 1500|
|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true|           1|       2|      FALSE|   50|
|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|           2|       1|       TRUE| 1100|
|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|           2|       1|      FALSE|   60|
|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|           3|       1|       TRUE| 1300|
|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|           3|       1|      FALSE|  200|
|    Frodo| Shirefolk| Hobbit|   12|      2500|  50.5|   false| 

## Left Semi Join

In [0]:
#Can be used semi, left_semi, leftsemi
df_inventory.join(df_items,df_inventory.item_id == df_items.id,"semi").show(truncate=False)

+---+------------+-------+--------+-----------+--------------------+--------------------+-----+
| id|character_id|item_id|quantity|is_equipped|       purchase_date|         expiry_date|value|
+---+------------+-------+--------+-----------+--------------------+--------------------+-----+
|  2|           1|      4|       2|      FALSE|2023-09-12 09:00:...|2023-10-12 09:00:...|   50|
|  3|           2|      2|       1|       TRUE|2023-09-11 08:30:...|                null| 1100|
|  5|           3|      3|       1|       TRUE|2023-09-15 08:00:...|                null| 1300|
|  6|           3|      6|       1|      FALSE|2023-09-16 09:00:...|                null|  200|
|  7|           4|      9|       2|      FALSE|2023-09-20 08:00:...|2023-10-20 08:00:...|  150|
|  8|           5|     10|       1|      FALSE|2023-09-21 10:00:...|2023-10-21 10:00:...|  160|
|  9|           6|     11|       1|       TRUE|2023-09-22 11:00:...|                null| 1700|
| 10|           6|     16|       1|     

## Right Semi Join

In [0]:
#Can be used anti, right_semi, rightsemi
df_inventory.join(df_items,df_inventory.item_id == df_items.id,"anti").show(truncate=False)

+---+------------+-------+--------+-----------+-----------------------+-----------------------+-----+
|id |character_id|item_id|quantity|is_equipped|purchase_date          |expiry_date            |value|
+---+------------+-------+--------+-----------+-----------------------+-----------------------+-----+
|1  |1           |99     |1       |TRUE       |2023-09-10 08:00:00 UTC|null                   |1500 |
|4  |2           |101    |1       |FALSE      |2023-09-12 10:00:00 UTC|2023-10-12 10:00:00 UTC|60   |
|20 |15          |121    |1       |FALSE      |2023-09-24 08:00:00 UTC|null                   |2000 |
+---+------------+-------+--------+-----------+-----------------------+-----------------------+-----+

