# Section 02: Left and Right Joins

In [17]:
library(dplyr)
library(tidyverse)


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ stringr 1.4.0
✔ tidyr   1.2.0     ✔ forcats 0.5.1
✔ readr   2.1.2     

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



In [18]:
# Read the inventories table from the local datasets folder; the first line of
# the CSV supplies the column names.
inventories <- read.csv(
  file = "C:\\Users\\mosman\\Desktop\\Github\\Data_Scientist_with_R\\00_Datasets\\inventories.csv",
  header = TRUE
)

# Read the inventory_parts table from the same folder, again with a header row.
inventory_parts <- read.csv(
  file = "C:\\Users\\mosman\\Desktop\\Github\\Data_Scientist_with_R\\00_Datasets\\inventory_parts.csv",
  header = TRUE
)


In [26]:
# Pair each inventory with its parts (inventories$id matched against
# inventory_parts$inventory_id), discard the id and version columns, and order
# the rows from the largest quantity to the smallest.
inventory_parts_joined <- inner_join(
  inventories,
  inventory_parts,
  by = c(id = "inventory_id")
) %>%
  select(-c(id, version)) %>%
  arrange(desc(quantity))

# Rows of the joined table whose set_num is "7965-1".
millennium_falcon <- filter(inventory_parts_joined, set_num == "7965-1")

# Rows of the joined table whose set_num is "75190-1".
star_destroyer <- filter(inventory_parts_joined, set_num == "75190-1")

### `01-Left joining two sets by part and color`

- Left join the `millennium_falcon` table with the `star_destroyer` table (keeping every Millennium Falcon row) on the `part_num` and `color_id` columns, using the suffixes `_falcon` and `_star_destroyer`.

In [27]:
# Keep every millennium_falcon row and attach the star_destroyer row(s) that
# share the same part_num and color_id. Column names present in both tables
# receive the _falcon / _star_destroyer suffixes.
left_join(
  millennium_falcon,
  star_destroyer,
  by = c("part_num", "color_id"),
  suffix = c("_falcon", "_star_destroyer")
)

set_num_falcon,part_num,color_id,quantity_falcon,is_spare_falcon,set_num_star_destroyer,quantity_star_destroyer,is_spare_star_destroyer
<chr>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<chr>
7965-1,63868,71,62,f,,,
7965-1,3023,0,60,f,,,
7965-1,3021,72,46,f,75190-1,6,f
7965-1,2780,0,37,f,75190-1,36,f
7965-1,2780,0,37,f,75190-1,1,t
7965-1,60478,72,36,f,,,
7965-1,6636,71,34,f,75190-1,2,f
7965-1,3009,71,28,f,75190-1,2,f
7965-1,3665,71,22,f,,,
7965-1,2412b,72,20,f,75190-1,11,f


### `02-Left joining two sets by color`
- Sum the `quantity` column by `color_id` in the Millennium Falcon dataset.
- Now, sum the `quantity` column by `color_id` in the Star Destroyer dataset.

- Left join the two datasets, `millennium_falcon_colors` and `star_destroyer_colors`, using the `color_id` column and the `_falcon` and `_star_destroyer` suffixes.

In [31]:
# Total quantity per color_id in the Millennium Falcon table
millennium_falcon_colors <- summarise(
  group_by(millennium_falcon, color_id),
  total_quantity = sum(quantity)
)

# Total quantity per color_id in the Star Destroyer table
star_destroyer_colors <- summarise(
  group_by(star_destroyer, color_id),
  total_quantity = sum(quantity)
)

# Keep every Millennium Falcon color and attach the Star Destroyer total for
# the same color_id; the two total_quantity columns are told apart by suffix.
left_join(
  millennium_falcon_colors,
  star_destroyer_colors,
  by = "color_id",
  suffix = c("_falcon", "_star_destroyer")
)

color_id,total_quantity_falcon,total_quantity_star_destroyer
<int>,<int>,<int>
0,196,327.0
1,15,24.0
4,17,56.0
14,3,5.0
15,12,13.0
19,91,13.0
28,3,16.0
33,5,
36,1,14.0
41,6,16.0


### `03-Finding an observation that doesn't have a match`
- Use a `left_join` to join together `sets` and `inventory_version_1` using their common column.
- `filter` for where the `version` column is `NA` using `is.na`.

In [34]:
# Read the sets table from the local datasets folder; the first line of the
# CSV supplies the column names.
sets <- read.csv(
  file = "C:\\Users\\mosman\\Desktop\\Github\\Data_Scientist_with_R\\00_Datasets\\sets.csv",
  header = TRUE
)

In [35]:
# Restrict inventories to the rows whose version equals 1
inventory_version_1 <- filter(inventories, version == 1)

# Left join sets with the version-1 inventories on set_num, then keep only the
# rows where version is NA, i.e. the sets for which the join found no match.
left_join(sets, inventory_version_1, by = "set_num") %>%
  filter(is.na(version))

set_num,name,year,theme_id,num_parts,id,version
<chr>,<chr>,<int>,<int>,<int>,<int>,<int>
10261-1,Roller Coaster,2018,673,4124,,
10875-1,Cargo Train,2018,634,105,,
76081-1,The Milano vs. The Abilisk,2017,704,462,,


### `04-Counting part colors`
- Use the `count` verb to count each `part_cat_id` in the `parts` table.
- Use a `right_join` to join `part_categories`. You'll need to use the `part_cat_id` from the count and the `id` column from `part_categories`.

- Use `filter` to keep the rows where the column `n` is `NA`.



In [37]:
# Read the part_categories table from the local datasets folder; the first
# line of the CSV supplies the column names.
part_categories <- read.csv(
  file = "C:\\Users\\mosman\\Desktop\\Github\\Data_Scientist_with_R\\00_Datasets\\part_categories.csv",
  header = TRUE
)

In [44]:
# NOTE(review): `parts` is not created anywhere else in this notebook, so
# without this load the cell fails with "object 'parts' not found" in a fresh
# session. The path assumes parts.csv sits in the same 00_Datasets folder as
# the other tables -- confirm the file name and location.
parts <- read.csv(
  "C:\\Users\\mosman\\Desktop\\Github\\Data_Scientist_with_R\\00_Datasets\\parts.csv",
  header = TRUE
)

# Count the rows of parts per part_cat_id, right join onto part_categories
# (part_cat_id matched against id) so every category is retained, then keep
# the rows where the count n is NA.
parts %>%
  count(part_cat_id) %>%
  right_join(part_categories, by = c("part_cat_id" = "id")) %>%
  filter(is.na(n))

part_cat_id,n,name
<int>,<int>,<chr>


### `05-Cleaning up your count`
- Use `replace_na` to replace NAs in the `n` column with the value `0`.

In [45]:
library(tidyr)

In [46]:
# Count the rows of parts per part_cat_id and right join onto part_categories
# (part_cat_id matched against id) so every category is retained; any NA in
# the count column n is then replaced by 0.
right_join(
  count(parts, part_cat_id),
  part_categories,
  by = c(part_cat_id = "id")
) %>%
  replace_na(replace = list(n = 0))

part_cat_id,n,name
<int>,<int>,<chr>
1,238,Baseplates
3,531,Bricks Sloped
4,4298,"Duplo, Quatro and Primo"
5,169,Bricks Special
6,264,Bricks Wedged
7,180,Containers
8,27,Technic Bricks
9,360,Plates Special
11,1429,Bricks
12,109,Technic Connectors


### `06-Joining themes to their children`

- Inner join `themes` to their own children, resulting in the suffixes `"_parent"` and `"_child"`, respectively.
- Filter this table to find the children of the `"Harry Potter"` theme.

In [47]:
# Read the themes table from the local datasets folder; the first line of the
# CSV supplies the column names.
themes <- read.csv(
  file = "C:\\Users\\mosman\\Desktop\\Github\\Data_Scientist_with_R\\00_Datasets\\themes.csv",
  header = TRUE
)

In [50]:
# Self-join themes: a row's id is matched against other rows' parent_id, so
# each result row pairs a theme (_parent) with one of its children (_child).
# Only the pairs whose parent is named "Harry Potter" are kept.
inner_join(
  themes,
  themes,
  by = c(id = "parent_id"),
  suffix = c("_parent", "_child")
) %>%
  filter(name_parent == "Harry Potter")

id,name_parent,parent_id,id_child,name_child
<int>,<chr>,<int>,<int>,<chr>
246,Harry Potter,,667,Fantastic Beasts


### `07-Joining themes to their grandchildren`

- Use another inner join to combine themes again with itself.
    - Be sure to use the suffixes `"_parent"` and `"_grandchild"` so the columns in the resulting table are clear.
    - Update the `by` argument to specify the correct columns to join on. If you're unsure of what columns to join on, it might help to look at the result of the first join to get a feel for it.

In [54]:
# First self-join: pair each theme with its children (id matched against
# parent_id). Second self-join: match each child's id (id_child) against
# parent_id again to reach the grandchildren. Only clashing column names get
# a suffix, so the grandchild's name column comes through as plain `name`.
inner_join(
  themes,
  themes,
  by = c(id = "parent_id"),
  suffix = c("_parent", "_child")
) %>%
  inner_join(
    themes,
    by = c(id_child = "parent_id"),
    suffix = c("_parent", "_grandchild")
  )

id_parent,name_parent,parent_id,id_child,name_child,id_grandchild,name
<int>,<chr>,<int>,<int>,<chr>,<int>,<chr>
22,Creator,,23,Basic Model,34,Building
22,Creator,,23,Basic Model,35,Cargo
147,Pirates,,148,Pirates I,149,Imperial Armada
147,Pirates,,148,Pirates I,151,Imperial Soldiers
147,Pirates,,148,Pirates I,152,Islanders
206,Seasonal,,207,Advent,208,City
206,Seasonal,,207,Advent,209,Star Wars
206,Seasonal,,207,Advent,210,Belville
206,Seasonal,,207,Advent,211,Castle
206,Seasonal,,207,Advent,212,Classic Basic


### `08-Left joining a table to itself`
- Left join the themes table to its own children, with the suffixes `_parent` and `_child` respectively.
- Filter the result of the join to find themes that have no children.

In [55]:
# Left self-join: every theme is kept and paired with its children where a
# row's id equals another row's parent_id. Themes with no match have NA in the
# _child columns, so filtering on a missing name_child leaves the childless
# themes.
left_join(
  themes,
  themes,
  by = c(id = "parent_id"),
  suffix = c("_parent", "_child")
) %>%
  filter(is.na(name_child))

id,name_parent,parent_id,id_child,name_child
<int>,<chr>,<int>,<int>,<chr>
3,Competition,1,,
4,Expert Builder,1,,
16,RoboRiders,1,,
17,Speed Slammers,1,,
18,Star Wars,1,,
19,Supplemental,1,,
20,Throwbot Slizer,1,,
21,Universal Building Set,1,,
34,Building,23,,
35,Cargo,23,,


### `The End`