In [None]:
library(tidyverse)
library(janitor)


In [None]:
# Load the pumpkins dataset from pumpkins.csv (path is resolved relative to
# the current working directory) into a tibble.
pumpkins <- read_csv(file = "pumpkins.csv")

In [None]:
# Normalize the column names with clean_names(); the later cells rely on the
# cleaned names (e.g. high_price, low_price, item_size).
pumpkins <- clean_names(pumpkins)


In [None]:
# Use explain to show what the clean_names function does

In [None]:
# Show a compact overview of the data: one line per column.
pumpkins %>%
  glimpse()

In [None]:
# Display 5 randomly chosen rows.
# slice_sample() is the current replacement for the superseded sample_n();
# if the data has fewer than 5 rows it returns all of them instead of failing.
slice_sample(pumpkins, n = 5)


In [None]:
# Average high price for each package/color combination, highest first.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping by
# `package` carries into later steps and the regrouping message is avoided.
pumpkins %>%
  group_by(package, color) %>%
  summarize(
    avg_high_price = mean(high_price, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_high_price))

In [None]:
# Fit an analysis of variance of high_price on origin, color and item_size.
# The fitted object is kept in `model` so it can be summarized and used for
# prediction afterwards.
model <- aov(
  high_price ~ origin + color + item_size,
  data = pumpkins
)



In [None]:
# Print the ANOVA table for the fitted model.
model %>%
  summary()

In [None]:
# Predict the model's response (high_price, as fitted in `model`) for a
# pumpkin with origin "OHIO", color "ORANGE" and item_size "med".
predict(
  model,
  newdata = data.frame(
    origin = "OHIO",
    color = "ORANGE",
    item_size = "med"
  )
)


In [None]:
# Keep only rows whose package name contains "bushel", reduce them to the
# package and price columns, and list the most expensive first.
pumpkins %>%
  filter(str_detect(package, "bushel")) %>%
  select(package, low_price, high_price) %>%
  arrange(desc(high_price))



In [None]:
# Derive a price-per-bushel column (ppb) from high_price using the package
# name: rows whose package mentions "bushel" keep high_price unchanged, all
# other rows are divided by 24.
# NOTE(review): this treats every "bushel" package as exactly one bushel and
# applies a fixed divisor of 24 to everything else -- confirm both assumptions
# against the actual package values.
pumpkins %>%
  mutate(
    is_bushel = str_detect(package, "bushel"),
    ppb = if_else(is_bushel, high_price, high_price / 24)
  ) %>%
  select(package, low_price, high_price, ppb) %>%
  arrange(desc(ppb))
    


In [None]:
# Average ppb for each color/item_size combination, highest first.
# ppb is high_price for packages whose name contains "bushel" and
# high_price / 24 otherwise.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping by
# `color` remains and the regrouping message is avoided.
pumpkins %>%
  mutate(
    ppb = if_else(str_detect(package, "bushel"), high_price, high_price / 24)
  ) %>%
  group_by(color, item_size) %>%
  summarize(avg_ppb = mean(ppb, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_ppb))

In [None]:
# Number of rows for each item_size/color combination, most frequent first.
# count() replaces group_by() + summarize(n()) + arrange(desc()) and returns
# an ungrouped tibble with the tally stored in the `count` column.
pumpkins %>%
  count(item_size, color, name = "count", sort = TRUE)

In [None]:
# Pivot table of row counts: one row per color, one column per item_size.
# count() yields an ungrouped tally, so pivot_wider() is not handed a tibble
# that is still grouped by the column it spreads (item_size).
# Combinations that never occur are filled with 0.
pumpkins %>%
  count(item_size, color, name = "count") %>%
  pivot_wider(names_from = item_size, values_from = count, values_fill = 0)

In [None]:
TODO
- keep as backup 
- create new file with comments and markdown titles
- push into the repo
- create 2 slides for the demo
- 1 slide about jupyter notebooks (links and guides)
- 1 slide about security with Copilot
- General overview of Copilot and how it works
