-
Notifications
You must be signed in to change notification settings - Fork 92
/
Copy pathmk_data.Rmd
61 lines (49 loc) · 1.69 KB
/
mk_data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
---
title: "mk_data"
output: github_document
date: "2024-09-06"
---
```{r, results='hide',warning=FALSE,message=FALSE,error=FALSE}
library(dplyr)
```
```{r}
# Build the base table `d` from the original schedule file and attach
# simulated attendance columns.
# Seed the RNG so the sample() and runif() draws below are reproducible.
set.seed(2024)
d <- read.csv(
  "Roxie_schedule_original.csv",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)
# `%B` is the full month name in the current locale. as.Date() does not error
# on values that fail to match the format; it returns NA for them.
d$Date <- as.Date(d$Date, format = "%Y-%B-%d")
# Fail fast on unparsed dates: otherwise the NAs propagate into the
# month-based filtering and subscripted assignment further down.
if (anyNA(d$Date)) {
  stop(
    "Some Date values could not be parsed with format '%Y-%B-%d' ",
    "(check the CSV contents and the LC_TIME locale).",
    call. = FALSE
  )
}
# Each row gets one of the two estimate values, drawn with replacement.
d$EstimatedAttendance <- sample(c(233, 47), size = nrow(d), replace = TRUE)
# Attendance is the estimate scaled by a uniform(0, 1) draw, then rounded.
d$Attendance <- round(d$EstimatedAttendance * runif(n = nrow(d)))
# Disabled: force a small share of rows to have Attendance equal to the
# estimate, and tabulate the matches by month.
# # some estimates are right
# attendance_matches_indexes <- sort(sample.int(nrow(d), 0.05 * nrow(d), replace = FALSE))
# d$Attendance[attendance_matches_indexes] <- d$EstimatedAttendance[attendance_matches_indexes]
# match_table <- table(format(d$Date, format='%B'), d$EstimatedAttendance == d$Attendance)
# stopifnot(sum(match_table > 0) == 4)
#
# knitr::kable(match_table)
```
```{r}
# Simulate popcorn sales per row as a uniform 10%-20% share of Attendance
# (rounded), total them per Date, keep the August dates, and write the result
# to popcorn_sales.csv.
d$PopcornSales <- round(runif(n = nrow(d), min = 0.1, max = 0.2) * d$Attendance)
popcorn_sales <- d |>
  group_by(Date) |>
  summarize(PopcornSales = sum(PopcornSales)) |>
  ungroup() |>
  # Select August by month number: `%m` is the same in every locale, whereas
  # the month name produced by `%B` depends on the current locale.
  filter(format(Date, "%m") == "08") |>
  as.data.frame()
write.csv(popcorn_sales, "popcorn_sales.csv", row.names = FALSE)
# Show the first rows in the knitted document.
knitr::kable(head(popcorn_sales))
```
```{r}
# Drop the PopcornSales helper column so it is not written to the schedule
# files produced from `d` below.
d$PopcornSales <- NULL
# Rows outside August get Attendance overwritten with EstimatedAttendance.
# The mask is built once, from the month number (`%m`), which unlike the month
# name (`%B`) does not depend on the current locale.
outside_august <- format(d$Date, "%m") != "08"
d$Attendance[outside_august] <- d$EstimatedAttendance[outside_august]
# d_mixed is `d` without the EstimatedAttendance column.
d_mixed <- d
d_mixed$EstimatedAttendance <- NULL
# d_mixed <- d_mixed[-1, , drop = FALSE]
write.csv(
  d_mixed,
  "Roxie_schedule_as_known_after_August.csv",
  row.names = FALSE
)
# Show the first rows in the knitted document.
knitr::kable(head(d_mixed))
```
```{r}
# d_est is `d` with the Attendance column removed; every other column is kept
# as is.
d_est <- d[, names(d) != "Attendance", drop = FALSE]
# d_est <- d_est[-2, , drop = FALSE]
write.csv(
  d_est,
  file = "Roxie_schedule_as_known_before_August.csv",
  row.names = FALSE
)
# Show the first rows in the knitted document.
d_est |>
  head() |>
  knitr::kable()
```