In [1]:
install.packages("plotly")
library(tidyverse, warn.conflicts = FALSE)
library(plotly, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
library(parallel, warn.conflicts = FALSE)

Updating HTML index of packages in '.Library'
Making 'packages.html' ... done
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.3     ✔ purrr   0.3.4
✔ tibble  3.1.0     ✔ dplyr   1.0.5
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Catalogue of salon services. `service_name` and `service_price` are parallel
# vectors: the i-th price belongs to the i-th name.
service_name <- c(
    "Cut and Finish", "Wet Cut", "Blow Dry", "Hair Up", "Bridal Hair",
    "Full head Highlights", "Full Head Tint", "Roots Tint", "Tonal Glossing",
    "Toner", "Creative Colour", "Balayage", "Single foils", "Treatments",
    "Olaplex super service", "Keratin Tamer", "Full Head Curls", "Fashion Wind"
)

# List prices, in the same order as `service_name`.
service_price <- c(
    48, 40, 34, 40, 120,
    100, 73, 63, 73,
    20, 115, 100, 3.50, 20,
    30, 205, 80, 180
)

# One row per service; `id` is the 1-based position in the vectors above.
services <- tibble(
    id = seq_along(service_name),
    service_name = service_name,
    price = service_price
)

In [3]:
# Display the services catalogue (columns: id, service_name, price).
services

id,service_name,price
1,Cut and Finish,48.0
2,Wet Cut,40.0
3,Blow Dry,34.0
4,Hair Up,40.0
5,Bridal Hair,120.0
6,Full head Highlights,100.0
7,Full Head Tint,73.0
8,Roots Tint,63.0
9,Tonal Glossing,73.0
10,Toner,20.0


In [4]:
# Catalogue of retail product lines.
product_name <- c(
    "Clean Beauty", "Tea Tree", "Wild Ginger", "Extrabody", "Moisture",
    "Smoothing", "Blonde", "Color protect", "Super strong", "Curls", "Neon",
    "Invisiblewear", "Original", "Clarifying", "Sun", "Kids", "Styling"
)

# Random whole-number price per product: uniform draws between 30 and 60 are
# truncated by as.integer(). No seed is set in the visible code, so the prices
# differ from run to run.
product_price <- as.integer(runif(length(product_name), 30, 60))

# One row per product; `id` is the 1-based position in `product_name`.
products <- tibble(
    id = seq_along(product_name),
    product_name = product_name,
    price = product_price
)

In [5]:
# Display the products catalogue (columns: id, product_name, price).
products

id,product_name,price
1,Clean Beauty,43
2,Tea Tree,54
3,Wild Ginger,50
4,Extrabody,51
5,Moisture,49
6,Smoothing,36
7,Blonde,36
8,Color protect,53
9,Super strong,46
10,Curls,38


In [6]:
# Load the mock customer table from CSV; column types are guessed by read_csv.
# The generator functions below sample rows from this table and use its `id`
# column as the customer identifier.
costumers <- read_csv("mock_costumer.csv")
# Display the loaded table.
costumers

Parsed with column specification:
cols(
  id = col_double(),
  first_name = col_character(),
  last_name = col_character(),
  email = col_character(),
  tel = col_character()
)


id,first_name,last_name,email,tel
1,Cathi,Tramel,ctramel0@prweb.com,297-508-9357
2,Suzanna,McIlvoray,smcilvoray1@edublogs.org,399-870-4897
3,Sandie,Allsworth,sallsworth2@sourceforge.net,970-182-2703
4,Dulciana,Rillatt,drillatt3@umich.edu,345-795-6271
5,Hall,Shoemark,hshoemark4@ebay.com,821-495-9252
6,Jacquenette,Whate,jwhate5@ihg.com,268-580-7652
7,Rowena,Antyukhin,rantyukhin6@amazon.co.jp,328-800-5742
8,Billye,Hampshaw,bhampshaw7@icio.us,957-495-2686
9,Newton,Letherbury,nletherbury8@examiner.com,775-103-7603
10,Augie,Mansbridge,amansbridge9@topsy.com,799-610-4797


In [7]:
# Every calendar day from 2020-01-01 through 2021-05-31, inclusive.
dates <- seq(
    from = as.Date("2020-01-01"),
    to = as.Date("2021-05-31"),
    by = "day"
)

In [9]:
# Simulate salon activity for every date in `dates`.
#
# For each date:
#   * the number of visiting customers is drawn from N(mean = 15, sd = 5),
#     truncated to an integer and clamped to [0, nrow(costumers)];
#   * each sampled customer receives k services, with k drawn from
#     N(mean = 3, sd = 1), truncated and clamped to [1, nrow(services)];
#   * each sampled customer buys m products, with m drawn from
#     N(mean = 0, sd = 1), truncated and clamped to [0, nrow(products)];
#   * service prices get a single uniform noise term in [-3, 3] (rounded to
#     two decimals) shared by all services of that visit; product prices are
#     taken from the catalogue unchanged.
#
# Reads the global tibbles `costumers`, `services` and `products`.
#
# Args:
#   dates: vector of Date values to simulate; may be empty.
#
# Returns:
#   A list with two tibbles:
#     services_done: id_service, id_costumer, price, date
#     products_sold: id_product, id_costumer, price, date
generate_interactions <- function(dates) {

    empty_dates <- as.Date(integer(), origin = "2020-01-01")

    # Collect the per-visit tibbles in lists and bind once at the end; growing
    # a tibble with bind_rows() inside the loop re-copies all rows every time.
    # The zero-row templates come first so the result always has the expected
    # columns and types, even when nothing was generated.
    services_parts <- list(tibble(
        id_service = integer(),
        id_costumer = integer(),
        price = numeric(),
        date = empty_dates
    ))
    products_parts <- list(tibble(
        id_product = integer(),
        id_costumer = integer(),
        price = numeric(),
        date = empty_dates
    ))

    # seq_along() yields an empty sequence for empty input, unlike 1:length().
    for (i in seq_along(dates)) {
        # Index instead of iterating over `dates` directly: a for loop over a
        # Date vector would strip the Date class.
        date <- dates[i]

        # A normal draw can be negative, which makes sample_n() fail; clamp it
        # to a valid sample size.
        n <- as.integer(rnorm(1, mean = 15, sd = 15 / 3))
        n <- min(max(n, 0L), nrow(costumers))
        sample_costumers <- costumers %>%
            sample_n(n) %>%
            pull(id)

        for (sample_costumer in sample_costumers) {
            # Every visit includes at least one service.
            k <- as.integer(rnorm(1, mean = 3, sd = 1))
            k <- min(max(k, 1L), nrow(services))

            sample_services <- services %>%
                sample_n(k) %>%
                mutate(
                    id_service = id,
                    id_costumer = as.integer(sample_costumer),
                    # runif(1, ...) is recycled: one noise value per visit.
                    price = round(runif(1, -3, 3), 2) + price,
                    date = date
                ) %>%
                select(id_service, id_costumer, price, date)

            services_parts[[length(services_parts) + 1L]] <- sample_services

            # Product purchases are optional, so zero is a valid count.
            k <- as.integer(rnorm(1, mean = 0, sd = 1))
            k <- min(max(k, 0L), nrow(products))

            sample_products <- products %>%
                sample_n(k) %>%
                mutate(
                    id_product = id,
                    id_costumer = as.integer(sample_costumer),
                    price = price,
                    date = date
                ) %>%
                select(id_product, id_costumer, price, date)

            products_parts[[length(products_parts) + 1L]] <- sample_products
        }
    }

    list(
        "services_done" = bind_rows(services_parts),
        "products_sold" = bind_rows(products_parts)
    )
}


In [28]:
# Baseline: run the sequential generator over all dates and time it.
t0 <- proc.time()
interactions <- generate_interactions(dates)
# Unpack the two result tables into top-level variables.
services_done <- interactions[["services_done"]]
products_sold <- interactions[["products_sold"]]
t1 <- proc.time()
# Elapsed user / system / wall-clock time of the run above.
t1 - t0

   user  system elapsed 
  5.962   0.036   6.044 

In [32]:
# Simulate salon activity for a single date; written to be mapped over a
# vector of dates (for example with mclapply).
#
# The number of visiting customers is drawn from N(mean = 15, sd = 5),
# truncated to an integer and clamped to [0, nrow(costumers)]. Each sampled
# customer receives k services (k from N(3, 1), clamped to
# [1, nrow(services)]) and buys m products (m from N(0, 1), clamped to
# [0, nrow(products)]). Service prices get a single uniform noise term in
# [-3, 3] (rounded to two decimals) shared by all services of that visit;
# product prices are taken from the catalogue unchanged.
#
# Reads the global tibbles `costumers`, `services` and `products`.
#
# Args:
#   date: the date to stamp on every generated row.
#
# Returns:
#   A list with two tibbles:
#     services_done: id_service, id_costumer, price, date
#     products_sold: id_product, id_costumer, price, date
generate_interactions_parallel <- function(date) {

    empty_dates <- as.Date(integer(), origin = "2020-01-01")

    # Collect the per-visit tibbles in lists and bind once at the end; growing
    # a tibble with bind_rows() inside the loop re-copies all rows every time.
    # The zero-row templates come first so the result always has the expected
    # columns and types, even when nothing was generated.
    services_parts <- list(tibble(
        id_service = integer(),
        id_costumer = integer(),
        price = numeric(),
        date = empty_dates
    ))
    products_parts <- list(tibble(
        id_product = integer(),
        id_costumer = integer(),
        price = numeric(),
        date = empty_dates
    ))

    # A normal draw can be negative, which makes sample_n() fail; clamp it to
    # a valid sample size.
    n <- as.integer(rnorm(1, mean = 15, sd = 15 / 3))
    n <- min(max(n, 0L), nrow(costumers))
    sample_costumers <- costumers %>%
        sample_n(n) %>%
        pull(id)

    for (sample_costumer in sample_costumers) {
        # Every visit includes at least one service.
        k <- as.integer(rnorm(1, mean = 3, sd = 1))
        k <- min(max(k, 1L), nrow(services))

        sample_services <- services %>%
            sample_n(k) %>%
            mutate(
                id_service = id,
                id_costumer = as.integer(sample_costumer),
                # runif(1, ...) is recycled: one noise value per visit.
                price = round(runif(1, -3, 3), 2) + price,
                date = date
            ) %>%
            select(id_service, id_costumer, price, date)

        services_parts[[length(services_parts) + 1L]] <- sample_services

        # Product purchases are optional, so zero is a valid count.
        k <- as.integer(rnorm(1, mean = 0, sd = 1))
        k <- min(max(k, 0L), nrow(products))

        sample_products <- products %>%
            sample_n(k) %>%
            mutate(
                id_product = id,
                id_costumer = as.integer(sample_costumer),
                price = price,
                date = date
            ) %>%
            select(id_product, id_costumer, price, date)

        products_parts[[length(products_parts) + 1L]] <- sample_products
    }

    list(
        "services_done" = bind_rows(services_parts),
        "products_sold" = bind_rows(products_parts)
    )
}



In [34]:
# Parallel run: simulate each date as an independent mclapply task and time it.
t0 <- proc.time()

# Derive the worker count from the machine instead of hard-coding it.
# detectCores() may return NA, and mclapply only supports mc.cores = 1 on
# Windows because it relies on forking. The cap of 4 keeps the original
# upper bound.
cores <- detectCores()
if (is.na(cores) || .Platform$OS.type == "windows") {
    workers <- 1L
} else {
    workers <- min(4L, cores)
}

# `res` is a list with one element per date; each element is the
# list(services_done, products_sold) returned by the per-date generator.
res <- mclapply(dates, FUN = generate_interactions_parallel, mc.cores = workers)

# Elapsed user / system / wall-clock time of the parallel run.
t1 <- proc.time()
t1 - t0

In [35]:
# Display the mclapply result: one list element per date.
res

id_service,id_costumer,price,date
6,282,100.75,2020-01-01
9,282,73.75,2020-01-01
18,278,182.94,2020-01-01
8,278,65.94,2020-01-01
7,278,75.94,2020-01-01
16,278,207.94,2020-01-01
6,278,102.94,2020-01-01
8,192,62.55,2020-01-01
13,192,3.05,2020-01-01
18,192,179.55,2020-01-01

id_service,id_costumer,price,date
13,28,4.9,2020-01-02
10,28,21.4,2020-01-02
6,28,101.4,2020-01-02
10,69,17.09,2020-01-02
15,69,27.09,2020-01-02
18,69,177.09,2020-01-02
12,79,102.61,2020-01-02
15,79,32.61,2020-01-02
7,79,75.61,2020-01-02
3,111,36.79,2020-01-02

id_service,id_costumer,price,date
14,98,19.7,2020-01-03
7,98,72.7,2020-01-03
16,98,204.7,2020-01-03
4,29,39.63,2020-01-03
18,29,179.63,2020-01-03
2,29,39.63,2020-01-03
10,179,21.46,2020-01-03
16,179,206.46,2020-01-03
14,225,19.63,2020-01-03
5,225,119.63,2020-01-03

id_service,id_costumer,price,date
3,160,35.66,2020-01-04
18,160,181.66,2020-01-04
7,160,74.66,2020-01-04
16,160,206.66,2020-01-04
8,54,62.94,2020-01-04
17,54,79.94,2020-01-04
13,54,3.44,2020-01-04
3,198,33.81,2020-01-04
4,198,39.81,2020-01-04
1,198,47.81,2020-01-04

id_service,id_costumer,price,date
16,287,206.48,2020-01-05
10,287,21.48,2020-01-05
18,287,181.48,2020-01-05
10,201,19.15,2020-01-05
13,201,2.65,2020-01-05
18,107,181.41,2020-01-05
5,105,119.66,2020-01-05
3,105,33.66,2020-01-05
12,105,99.66,2020-01-05
13,105,3.16,2020-01-05

id_service,id_costumer,price,date
5,225,121.66,2020-01-06
16,225,206.66,2020-01-06
11,225,116.66,2020-01-06
2,185,39.58,2020-01-06
12,185,99.58,2020-01-06
10,185,19.58,2020-01-06
14,173,17.65,2020-01-06
17,173,77.65,2020-01-06
16,173,202.65,2020-01-06
10,173,17.65,2020-01-06

id_service,id_costumer,price,date
12,292,101.55,2020-01-07
16,292,206.55,2020-01-07
16,57,207.8,2020-01-07
14,57,22.8,2020-01-07
15,66,27.4,2020-01-07
7,66,70.4,2020-01-07
13,48,1.02,2020-01-07
7,48,70.52,2020-01-07
18,68,179.51,2020-01-07
17,68,79.51,2020-01-07

id_service,id_costumer,price,date
1,221,48.74,2020-01-08
8,221,63.74,2020-01-08
11,221,115.74,2020-01-08
13,95,5.32,2020-01-08
5,95,121.82,2020-01-08
6,159,97.03,2020-01-08
8,159,60.03,2020-01-08
17,159,77.03,2020-01-08
16,33,204.88,2020-01-08
6,33,99.88,2020-01-08

id_service,id_costumer,price,date
6,121,100.91,2020-01-09
15,121,30.91,2020-01-09
1,202,48.88,2020-01-09
6,202,100.88,2020-01-09
15,202,30.88,2020-01-09
11,298,116.85,2020-01-09
10,298,21.85,2020-01-09
8,298,64.85,2020-01-09
1,298,49.85,2020-01-09
13,67,6.16,2020-01-09

id_service,id_costumer,price,date
17,54,79.18,2020-01-10
10,54,19.18,2020-01-10
7,252,73.72,2020-01-10
4,252,40.72,2020-01-10
17,252,80.72,2020-01-10
4,81,42.28,2020-01-10
10,26,18.15,2020-01-10
15,26,28.15,2020-01-10
17,26,78.15,2020-01-10
12,26,98.15,2020-01-10

id_service,id_costumer,price,date
7,62,70.73,2020-01-11
18,62,177.73,2020-01-11
11,244,113.79,2020-01-11
12,244,98.79,2020-01-11
15,244,28.79,2020-01-11
13,141,3.44,2020-01-11
1,141,47.94,2020-01-11
12,201,99.47,2020-01-11
14,201,19.47,2020-01-11
9,201,72.47,2020-01-11

id_service,id_costumer,price,date
18,274,181.52,2020-01-12
16,274,206.52,2020-01-12
13,241,3.63,2020-01-12
8,241,63.13,2020-01-12
16,55,207.26,2020-01-12
5,55,122.26,2020-01-12
15,55,32.26,2020-01-12
9,165,72.52,2020-01-12
6,165,99.52,2020-01-12
4,165,39.52,2020-01-12

id_service,id_costumer,price,date
4,32,41.77,2020-01-13
15,32,31.77,2020-01-13
6,32,101.77,2020-01-13
17,8,80.83,2020-01-13
13,8,4.33,2020-01-13
9,175,73.27,2020-01-13
4,96,42.59,2020-01-13
15,96,32.59,2020-01-13
1,96,50.59,2020-01-13
12,241,100.57,2020-01-13

id_service,id_costumer,price,date
13,282,1.52,2020-01-14
11,105,113.77,2020-01-14
12,105,98.77,2020-01-14
13,105,2.27,2020-01-14
16,105,203.77,2020-01-14
8,105,61.77,2020-01-14
8,64,63.04,2020-01-14
10,64,20.04,2020-01-14
5,64,120.04,2020-01-14
11,39,116.59,2020-01-14

id_service,id_costumer,price,date
11,297,113.31,2020-01-15
16,297,203.31,2020-01-15
1,134,49.33,2020-01-15
10,134,21.33,2020-01-15
7,134,74.33,2020-01-15
13,134,4.83,2020-01-15
15,286,28.8,2020-01-15
16,220,203.61,2020-01-15
5,220,118.61,2020-01-15
9,220,71.61,2020-01-15

id_service,id_costumer,price,date
15,268,32.01,2020-01-16
9,268,75.01,2020-01-16
1,268,50.01,2020-01-16
6,268,102.01,2020-01-16
11,276,112.3,2020-01-16
6,29,100.1,2020-01-16
14,29,20.1,2020-01-16
11,29,115.1,2020-01-16
5,29,120.1,2020-01-16
13,138,6.32,2020-01-16

id_service,id_costumer,price,date
7,123,74.48,2020-01-17
4,123,41.48,2020-01-17
3,123,35.48,2020-01-17
1,105,47.62,2020-01-17
12,105,99.62,2020-01-17
18,105,179.62,2020-01-17
13,268,4.5,2020-01-17
15,268,31.0,2020-01-17
14,268,21.0,2020-01-17
13,158,3.0,2020-01-17

id_service,id_costumer,price,date
6,239,100.28,2020-01-18
16,239,205.28,2020-01-18
12,239,100.28,2020-01-18
4,81,42.7,2020-01-18
6,81,102.7,2020-01-18
7,81,75.7,2020-01-18
11,81,117.7,2020-01-18
5,81,122.7,2020-01-18
2,81,42.7,2020-01-18
3,269,31.76,2020-01-18

id_service,id_costumer,price,date
8,189,65.11,2020-01-19
2,189,42.11,2020-01-19
7,219,73.32,2020-01-19
15,219,30.32,2020-01-19
17,42,80.17,2020-01-19
1,42,48.17,2020-01-19
18,42,180.17,2020-01-19
5,42,120.17,2020-01-19
15,212,32.32,2020-01-19
12,212,102.32,2020-01-19

id_service,id_costumer,price,date
10,263,17.35,2020-01-20
1,263,45.35,2020-01-20
18,155,177.57,2020-01-20
17,155,77.57,2020-01-20
10,155,17.57,2020-01-20
11,155,112.57,2020-01-20
11,224,113.35,2020-01-20
16,64,202.71,2020-01-20
2,64,37.71,2020-01-20
13,64,1.21,2020-01-20

id_service,id_costumer,price,date
8,93,63.83,2020-01-21
14,93,20.83,2020-01-21
1,93,48.83,2020-01-21
15,97,29.24,2020-01-21
12,97,99.24,2020-01-21
11,76,116.57,2020-01-21
17,76,81.57,2020-01-21
5,189,117.7,2020-01-21
16,189,202.7,2020-01-21
3,234,35.58,2020-01-21

id_service,id_costumer,price,date
6,27,100.99,2020-01-22
16,27,205.99,2020-01-22
14,228,22.39,2020-01-22
16,228,207.39,2020-01-22
10,228,22.39,2020-01-22
8,147,63.79,2020-01-22
14,147,20.79,2020-01-22
15,215,32.69,2020-01-22
5,215,122.69,2020-01-22
14,215,22.69,2020-01-22

id_service,id_costumer,price,date
1,108,46.9,2020-01-23
18,108,178.9,2020-01-23
7,108,71.9,2020-01-23
16,46,205.69,2020-01-23
15,46,30.69,2020-01-23
7,151,72.03,2020-01-23
9,151,72.03,2020-01-23
15,151,29.03,2020-01-23
15,55,27.37,2020-01-23
8,55,60.37,2020-01-23

id_service,id_costumer,price,date
4,180,41.25,2020-01-24
7,180,74.25,2020-01-24
13,180,4.75,2020-01-24
17,180,81.25,2020-01-24
5,180,121.25,2020-01-24
13,207,3.12,2020-01-24
18,207,179.62,2020-01-24
7,207,72.62,2020-01-24
10,73,19.21,2020-01-24
2,102,39.53,2020-01-24

id_service,id_costumer,price,date
3,107,34.58,2020-01-25
10,107,20.58,2020-01-25
4,281,38.22,2020-01-25
18,281,178.22,2020-01-25
8,191,62.97,2020-01-25
16,191,204.97,2020-01-25
7,191,72.97,2020-01-25
13,191,3.47,2020-01-25
12,112,100.42,2020-01-25
1,112,48.42,2020-01-25

id_service,id_costumer,price,date
2,181,38.55,2020-01-26
5,181,118.55,2020-01-26
15,218,31.25,2020-01-26
11,218,116.25,2020-01-26
13,218,4.75,2020-01-26
2,35,40.62,2020-01-26
14,35,20.62,2020-01-26
10,35,20.62,2020-01-26
11,35,115.62,2020-01-26
1,130,47.92,2020-01-26

id_service,id_costumer,price,date
4,46,42.79,2020-01-27
7,46,75.79,2020-01-27
11,46,117.79,2020-01-27
13,46,6.29,2020-01-27
18,52,182.57,2020-01-27
4,52,42.57,2020-01-27
13,52,6.07,2020-01-27
18,110,180.47,2020-01-27
11,110,115.47,2020-01-27
18,276,178.7,2020-01-27

id_service,id_costumer,price,date
11,59,112.12,2020-01-28
13,59,0.62,2020-01-28
3,59,31.12,2020-01-28
7,59,70.12,2020-01-28
6,212,101.95,2020-01-28
14,212,21.95,2020-01-28
6,250,102.92,2020-01-28
10,250,22.92,2020-01-28
8,43,61.87,2020-01-28
6,43,98.87,2020-01-28

id_service,id_costumer,price,date
6,167,97.54,2020-01-29
13,167,1.04,2020-01-29
16,167,202.54,2020-01-29
3,78,36.51,2020-01-29
15,36,30.89,2020-01-29
10,36,20.89,2020-01-29
2,36,40.89,2020-01-29
10,57,20.85,2020-01-29
15,57,30.85,2020-01-29
18,57,180.85,2020-01-29

id_service,id_costumer,price,date
9,217,75.19,2020-01-30
1,217,50.19,2020-01-30
17,217,82.19,2020-01-30
11,217,117.19,2020-01-30
7,214,74.5,2020-01-30
13,214,5.0,2020-01-30
16,214,206.5,2020-01-30
11,265,113.09,2020-01-30
7,265,71.09,2020-01-30
4,265,38.09,2020-01-30

id_service,id_costumer,price,date
12,223,102.29,2020-01-31
7,223,75.29,2020-01-31
5,223,122.29,2020-01-31
9,44,73.92,2020-01-31
15,44,30.92,2020-01-31
12,44,100.92,2020-01-31
18,292,179.54,2020-01-31
1,292,47.54,2020-01-31
14,292,19.54,2020-01-31
5,256,121.06,2020-01-31


In [303]:
# Export the catalogues, the customer table and the generated interactions as
# CSV files under mock/.
output_dir <- "mock"

# write.csv() does not create missing directories, so make sure the target
# exists first; this is a no-op when it is already there.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

write.csv(services, file.path(output_dir, "services.csv"), row.names = FALSE)
write.csv(products, file.path(output_dir, "products.csv"), row.names = FALSE)
write.csv(costumers, file.path(output_dir, "costumers.csv"), row.names = FALSE)
write.csv(services_done, file.path(output_dir, "services_done.csv"), row.names = FALSE)
write.csv(products_sold, file.path(output_dir, "products_sold.csv"), row.names = FALSE)

In [None]:
# Scratch fragment: the product-sampling step on its own, matching the one
# inside the per-customer loop of generate_interactions().
# NOTE(review): in the visible code `sample_costumer` and a Date-valued `date`
# are only defined inside the generator functions, so this cell presumably
# cannot run at top level as-is. Confirm before executing, or remove it.

# Number of products bought: a standard normal draw truncated to an integer,
# with negative values raised to zero.
k <- as.integer(rnorm(1, mean = 0, sd = 1))
if (k < 0) {
    k <- 0
}

# Pick k random products and reshape them into products_sold rows
# (id_product, id_costumer, price, date).
sample_products <- products %>%
                        sample_n(k) %>%
                        mutate(id_product = id, 
                               id_costumer = as.integer(sample_costumer),
                               price = price,
                               date = date
                              ) %>%
                        select(id_product, id_costumer, price, date)

# Append the sampled rows to the running products_sold table.
products_sold <- products_sold %>% bind_rows(sample_products)