## 1 Generate mock data

We can specify the date to generate data for; by default, yesterday's date is used.

All the data will be written to one CSV file per date, for example *'/Volumes/dev/bronze/raw_data/files/2025-02-01.csv'*

In [0]:
# import library
library(lubridate)
library(readr)
library(fs)
library(dplyr)

In [0]:
# create a text widget for the generation date; the default day is yesterday

dbutils.widgets.text(
  "Date",
  format(Sys.Date() - 1, "%Y-%m-%d"),
  "Date of generation"
)

In [0]:
# read the current value of the "Date" widget into input_date and echo it
# so the chosen date shows up in the cell output

input_date <- dbutils.widgets.get("Date")

print(input_date)

In [0]:
# check the input date
#
# The regular expression only verifies the YYYY-MM-DD shape, so a value such
# as "2025-13-45" would slip through and fail later with an unrelated
# as.Date() message. Parsing with an explicit format returns NA for dates
# that do not exist, which lets the same clear error be raised right here.

if (!grepl("^\\d{4}-\\d{2}-\\d{2}$", input_date) ||
    is.na(as.Date(input_date, format = "%Y-%m-%d"))) {
  stop("Invalid date format! Please enter the date as YYYY-MM-DD")
}

In [0]:
# derive a seed offset from the input date so that re-running the notebook
# for the same day reproduces the same data
# NOTE(review): only the day of month is used, so the same day number in a
# different month or year yields the same offset - confirm this is intended

seed_day <- as.numeric(format(as.Date(input_date), format = "%d"))
print(seed_day)

In [0]:
%sql

-- create the bronze layer schema; the raw-data volume dev.bronze.raw_data
-- created later in this notebook lives inside it
-- the silver schema is still created as before

CREATE SCHEMA IF NOT EXISTS dev.bronze;
CREATE SCHEMA IF NOT EXISTS dev.silver;

In [0]:
# build the output location: one CSV per input date inside the volume folder

dir_path <- "/Volumes/dev/bronze/raw_data/files/"
csv_path <- sprintf("%s%s.csv", dir_path, input_date)

In [0]:
%sql
-- create the volume that holds the raw CSV files, if it is not there yet
CREATE VOLUME IF NOT EXISTS dev.bronze.raw_data
COMMENT 'This is the RAW data Volume'

In [0]:
# create the target directory inside the volume when it does not exist yet
if (isFALSE(dir.exists(dir_path))) {
  dbutils.fs.mkdirs(dir_path)
}

In [0]:
# 1. Date_time column
#
# One timestamp every 30 seconds from 09:00:00 to 17:00:00 (UTC) on the input
# date, stored as ISO-like text (YYYY-MM-DDTHH:MM:SS).

start_time <- as.POSIXct(
  paste(input_date, "09:00:00"),
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)
end_time <- as.POSIXct(
  paste(input_date, "17:00:00"),
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)

Date_time <- format(
  seq(from = start_time, to = end_time, by = "30 sec"),
  "%Y-%m-%dT%H:%M:%S"
)

# start the data frame with the timestamp column and write the first CSV
df <- data.frame(Date_time = Date_time)
write_csv(df, csv_path)

head(df)

In [0]:
# number of timestamps; every generated column must have this many values
num_rows <- dim(df)[1L]

In [0]:
# 2. A-1 column

# fix the random stream for this column (column offset 100 plus the day)
set.seed(seed_day + 100)
# Simulate the EvapA_Var1 readings.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-1 rows use a base level of 1000, all other rows 1300;
# every row then gets an integer offset drawn uniformly from -10..10.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  base <- rep(1300, n)
  base[phase == 1] <- 1000
  # a single draw of size n with replacement walks the random stream in the
  # same order as n successive draws of size 1
  base + sample(-10:10, n, replace = TRUE)
}

# attach the series as column EvapA_Var1, then rewrite the CSV with it
df[["EvapA_Var1"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 3. A-2 column

# fix the random stream for this column (column offset 101 plus the day)
set.seed(seed_day + 101)
# Simulate the EvapA_Var2 readings.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-1 rows use a base level of 60, all other rows 85;
# every row then gets uniform noise from [-2, 2] and is rounded to one decimal.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  base <- rep(85, n)
  base[phase == 1] <- 60
  # runif(n, ...) yields the same numbers as n successive runif(1, ...) calls
  round(base + runif(n, -2, 2), 1)
}

# attach the series as column EvapA_Var2, then rewrite the CSV with it
df[["EvapA_Var2"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 4. A-S column

# Build the EvapA_State flags.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-1 rows get state 0, all other rows state 1.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of 0/1 values of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  as.numeric(phase != 1)
}

# attach the flags as column EvapA_State, then rewrite the CSV with it
df[["EvapA_State"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 5. B-1
# fix the random stream for this column (column offset 200 plus the day)
set.seed(seed_day + 200)
# Simulate the EvapB_Var1 readings.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-0 rows use a base level of 1000, all other rows 1300;
# every row then gets an integer offset drawn uniformly from -10..10.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  base <- rep(1300, n)
  base[phase == 0] <- 1000
  # a single draw of size n with replacement walks the random stream in the
  # same order as n successive draws of size 1
  base + sample(-10:10, n, replace = TRUE)
}

# attach the series as column EvapB_Var1, then rewrite the CSV with it
df[["EvapB_Var1"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 6. B-2 column

# fix the random stream for this column (column offset 201 plus the day)
set.seed(seed_day + 201)
# Simulate the EvapB_Var2 readings.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-0 rows use a base level of 60, all other rows 85;
# every row then gets uniform noise from [-2, 2] and is rounded to one decimal.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  base <- rep(85, n)
  base[phase == 0] <- 60
  # runif(n, ...) yields the same numbers as n successive runif(1, ...) calls
  round(base + runif(n, -2, 2), 1)
}

# attach the series as column EvapB_Var2, then rewrite the CSV with it
df[["EvapB_Var2"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 7. B-S column

# Build the EvapB_State flags.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-0 rows get state 0, all other rows state 1.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of 0/1 values of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  as.numeric(phase != 0)
}

# attach the flags as column EvapB_State, then rewrite the CSV with it
df[["EvapB_State"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 8. C-1
# fix the random stream for this column (column offset 300 plus the day)
set.seed(seed_day + 300)
# Simulate the EvapC_Var1 readings.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-2 rows use a base level of 1000, all other rows 1300;
# every row then gets an integer offset drawn uniformly from -10..10.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  base <- rep(1300, n)
  base[phase == 2] <- 1000
  # a single draw of size n with replacement walks the random stream in the
  # same order as n successive draws of size 1
  base + sample(-10:10, n, replace = TRUE)
}

# attach the series as column EvapC_Var1, then rewrite the CSV with it
df[["EvapC_Var1"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 9. C-2 column

# fix the random stream for this column (column offset 301 plus the day)
set.seed(seed_day + 301)
# Simulate the EvapC_Var2 readings.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-2 rows use a base level of 60, all other rows 85;
# every row then gets uniform noise from [-2, 2] and is rounded to one decimal.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  base <- rep(85, n)
  base[phase == 2] <- 60
  # runif(n, ...) yields the same numbers as n successive runif(1, ...) calls
  round(base + runif(n, -2, 2), 1)
}

# attach the series as column EvapC_Var2, then rewrite the CSV with it
df[["EvapC_Var2"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# 10. C-S column

# Build the EvapC_State flags.
#
# Rows are taken in consecutive runs of three, and the runs cycle through
# phases 0, 1, 2. Phase-2 rows get state 0, all other rows state 1.
#
# n: number of values to generate (0 yields an empty vector).
# Returns a numeric vector of 0/1 values of length n.
generate_values <- function(n) {
  # seq_len() is empty for n == 0, whereas 1:n would iterate over c(1, 0)
  phase <- ((seq_len(n) - 1) %/% 3) %% 3
  as.numeric(phase != 2)
}

# attach the flags as column EvapC_State, then rewrite the CSV with it
df[["EvapC_State"]] <- generate_values(num_rows)

write_csv(df, csv_path)

head(df)

In [0]:
# end the notebook run, passing the number of generated rows as the exit value
# NOTE(review): num_rows is an integer here - confirm the exit call accepts a
# non-character value in this runtime
dbutils.notebook.exit(num_rows)