Skip to content

Commit

Permalink
Bug Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
FBerding committed Jun 12, 2023
1 parent f89087e commit 18f9ad6
Show file tree
Hide file tree
Showing 9 changed files with 200 additions and 101 deletions.
3 changes: 2 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ Trial
materials
test_model
tests/testthat/test_data/language_models
test/testthat/test_data/tmp
test/testthat/test_data/tmp/checkpoints
.h5
.git
.gitignore
_pkgdown.yml
Expand Down
146 changes: 73 additions & 73 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -1,76 +1,3 @@
num_hidden_layer=12,
num_attention_heads=12,
intermediate_size=3072,
hidden_act="gelu",
hidden_dropout_prob=0.1,
trace=FALSE))
expect_no_error(
create_roberta_model(
model_dir=testthat::test_path("test_data/roberta"),
vocab_raw_texts=example_data$text,
vocab_size=30522,
add_prefix_space=TRUE,
max_position_embeddings=512,
hidden_size=768,
num_hidden_layer=12,
num_attention_heads=12,
intermediate_size=3072,
hidden_act="gelu",
hidden_dropout_prob=0.1,
trace=FALSE))
})
test_that("train_tune_roberta_model", {
example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id1,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
expect_no_error(
train_tune_roberta_model(output_dir=testthat::test_path("test_data/roberta"),
model_dir_path=testthat::test_path("test_data/roberta"),
raw_texts= example_data$text[1:25],
p_mask=0.30,
val_size=0.1,
n_epoch=1,
batch_size=1,
chunk_size=512,
n_workers=1,
multi_process=FALSE,
trace=FALSE))
})
test_that("train_tune_roberta_model", {
example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id1,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
expect_no_error(
train_tune_roberta_model(output_dir=testthat::test_path("test_data/roberta"),
model_dir_path=testthat::test_path("test_data/roberta"),
raw_texts= example_data$text[1:25],
p_mask=0.30,
val_size=0.1,
n_epoch=1,
batch_size=1,
chunk_size=510,
n_workers=1,
multi_process=FALSE,
trace=FALSE))
})
test_embedding<-TextEmbeddingModel$new(
model_name = "test",
model_label = "test",
model_version = "0.0.1",
model_language = "german",
model_dir="Trial/Bert_Modelle",
method = "bert",
aggregation = "last",
max_length=256,
chunks = 4,
overlap = 10
)
test_embeddin$
test_embedding$transformer_components$model$config$max_length
test_embedding$transformer_components$model$config$max_position_embeddings
devtools::load_all()
test_that("train_tune_roberta_model", {
example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id1,
Expand Down Expand Up @@ -510,3 +437,76 @@ devtools::test()
devtools::document()
devtools::check()
reticulate::py_config()
devtools::test()
sample_values=c(
"a","A",
"b","B",
"c","C",
"d","D",
"e","E",
"f","F",
"g","G",
"h","H",
"i","I",
"j","J",
"k","K",
"l","L",
"m","M",
"n","N",
"o","O",
"p","P",
"q","Q",
"r","R",
"s","S",
"t","T",
"u","U",
"v","V",
"w","W",
"x","X",
"y","Y",
"z","Z",
seq(from=0,to=9,by=1)
)
sample_values
length=10
id_suffix=NULL
sample_values=c(
"a","A",
"b","B",
"c","C",
"d","D",
"e","E",
"f","F",
"g","G",
"h","H",
"i","I",
"j","J",
"k","K",
"l","L",
"m","M",
"n","N",
"o","O",
"p","P",
"q","Q",
"r","R",
"s","S",
"t","T",
"u","U",
"v","V",
"w","W",
"x","X",
"y","Y",
"z","Z",
seq(from=0,to=9,by=1)
)
id_suffix=sample(
x=sample_values,
size = length,
replace = TRUE)
id_suffix
id_suffix=paste(id_suffix,collapse = "")
id_suffix
devtools::document()
devtools::document()
devtools::document()
devtools::document()
10 changes: 7 additions & 3 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ jobs:
"tensorflow")
library(reticulate)
virtualenv_create("r-reticulate", Sys.which("python"))
virtualenv_install("r-reticulate", python_packages)
#virtualenv_create("r-reticulate", Sys.which("python"))
#virtualenv_install("r-reticulate", python_packages)
path_to_python <- virtualenv_python("r-reticulate")
conda_create("r-reticulate", Sys.which("python"))
conda_install("r-reticulate", python_packages)
#path_to_python <- virtualenv_python("r-reticulate")
path_to_python <- conda_python("r-reticulate")
writeLines(sprintf("RETICULATE_PYTHON=%s", path_to_python),
Sys.getenv("GITHUB_ENV"))
Expand Down
101 changes: 83 additions & 18 deletions R/aux_fct.R
Original file line number Diff line number Diff line change
Expand Up @@ -336,29 +336,45 @@ get_folds<-function(target,
fin_k_folds=k_folds
}

val_sample=NULL
final_assignments=NULL
for(cat in categories){
all_names=names(subset(target,target==cat))
used_names=NULL
tmp_regular_size=ceiling(length(all_names)/fin_k_folds)
condition=(sample_target==cat)
focused_targets=subset(x = sample_target,
subset = condition)
n_cases=length(focused_targets)

for(i in 1:fin_k_folds){
if(i==1){
possible_names=all_names
} else {
possible_names=setdiff(x=all_names,
y=used_names)
}
tmp_size=min(length(possible_names),tmp_regular_size)
selected_names<-sample(x=possible_names,
size=tmp_size,
replace=FALSE)
val_sample[i]=list(append(x=unlist(val_sample[i]),
values = selected_names))
used_names=append(used_names,values = selected_names)
cases_per_fold=vector(length = fin_k_folds)
cases_per_fold[]=ceiling(n_cases/fin_k_folds)

delta=sum(cases_per_fold)-n_cases
for(i in 1:delta){
cases_per_fold[1+(i-1)%%fin_k_folds]=cases_per_fold[1+(i-1)%%fin_k_folds]-1
}

possible_assignments=NULL
for(i in 1:length(cases_per_fold))
possible_assignments=append(
x=possible_assignments,
values=rep.int(x=i,
times = cases_per_fold[i])
)

assignments<-sample(
x=possible_assignments,
size=length(possible_assignments),
replace = FALSE
)
names(assignments)=names(focused_targets)
final_assignments=append(x=final_assignments,
values=assignments)
}

val_sample=NULL
for(i in 1:fin_k_folds){
condition=(final_assignments==i)
val_sample[i]=list(names(subset(x=final_assignments,
subset=condition)))
}

train_sample=NULL
for(i in 1:fin_k_folds){
Expand Down Expand Up @@ -773,3 +789,52 @@ get_n_chunks<-function(text_embeddings,features,times){
names(n_chunks)<-rownames(text_embeddings)
return(n_chunks)
}

#------------------------------------------------------------------------------
#'Generate ID Suffix for Objects.
#'
#'Function for generating an ID suffix for objects of class
#'\link{TextEmbeddingModel} and \link{TextEmbeddingClassifierNeuralNet}.
#'The suffix is drawn at random, with replacement, from the characters
#'a-z, A-Z and 0-9.
#'
#'@param length \code{int} determining the length of the id suffix.
#'@return Returns a \code{string} of the requested length
generate_id<-function(length=16){
  # Candidate characters in the order a, A, b, B, ..., z, Z, 0, ..., 9.
  # rbind() stacks the lower case letters above the upper case letters;
  # flattening the matrix column by column interleaves the two alphabets.
  alphabet<-c(
    as.vector(rbind(letters,LETTERS)),
    as.character(0:9)
  )

  # Draw the requested number of characters and glue them into one string.
  drawn<-sample(x=alphabet,size=length,replace=TRUE)
  return(paste(drawn,collapse=""))
}
5 changes: 2 additions & 3 deletions R/install_and_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
#'be installed.
#'@export
install_py_modules<-function(envname="aifeducation"){
relevant_modules<-c("os",
"transformers",
relevant_modules<-c("transformers",
"tokenizers",
"datasets",
"torch",
Expand Down Expand Up @@ -103,7 +102,7 @@ set_config_gpu_low_memory<-function(){
#'@export
set_config_tf_logger<-function(level="ERROR"){
  # Fetch the logger from the 'tf' object, which is defined outside this
  # function (presumably the imported tensorflow module - TODO confirm).
  logger<-tf$get_logger()
  # Set the logging threshold exactly once; repeating the call with the same
  # value would be redundant.
  logger$setLevel(level)
}

#'Sets the level for logging information in tensor flow.
Expand Down
12 changes: 10 additions & 2 deletions R/te_classifier_neuralnet_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class(
#'Name of the classifier.
name=NULL,

#'@field name_root ('character()')\cr
#'Root part of the name of the classifier.
name_root=NULL,

#'@field label ('character()')\cr
#'Label of the classifier used as the individual title.
label=NULL,
Expand Down Expand Up @@ -244,7 +248,8 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class(
#------------------------------------------------------------------------

#Setting Label and Name
self$name=name
self$name_root=name
self$name=paste0(self$name_root,"_id_",generate_id(16))
self$label=label

#Basic Information of Input and Target Data
Expand Down Expand Up @@ -759,6 +764,9 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class(

names_unlabeled=names(subset(data_targets,is.na(data_targets)==TRUE))

#Setting a new ID for the classifier
self$name=paste0(self$name_root,"_id_",generate_id(16))

for(iter in 1:folds$n_folds){
#---------------------------------------------
#Create a Train and Validation Sample
Expand Down Expand Up @@ -1790,7 +1798,7 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class(
#'@description Method for setting the license of the classifier.
#'@param license \code{string} containing the abbreviation of the license or
#'the license text.
set_license=function(license="CC BY-NC-SA"){
  # Store the license abbreviation or text in the private model information.
  # The default keeps calls without an argument valid.
  private$model_info$model_license<-license
},
#'@description Method for setting the license of the classifier.
Expand Down
1 change: 1 addition & 0 deletions R/text_embedding_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,7 @@ TextEmbeddingModel<-R6::R6Class(
return(list(
model_license=private$model_info$model_license,
model_name=private$model_info$model_name,
model_label=private$model_info$model_label,
model_date=private$model_info$model_date,
model_version=private$model_info$model_version,
model_language=private$model_info$model_language
Expand Down
5 changes: 4 additions & 1 deletion man/TextEmbeddingClassifierNeuralNet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 18f9ad6

Please sign in to comment.