diff --git a/DESCRIPTION b/DESCRIPTION index 8e54d2b..5d50708 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: aifeducation Title: Artificial Intelligence for Education -Version: 0.2.1.9003 +Version: 0.3.0.9000 Authors@R: c( person("Berding", "Florian", , "florian.berding@uni-hamburg.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3593-1695")), diff --git a/NAMESPACE b/NAMESPACE index 9de5fd2..448c0e2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(set_config_cpu_only) export(set_config_gpu_low_memory) export(set_config_os_environ_logger) export(set_config_tf_logger) +export(set_transformers_logger) export(train_tune_bert_model) export(train_tune_deberta_v2_model) export(train_tune_funnel_model) diff --git a/R/aux_fct.R b/R/aux_fct.R index 5fb1016..f56b9cf 100644 --- a/R/aux_fct.R +++ b/R/aux_fct.R @@ -156,7 +156,9 @@ return(TRUE) #' #'@param true_values \code{factor} containing the true labels/categories. #'@param predicted_values \code{factor} containing the predicted labels/categories. -#'@return Returns a \code{vector} with the following reliability measures: +#'@param return_names_only \code{bool} If \code{TRUE} returns only the names +#'of the resulting vector. Use {FALSE} to request computation of the values. +#'@return If \code{return_names_only=FALSE} returns a \code{vector} with the following reliability measures: #'#'\itemize{ #'\item{\strong{iota_index: }}{Iota Index from the Iota Reliability Concept Version 2.} #'\item{\strong{min_iota2: }}{Minimal Iota from Iota Reliability Concept Version 2.} @@ -178,14 +180,14 @@ return(TRUE) #'\item{\strong{gwet_ac: }}{Gwet's AC1/AC2 agreement coefficient.} #'} #' +#'@return If \code{return_names_only=TRUE} returns only the names of the vector elements. 
+#' #'@family Auxiliary Functions #' #'@export -get_coder_metrics<-function(true_values, - predicted_values){ -val_res=iotarelr::check_new_rater(true_values = true_values, - assigned_values = predicted_values, - free_aem = TRUE) +get_coder_metrics<-function(true_values=NULL, + predicted_values=NULL, + return_names_only=FALSE){ metric_names=c("iota_index", "min_iota2", @@ -208,54 +210,62 @@ val_res=iotarelr::check_new_rater(true_values = true_values, metric_values=vector(length = length(metric_names)) names(metric_values)=metric_names - val_res=iotarelr::check_new_rater(true_values = true_values, - assigned_values = predicted_values, - free_aem = FALSE) - val_res_free=iotarelr::check_new_rater(true_values = true_values, - assigned_values = predicted_values, - free_aem = TRUE) - - metric_values["iota_index"]=val_res$scale_level$iota_index - - metric_values["min_iota2"]=min(val_res_free$categorical_level$raw_estimates$iota) - metric_values["avg_iota2"]=mean(val_res_free$categorical_level$raw_estimates$iota) - metric_values["max_iota2"]=max(val_res_free$categorical_level$raw_estimates$iota) - - metric_values["min_alpha"]=min(val_res_free$categorical_level$raw_estimates$alpha_reliability) - metric_values["avg_alpha"]=mean(val_res_free$categorical_level$raw_estimates$alpha_reliability) - metric_values["max_alpha"]=max(val_res_free$categorical_level$raw_estimates$alpha_reliability) - - metric_values["static_iota_index"]=val_res$scale_level$iota_index_d4 - metric_values["dynamic_iota_index"]=val_res$scale_level$iota_index_dyn2 - - metric_values["kalpha_nominal"]=irr::kripp.alpha(x=rbind(true_values,predicted_values), - method = "nominal")$value - metric_values["kalpha_ordinal"]=irr::kripp.alpha(x=rbind(true_values,predicted_values), - method = "ordinal")$value - - metric_values["kendall"]=irr::kendall(ratings=cbind(true_values,predicted_values), - correct=TRUE)$value - - metric_values["kappa2_unweighted"]=irr::kappa2(ratings=cbind(true_values,predicted_values), - weight = 
"unweighted", - sort.levels = FALSE)$value - metric_values["kappa2_equal_weighted"]=irr::kappa2(ratings=cbind(true_values,predicted_values), - weight = "equal", - sort.levels = FALSE)$value - metric_values["kappa2_squared_weighted"]=irr::kappa2(ratings=cbind(true_values,predicted_values), - weight = "squared", - sort.levels = FALSE)$value - - metric_values["kappa_fleiss"]=irr::kappam.fleiss(ratings=cbind(true_values,predicted_values), - exact = FALSE, - detail = FALSE)$value - - metric_values["percentage_agreement"]=irr::agree(ratings=cbind(true_values,predicted_values), - tolerance = 0)$value/100 - - metric_values["gwet_ac"]=irrCAC::gwet.ac1.raw(ratings=cbind(true_values,predicted_values))$est$coeff.val - - return(metric_values) + if(return_names_only==TRUE){ + return(metric_names) + } else { + val_res=iotarelr::check_new_rater(true_values = true_values, + assigned_values = predicted_values, + free_aem = TRUE) + + val_res=iotarelr::check_new_rater(true_values = true_values, + assigned_values = predicted_values, + free_aem = FALSE) + val_res_free=iotarelr::check_new_rater(true_values = true_values, + assigned_values = predicted_values, + free_aem = TRUE) + + metric_values["iota_index"]=val_res$scale_level$iota_index + + metric_values["min_iota2"]=min(val_res_free$categorical_level$raw_estimates$iota) + metric_values["avg_iota2"]=mean(val_res_free$categorical_level$raw_estimates$iota) + metric_values["max_iota2"]=max(val_res_free$categorical_level$raw_estimates$iota) + + metric_values["min_alpha"]=min(val_res_free$categorical_level$raw_estimates$alpha_reliability) + metric_values["avg_alpha"]=mean(val_res_free$categorical_level$raw_estimates$alpha_reliability) + metric_values["max_alpha"]=max(val_res_free$categorical_level$raw_estimates$alpha_reliability) + + metric_values["static_iota_index"]=val_res$scale_level$iota_index_d4 + metric_values["dynamic_iota_index"]=val_res$scale_level$iota_index_dyn2 + + 
metric_values["kalpha_nominal"]=irr::kripp.alpha(x=rbind(true_values,predicted_values), + method = "nominal")$value + metric_values["kalpha_ordinal"]=irr::kripp.alpha(x=rbind(true_values,predicted_values), + method = "ordinal")$value + + metric_values["kendall"]=irr::kendall(ratings=cbind(true_values,predicted_values), + correct=TRUE)$value + + metric_values["kappa2_unweighted"]=irr::kappa2(ratings=cbind(true_values,predicted_values), + weight = "unweighted", + sort.levels = FALSE)$value + metric_values["kappa2_equal_weighted"]=irr::kappa2(ratings=cbind(true_values,predicted_values), + weight = "equal", + sort.levels = FALSE)$value + metric_values["kappa2_squared_weighted"]=irr::kappa2(ratings=cbind(true_values,predicted_values), + weight = "squared", + sort.levels = FALSE)$value + + metric_values["kappa_fleiss"]=irr::kappam.fleiss(ratings=cbind(true_values,predicted_values), + exact = FALSE, + detail = FALSE)$value + + metric_values["percentage_agreement"]=irr::agree(ratings=cbind(true_values,predicted_values), + tolerance = 0)$value/100 + + metric_values["gwet_ac"]=irrCAC::gwet.ac1.raw(ratings=cbind(true_values,predicted_values))$est$coeff.val + + return(metric_values) + } } #------------------------------------------------------------------------------ diff --git a/R/install_and_config.R b/R/install_and_config.R index f6b0a12..ee24aaf 100644 --- a/R/install_and_config.R +++ b/R/install_and_config.R @@ -184,7 +184,7 @@ set_config_tf_logger<-function(level="ERROR"){ #'This function changes the level for logging information with 'tensorflow' via #'the os environment. This function must be called before importing 'tensorflow'. #' -#'@param level \code{string} Minimal level that should be printed to console. Five +#'@param level \code{string} Minimal level that should be printed to console. Four #'levels are available: INFO, WARNING, ERROR and NONE. #'@return This function does not return anything. It is used for its #'side effects. 
@@ -204,6 +204,30 @@ set_config_os_environ_logger<-function(level="ERROR"){ os$environ$setdefault("TF_CPP_MIN_LOG_LEVEL","2") } +#'Sets the level for logging information of the 'transformers' library. +#' +#'This function changes the level for logging information of the 'transformers' library. +#'It influences the output printed to console for creating and training transformer models as well as +#'\link{TextEmbeddingModel}s. +#' +#'@param level \code{string} Minimal level that should be printed to console. Four +#'levels are available: INFO, WARNING, ERROR and DEBUG +#'@return This function does not return anything. It is used for its +#'side effects. +#'@family Installation and Configuration +#'@export +set_transformers_logger<-function(level="ERROR"){ + if(level=="ERROR"){ + transformers$utils$logging$set_verbosity_error() + } else if (level=="WARNING"){ + transformers$utils$logging$set_verbosity_warning() + } else if (level=="INFO"){ + transformers$utils$logging$set_verbosity_info() + } else if(level=="DEBUG"){ + transformers$utils$logging$set_verbosity_debug() + } +} + #'R6 class for settting the global machine learning framework. 
#' #'R6 class for setting the global machine learning framework to 'PyTorch' or diff --git a/R/te_classifier_neuralnet_model.R b/R/te_classifier_neuralnet_model.R index 23350e9..5ca9977 100644 --- a/R/te_classifier_neuralnet_model.R +++ b/R/te_classifier_neuralnet_model.R @@ -789,32 +789,20 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class( data_bsc_test=data_bsc_train #Initializing Objects for Saving Performance + metric_names=get_coder_metrics( + true_values=NULL, + predicted_values=NULL, + return_names_only=TRUE) + test_metric=array(dim=c(folds$n_folds, 4, - 18), + length(metric_names)), dimnames = list(iterations=NULL, steps=c("Baseline", "BSC", "BPL", "Final"), - metrics=c("iota_index", - "min_iota2", - "avg_iota2", - "max_iota2", - "min_alpha", - "avg_alpha", - "max_alpha", - "static_iota_index", - "dynamic_iota_index", - "kalpha_nominal", - "kalpha_ordinal", - "kendall", - "kappa2_unweighted", - "kappa2_equal_weighted", - "kappa2_squared_weighted", - "kappa_fleiss", - "percentage_agreement", - "gwet_ac"))) + metrics=metric_names)) iota_objects_start=NULL iota_objects_end=NULL iota_objects_start_free=NULL diff --git a/R/text_embedding_model.R b/R/text_embedding_model.R index e9fe88a..f323093 100644 --- a/R/text_embedding_model.R +++ b/R/text_embedding_model.R @@ -274,38 +274,38 @@ TextEmbeddingModel<-R6::R6Class( if(private$basic_components$method=="bert"){ private$transformer_components$tokenizer<-transformers$BertTokenizerFast$from_pretrained(model_dir) if(ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFBertForMaskedLM$from_pretrained(model_dir,from_pt=from_pt) + private$transformer_components$model<-transformers$TFBertModel$from_pretrained(model_dir,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$BertForMaskedLM$from_pretrained(model_dir,from_tf=from_tf) + private$transformer_components$model<-transformers$BertModel$from_pretrained(model_dir,from_tf=from_tf) } } else 
if(private$basic_components$method=="roberta"){ private$transformer_components$tokenizer<-transformers$RobertaTokenizerFast$from_pretrained(model_dir) if(ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFRobertaForMaskedLM$from_pretrained(model_dir,from_pt=from_pt) + private$transformer_components$model<-transformers$TFRobertaModel$from_pretrained(model_dir,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$RobertaForMaskedLM$from_pretrained(model_dir,from_tf=from_tf) + private$transformer_components$model<-transformers$RobertaModel$from_pretrained(model_dir,from_tf=from_tf) } } else if(private$basic_components$method=="longformer"){ private$transformer_components$tokenizer<-transformers$LongformerTokenizerFast$from_pretrained(model_dir) if(ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFLongformerForMaskedLM$from_pretrained(model_dir,from_pt=from_pt) + private$transformer_components$model<-transformers$TFLongformerModel$from_pretrained(model_dir,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$LongformerForMaskedLM$from_pretrained(model_dir,from_tf=from_tf) + private$transformer_components$model<-transformers$LongformerModel$from_pretrained(model_dir,from_tf=from_tf) } } else if(private$basic_components$method=="funnel"){ private$transformer_components$tokenizer<-transformers$AutoTokenizer$from_pretrained(model_dir) if(ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFFunnelForMaskedLM$from_pretrained(model_dir,from_pt=from_pt) + private$transformer_components$model<-transformers$TFFunnelBaseModel$from_pretrained(model_dir,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$FunnelForMaskedLM$from_pretrained(model_dir,from_tf=from_tf) + private$transformer_components$model<-transformers$FunnelBaseModel$from_pretrained(model_dir,from_tf=from_tf) } } else 
if(private$basic_components$method=="deberta_v2"){ private$transformer_components$tokenizer<-transformers$AutoTokenizer$from_pretrained(model_dir) if(ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFDebertaV2ForMaskedLM$from_pretrained(model_dir,from_pt=from_pt) + private$transformer_components$model<-transformers$TFDebertaV2Model$from_pretrained(model_dir,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$DebertaV2ForMaskedLM$from_pretrained(model_dir,from_tf=from_tf) + private$transformer_components$model<-transformers$DebertaV2Model$from_pretrained(model_dir,from_tf=from_tf) } } else if(private$basic_components$method=="rwkv"){ private$transformer_components$tokenizer<-transformers$AutoTokenizer$from_pretrained(model_dir) @@ -559,37 +559,37 @@ TextEmbeddingModel<-R6::R6Class( if(private$basic_components$method=="bert"){ private$transformer_components$tokenizer<-transformers$BertTokenizerFast$from_pretrained(model_dir_main) if(private$transformer_components$ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFBertForMaskedLM$from_pretrained(model_dir_main,from_pt=from_pt) + private$transformer_components$model<-transformers$TFBertModel$from_pretrained(model_dir_main,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$BertForMaskedLM$from_pretrained(model_dir_main,from_tf=from_tf) + private$transformer_components$model<-transformers$BertModel$from_pretrained(model_dir_main,from_tf=from_tf) } } else if(private$basic_components$method=="roberta"){ private$transformer_components$tokenizer<-transformers$RobertaTokenizerFast$from_pretrained(model_dir_main) if(private$transformer_components$ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFRobertaForMaskedLM$from_pretrained(model_dir_main,from_pt=from_pt) + 
private$transformer_components$model<-transformers$TFRobertaModel$from_pretrained(model_dir_main,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$RobertaForMaskedLM$from_pretrained(model_dir_main,from_tf=from_tf) + private$transformer_components$model<-transformers$RobertaModel$from_pretrained(model_dir_main,from_tf=from_tf) } } else if(private$basic_components$method=="longformer"){ private$transformer_components$tokenizer<-transformers$LongformerTokenizerFast$from_pretrained(model_dir_main) if(private$transformer_components$ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFLongformerForMaskedLM$from_pretrained(model_dir_main,from_pt=from_pt) + private$transformer_components$model<-transformers$TFLongformerModel$from_pretrained(model_dir_main,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$LongformerForMaskedLM$from_pretrained(model_dir_main,from_tf=from_tf) + private$transformer_components$model<-transformers$LongformerModel$from_pretrained(model_dir_main,from_tf=from_tf) } } else if(private$basic_components$method=="funnel"){ private$transformer_components$tokenizer<-transformers$AutoTokenizer$from_pretrained(model_dir_main) if(private$transformer_components$ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFFunnelForMaskedLM$from_pretrained(model_dir_main,from_pt=from_pt) + private$transformer_components$model<-transformers$TFFunnelBaseModel$from_pretrained(model_dir_main,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$FunnelForMaskedLM$from_pretrained(model_dir_main,from_tf=from_tf) + private$transformer_components$model<-transformers$FunnelBaseModel$from_pretrained(model_dir_main,from_tf=from_tf) } } else if(private$basic_components$method=="deberta_v2"){ private$transformer_components$tokenizer<-transformers$AutoTokenizer$from_pretrained(model_dir_main) 
if(private$transformer_components$ml_framework=="tensorflow"){ - private$transformer_components$model<-transformers$TFDebertaV2ForMaskedLM$from_pretrained(model_dir_main,from_pt=from_pt) + private$transformer_components$model<-transformers$TFDebertaV2Model$from_pretrained(model_dir_main,from_pt=from_pt) } else { - private$transformer_components$model<-transformers$DebertaV2ForMaskedLM$from_pretrained(model_dir_main,from_tf=from_tf) + private$transformer_components$model<-transformers$DebertaV2Model$from_pretrained(model_dir_main,from_tf=from_tf) } } else if(private$basic_components$method=="rwkv"){ private$transformer_components$tokenizer<-transformers$AutoTokenizer$from_pretrained(model_dir_main) diff --git a/R/transformer_bert.R b/R/transformer_bert.R index 685ca45..c3c7d3c 100644 --- a/R/transformer_bert.R +++ b/R/transformer_bert.R @@ -172,7 +172,7 @@ create_bert_model<-function( } configuration=transformers$BertConfig( - vocab_size=as.integer(vocab_size), + vocab_size=as.integer(length(tokenizer$get_vocab())), max_position_embeddings=as.integer(max_position_embeddings), hidden_size=as.integer(hidden_size), num_hidden_layer=as.integer(num_hidden_layer), diff --git a/R/transformer_deberta_v2.R b/R/transformer_deberta_v2.R index e1c438d..992c4c5 100644 --- a/R/transformer_deberta_v2.R +++ b/R/transformer_deberta_v2.R @@ -186,7 +186,7 @@ create_deberta_v2_model<-function( } configuration=transformers$DebertaV2Config( - vocab_size=as.integer(vocab_size), + vocab_size=as.integer(length(tokenizer$get_vocab())), max_position_embeddings=as.integer(max_position_embeddings), hidden_size=as.integer(hidden_size), num_hidden_layer=as.integer(num_hidden_layer), diff --git a/R/transformer_funnel.R b/R/transformer_funnel.R index 3d496fb..c8cc0b9 100644 --- a/R/transformer_funnel.R +++ b/R/transformer_funnel.R @@ -204,7 +204,7 @@ create_funnel_model<-function( } configuration=transformers$FunnelConfig( - vocab_size=as.integer(vocab_size), + 
vocab_size=as.integer(length(tokenizer$get_vocab())), block_sizes =as.integer(block_sizes), block_repeats=NULL, num_decoder_layers=as.integer(num_decoder_layers), diff --git a/R/transformer_longformer.R b/R/transformer_longformer.R index 616b096..45f7b17 100644 --- a/R/transformer_longformer.R +++ b/R/transformer_longformer.R @@ -166,7 +166,7 @@ create_longformer_model<-function( } configuration=transformers$LongformerConfig( - vocab_size=as.integer(vocab_size), + vocab_size=as.integer(length(tokenizer$get_vocab())), max_position_embeddings=as.integer(max_position_embeddings), hidden_size=as.integer(hidden_size), num_hidden_layer=as.integer(num_hidden_layer), diff --git a/R/transformer_roberta.R b/R/transformer_roberta.R index d315d87..5c2b02d 100644 --- a/R/transformer_roberta.R +++ b/R/transformer_roberta.R @@ -174,7 +174,7 @@ create_roberta_model<-function( } configuration=transformers$RobertaConfig( - vocab_size=as.integer(vocab_size), + vocab_size=as.integer(length(tokenizer$get_vocab())), max_position_embeddings=as.integer(max_position_embeddings), hidden_size=as.integer(hidden_size), num_hidden_layer=as.integer(num_hidden_layer), diff --git a/man/AifeducationConfiguration.Rd b/man/AifeducationConfiguration.Rd index a30af5f..37bb860 100644 --- a/man/AifeducationConfiguration.Rd +++ b/man/AifeducationConfiguration.Rd @@ -29,7 +29,8 @@ Other Installation and Configuration: \code{\link{set_config_cpu_only}()}, \code{\link{set_config_gpu_low_memory}()}, \code{\link{set_config_os_environ_logger}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} \section{Methods}{ diff --git a/man/aifeducation_config.Rd b/man/aifeducation_config.Rd index c8c87ed..97ccefc 100644 --- a/man/aifeducation_config.Rd +++ b/man/aifeducation_config.Rd @@ -21,7 +21,8 @@ Other Installation and Configuration: \code{\link{set_config_cpu_only}()}, 
\code{\link{set_config_gpu_low_memory}()}, \code{\link{set_config_os_environ_logger}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} \keyword{datasets} diff --git a/man/check_aif_py_modules.Rd b/man/check_aif_py_modules.Rd index 7db7bc1..655578f 100644 --- a/man/check_aif_py_modules.Rd +++ b/man/check_aif_py_modules.Rd @@ -29,6 +29,7 @@ Other Installation and Configuration: \code{\link{set_config_cpu_only}()}, \code{\link{set_config_gpu_low_memory}()}, \code{\link{set_config_os_environ_logger}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} diff --git a/man/get_coder_metrics.Rd b/man/get_coder_metrics.Rd index b06112e..c87310b 100644 --- a/man/get_coder_metrics.Rd +++ b/man/get_coder_metrics.Rd @@ -4,15 +4,22 @@ \alias{get_coder_metrics} \title{Calculate reliability measures based on content analysis} \usage{ -get_coder_metrics(true_values, predicted_values) +get_coder_metrics( + true_values = NULL, + predicted_values = NULL, + return_names_only = FALSE +) } \arguments{ \item{true_values}{\code{factor} containing the true labels/categories.} \item{predicted_values}{\code{factor} containing the predicted labels/categories.} + +\item{return_names_only}{\code{bool} If \code{TRUE} returns only the names +of the resulting vector. 
Use {FALSE} to request computation of the values.} } \value{ -Returns a \code{vector} with the following reliability measures: +If \code{return_names_only=FALSE} returns a \code{vector} with the following reliability measures: #'\itemize{ \item{\strong{iota_index: }}{Iota Index from the Iota Reliability Concept Version 2.} \item{\strong{min_iota2: }}{Minimal Iota from Iota Reliability Concept Version 2.} @@ -33,6 +40,8 @@ Returns a \code{vector} with the following reliability measures: \item{\strong{percentage_agreement: }}{Percentage Agreement.} \item{\strong{gwet_ac: }}{Gwet's AC1/AC2 agreement coefficient.} } + +If \code{return_names_only=TRUE} returns only the names of the vector elements. } \description{ This function calculates different reliability measures which are based on the diff --git a/man/install_py_modules.Rd b/man/install_py_modules.Rd index 516b87e..96ab604 100644 --- a/man/install_py_modules.Rd +++ b/man/install_py_modules.Rd @@ -43,6 +43,7 @@ Other Installation and Configuration: \code{\link{set_config_cpu_only}()}, \code{\link{set_config_gpu_low_memory}()}, \code{\link{set_config_os_environ_logger}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} diff --git a/man/set_config_cpu_only.Rd b/man/set_config_cpu_only.Rd index 72b5683..77e4e30 100644 --- a/man/set_config_cpu_only.Rd +++ b/man/set_config_cpu_only.Rd @@ -24,6 +24,7 @@ Other Installation and Configuration: \code{\link{install_py_modules}()}, \code{\link{set_config_gpu_low_memory}()}, \code{\link{set_config_os_environ_logger}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} diff --git a/man/set_config_gpu_low_memory.Rd b/man/set_config_gpu_low_memory.Rd index 0f9e4d1..89aca0d 100644 --- a/man/set_config_gpu_low_memory.Rd +++ b/man/set_config_gpu_low_memory.Rd 
@@ -27,6 +27,7 @@ Other Installation and Configuration: \code{\link{install_py_modules}()}, \code{\link{set_config_cpu_only}()}, \code{\link{set_config_os_environ_logger}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} diff --git a/man/set_config_os_environ_logger.Rd b/man/set_config_os_environ_logger.Rd index a51b44f..b58d5d6 100644 --- a/man/set_config_os_environ_logger.Rd +++ b/man/set_config_os_environ_logger.Rd @@ -7,7 +7,7 @@ set_config_os_environ_logger(level = "ERROR") } \arguments{ -\item{level}{\code{string} Minimal level that should be printed to console. Five +\item{level}{\code{string} Minimal level that should be printed to console. Four levels are available: INFO, WARNING, ERROR and NONE.} } \value{ @@ -26,6 +26,7 @@ Other Installation and Configuration: \code{\link{install_py_modules}()}, \code{\link{set_config_cpu_only}()}, \code{\link{set_config_gpu_low_memory}()}, -\code{\link{set_config_tf_logger}()} +\code{\link{set_config_tf_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} diff --git a/man/set_config_tf_logger.Rd b/man/set_config_tf_logger.Rd index f414ccf..4a078eb 100644 --- a/man/set_config_tf_logger.Rd +++ b/man/set_config_tf_logger.Rd @@ -25,6 +25,7 @@ Other Installation and Configuration: \code{\link{install_py_modules}()}, \code{\link{set_config_cpu_only}()}, \code{\link{set_config_gpu_low_memory}()}, -\code{\link{set_config_os_environ_logger}()} +\code{\link{set_config_os_environ_logger}()}, +\code{\link{set_transformers_logger}()} } \concept{Installation and Configuration} diff --git a/man/set_transformers_logger.Rd b/man/set_transformers_logger.Rd new file mode 100644 index 0000000..7088005 --- /dev/null +++ b/man/set_transformers_logger.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/install_and_config.R 
+\name{set_transformers_logger} +\alias{set_transformers_logger} +\title{Sets the level for logging information of the 'transformers' library.} +\usage{ +set_transformers_logger(level = "ERROR") +} +\arguments{ +\item{level}{\code{string} Minimal level that should be printed to console. Four +levels are available: INFO, WARNING, ERROR and DEBUG} +} +\value{ +This function does not return anything. It is used for its +side effects. +} +\description{ +This function changes the level for logging information of the 'transformers' library. +It influences the output printed to console for creating and training transformer models as well as +\link{TextEmbeddingModel}s. +} +\seealso{ +Other Installation and Configuration: +\code{\link{AifeducationConfiguration}}, +\code{\link{aifeducation_config}}, +\code{\link{check_aif_py_modules}()}, +\code{\link{install_py_modules}()}, +\code{\link{set_config_cpu_only}()}, +\code{\link{set_config_gpu_low_memory}()}, +\code{\link{set_config_os_environ_logger}()}, +\code{\link{set_config_tf_logger}()} +} +\concept{Installation and Configuration} diff --git a/vignettes/aifeducation.Rmd b/vignettes/aifeducation.Rmd index 2c3cd71..beb172f 100644 --- a/vignettes/aifeducation.Rmd +++ b/vignettes/aifeducation.Rmd @@ -210,17 +210,15 @@ Now everything is ready to use the package. `reticulate::use_condaenv(condaenv = "aifeducation")` **before** loading the library to make the python modules available for work. -# 2) Configuration +# 2) Configuration of Tensorflow In general, educators and educational researchers neither have access to high performance computing nor do they own computers with a performing graphic device for their work. Thus, some additional configuration can be done to get computations working on your machine. 
-## 2.1) Tensorflow - -If you do use a computer that does not own a graphic device, you can -disable the graphic device support of tensorflow with the function +If you do use a computer that does own a graphic device, but you would like to use cpu only, +you can disable the graphic device support of tensorflow with the function `set_config_cpu_only()`. ```{r, include = TRUE, eval=FALSE} @@ -253,7 +251,49 @@ You can choose between five levels "FATAL", "ERROR", "WARN", "INFO", and "DEBUG", setting the minimal level for logging. -# 3) Tutorials and Guides +# 3) Starting a New Session +Before you can work with *aifeducation* you must set up a new *R* +session. First, it is necessary that you load the library. Second, you +must set up python via reticulate. In case you installed python as +suggested in this vignette you may start a new session like this: + +```{r, include = TRUE, eval=FALSE} +reticulate::use_condaenv(condaenv = "aifeducation") +library(aifeducation) +set_transformers_logger("ERROR") +``` + +Next you have to choose the machine learning framework you would like to use. +You can set the framework for the complete session with + +```{r, include = TRUE, eval=FALSE} +#For tensorflow +aifeducation_config$set_global_ml_backend("tensorflow") + +#For PyTorch +aifeducation_config$set_global_ml_backend("pytorch") +``` + +In the case that you would like to use tensorflow now is a good time to +configure that backend, since some configurations +can only be done **before** tensorflow is used the first time.
+ +```{r, include = TRUE, eval=FALSE} +#if you would like to use only cpus +set_config_cpu_only() + +#if you have a graphic device with low memory +set_config_gpu_low_memory() + +#if you would like to reduce the tensorflow output to errors +set_config_os_environ_logger(level = "ERROR") +``` + +> **Note:** Please remember: Every time you start a new session in *R* you have to +to set the correct conda environment, to load the library *aifeducation*, and +to choose your machine learning framework. + +# 4) Tutorials and Guides A short introduction into the package with examples for classification tasks can be found in vignette diff --git a/vignettes/classification_tasks.Rmd b/vignettes/classification_tasks.Rmd index c22f879..1e7c9b4 100644 --- a/vignettes/classification_tasks.Rmd +++ b/vignettes/classification_tasks.Rmd @@ -224,19 +224,21 @@ suggested in vignette [01 Get started](articles/aifeducation.html) you may start a new session like this: ```{r, include = TRUE, eval=FALSE} -library(aifeducation) reticulate::use_condaenv(condaenv = "aifeducation") +library(aifeducation) ``` Next you have to choose the machine learning framework you would like to use. -You can set the framework for complete the session with +You can set the framework for the complete session with ```{r, include = TRUE, eval=FALSE} #For tensorflow aifeducation_config$set_global_ml_backend("tensorflow") +set_transformers_logger("ERROR") #For PyTorch aifeducation_config$set_global_ml_backend("pytorch") +set_transformers_logger("ERROR") ``` In the case that you would like to use tensorflow now is a good time to @@ -255,7 +257,7 @@ set_config_os_environ_logger(level = "ERROR") ``` > **Note:** Please remember: Every time you start a new session in *R* you have to -load the library, to set the correct conda environment after loading *aifeducation*, and +to set the correct conda environment, to load the library *aifeducation*, and to chose your machine learning framework. # 2.2 Reading Texts into *R*