Skip to content

Commit

Permalink
Fix Transformer Trace
Browse files Browse the repository at this point in the history
Fix Classifier Checking
  • Loading branch information
FBerding committed Apr 9, 2024
1 parent 7be7686 commit f1e5883
Show file tree
Hide file tree
Showing 24 changed files with 631 additions and 543 deletions.
1,016 changes: 508 additions & 508 deletions .Rhistory

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master, version_0_3_2_dev]
branches: [main, master, version_0_3_3_dev]
pull_request:
branches: [main, master, version_0_3_2_dev]
branches: [main, master, version_0_3_3_dev]

name: R-CMD-check

Expand Down
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: aifeducation
Title: Artificial Intelligence for Education
Version: 0.3.2
Version: 0.3.3
Authors@R: c(
person("Berding", "Florian", , "florian.berding@uni-hamburg.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-3593-1695")),
Expand Down Expand Up @@ -61,7 +61,6 @@ Suggests:
topicmodels,
udpipe,
quanteda,
quanteda.textmodels,
knitr,
rmarkdown,
testthat (>= 3.0.0),
Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@ editor_options:
markdown:
wrap: 72
---
# aifeducation 0.3.3

**Graphical User Interface Aifeducation Studio**

- Fixed a bug concerning the ids of .pdf and .csv files. Now the ids are correctly
saved within a text collection file.

**Further Changes**

- Removed quanteda.textmodels as necessary library for testing the package.
- Added a dataset for testing the package based on Maas et al. (2011).

# aifeducation 0.3.2

Expand Down
5 changes: 3 additions & 2 deletions R/aif_gui.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ start_aifeducation_studio<-function(){
set_transformers_logger(level="ERROR")
#Disable tqdm progressbar
transformers$logging$disable_progress_bar()
datasets$disable_progress_bars()

#Start GUI--------------------------------------------------------------------
options(shiny.reactlog=TRUE)
Expand Down Expand Up @@ -1091,7 +1092,7 @@ start_aifeducation_studio<-function(){
}
if(input$dp_include_csv==FALSE &
input$dp_include_pdf==FALSE &
input$dp_include_csv==FALSE){
input$dp_include_xlsx==FALSE){
error_list[length(error_list)+1]="No file types selected. Please select
at least one file type."
}
Expand Down Expand Up @@ -1191,7 +1192,7 @@ start_aifeducation_studio<-function(){
#File name without extension
#text_corpus[counter,"id"]=stringi::stri_split_fixed(tmp_document$doc_id,pattern=".")[[1]][1]
tmp_string=stringr::str_split_fixed(tmp_document$doc_id,pattern="\\.",n=Inf)
text_corpus[counter,"id"]=tmp_string[1,ncol(tmp_string)]
text_corpus[counter,"id"]=paste0(tmp_string[1,1:(ncol(tmp_string)-1)],collapse = ".")
text_corpus[counter,"text"]=tmp_document$text
counter=counter+1

Expand Down
22 changes: 22 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,25 @@
#' @format list
#' @keywords internal
"test_classifier_sustainability"

#' Stanford Movie Review Dataset
#'
#' A \code{data.frame} consisting of a subset of 100 negative and 200 positive
#' movie reviews from the dataset provided by Maas et al. (2011).
#' The \code{data.frame} consists of three columns. The first column 'text' stores
#' the movie review. The second stores the labels (0 = negative, 1 = positive). The
#' last column stores the id.
#' The purpose of the data is for illustration in vignettes.
#'
#' @docType data
#' @format data.frame
#' @keywords internal
#' @references Maas, A. L., Daly, R. E., Pham, P. T., Huang, D.,
#' Ng, A. Y., & Potts, C. (2011). Learning Word Vectors for Sentiment
#' Analysis. In D. Lin, Y. Matsumoto, & R. Mihalcea (Eds.),
#' Proceedings of the 49th Annual Meeting of the Association for
#' Computational Linguistics: Human Language Technologies (pp. 142–150).
#' Association for Computational Linguistics.
#' https://aclanthology.org/P11-1015
#'
"imdb_movie_reviews"
11 changes: 10 additions & 1 deletion R/te_classifier_neuralnet_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -1924,7 +1924,16 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class(

embedding_model_config<-text_embeddings$get_model_info()
for(check in names(embedding_model_config)){
if(embedding_model_config[[check]]!=private$text_embedding_model$model[[check]]){
if(!is.null_or_na(embedding_model_config[[check]]) &
!is.null_or_na(private$text_embedding_model$model[[check]])){
if(embedding_model_config[[check]]!=private$text_embedding_model$model[[check]]){
return(FALSE)
}
} else if (!is.null_or_na(embedding_model_config[[check]]) &
is.null_or_na(private$text_embedding_model$model[[check]])){
return(FALSE)
} else if (is.null_or_na(embedding_model_config[[check]]) &
!is.null_or_na(private$text_embedding_model$model[[check]])){
return(FALSE)
}
}
Expand Down
4 changes: 4 additions & 0 deletions R/transformer_bert.R
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,10 @@ train_tune_bert_model=function(ml_framework=aifeducation_config$get_framework(),
data_collator = data_collator,
tokenizer = tokenizer)
trainer$remove_callback(transformers$integrations$CodeCarbonCallback)
if(as.logical(pytorch_trace)==FALSE){
trainer$remove_callback(transformers$PrinterCallback)
trainer$remove_callback(transformers$ProgressCallback)
}

#Load Custom Callbacks

Expand Down
4 changes: 4 additions & 0 deletions R/transformer_deberta_v2.R
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,10 @@ train_tune_deberta_v2_model=function(ml_framework=aifeducation_config$get_framew
tokenizer = tokenizer
)
trainer$remove_callback(transformers$integrations$CodeCarbonCallback)
if(as.logical(pytorch_trace)==FALSE){
trainer$remove_callback(transformers$PrinterCallback)
trainer$remove_callback(transformers$ProgressCallback)
}

#Add Callback if Shiny App is running
if(requireNamespace("shiny") & requireNamespace("shinyWidgets")){
Expand Down
4 changes: 4 additions & 0 deletions R/transformer_funnel.R
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,10 @@ train_tune_funnel_model=function(ml_framework=aifeducation_config$get_framework(
tokenizer = tokenizer
)
trainer$remove_callback(transformers$integrations$CodeCarbonCallback)
if(as.logical(pytorch_trace)==FALSE){
trainer$remove_callback(transformers$PrinterCallback)
trainer$remove_callback(transformers$ProgressCallback)
}

#Add Callback if Shiny App is running
if(requireNamespace("shiny") & requireNamespace("shinyWidgets")){
Expand Down
4 changes: 4 additions & 0 deletions R/transformer_longformer.R
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@ train_tune_longformer_model=function(ml_framework=aifeducation_config$get_framew
tokenizer = tokenizer
)
trainer$remove_callback(transformers$integrations$CodeCarbonCallback)
if(as.logical(pytorch_trace)==FALSE){
trainer$remove_callback(transformers$PrinterCallback)
trainer$remove_callback(transformers$ProgressCallback)
}

#Add Callback if Shiny App is running
if(requireNamespace("shiny") & requireNamespace("shinyWidgets")){
Expand Down
4 changes: 4 additions & 0 deletions R/transformer_roberta.R
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,10 @@ train_tune_roberta_model=function(ml_framework=aifeducation_config$get_framework
tokenizer = tokenizer
)
trainer$remove_callback(transformers$integrations$CodeCarbonCallback)
if(as.logical(pytorch_trace)==FALSE){
trainer$remove_callback(transformers$PrinterCallback)
trainer$remove_callback(transformers$ProgressCallback)
}

#Add Callback if Shiny App is running
if(requireNamespace("shiny") & requireNamespace("shinyWidgets")){
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ knitr::opts_chunk$set(
<!-- badges: start -->

**GitHub**
[![](https://img.shields.io/badge/devel%20version-0.3.1-green.svg)](https://github.com/fberding/iotarelr)
[![](https://img.shields.io/badge/devel%20version-0.3.3-green.svg)](https://github.com/fberding/iotarelr)
[![R-CMD-check](https://github.com/FBerding/aifeducation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/FBerding/aifeducation/actions/workflows/R-CMD-check.yaml)
**CRAN** [![CRAN
status](https://www.r-pkg.org/badges/version/aifeducation)](https://CRAN.R-project.org/package=aifeducation)
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<!-- badges: start -->

**GitHub**
[![](https://img.shields.io/badge/devel%20version-0.3.1-green.svg)](https://github.com/fberding/iotarelr)
[![](https://img.shields.io/badge/devel%20version-0.3.3-green.svg)](https://github.com/fberding/iotarelr)
[![R-CMD-check](https://github.com/FBerding/aifeducation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/FBerding/aifeducation/actions/workflows/R-CMD-check.yaml)
**CRAN** [![CRAN
status](https://www.r-pkg.org/badges/version/aifeducation)](https://CRAN.R-project.org/package=aifeducation)
Expand Down Expand Up @@ -142,6 +142,8 @@ following table provides more details:
| Longformer | Yes | Yes | Yes |
| Text Embedding Classifier | Yes | Yes | No |

Please note that tensorflow is currently supported up to version 2.15.

## Classification Tasks

### Transforming Texts into Numbers
Expand Down
Binary file added data/imdb_movie_reviews.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/get_n_chunks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_train_test_split.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/imdb_movie_reviews.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions tests/testthat/test-01_vocab_draft.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
testthat::skip_on_cran()
testthat::skip_if_not_installed(pkg="quanteda")
testthat::skip_if_not_installed(pkg="udpipe")

tmp_path="test_data/language_models/udpipe_models/english-ewt-ud-2.5-191206.udpipe"
tmp_condition=file.exists(testthat::test_path(tmp_path))
testthat::skip_if_not(condition=tmp_condition,
message = "udpipe language model not available")

test_that("bow_pp_create_vocab_draft", {
example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id1,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
example_data<-imdb_movie_reviews

res<-bow_pp_create_vocab_draft(
path_language_model=testthat::test_path(tmp_path),
Expand Down
9 changes: 5 additions & 4 deletions tests/testthat/test-02_basic_text_rep.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@

testthat::skip_on_os("windows")
testthat::skip_if_not_installed(pkg="quanteda")
testthat::skip_if_not_installed(pkg="topicmodels")
testthat::skip_if_not_installed(pkg="text2vec")
testthat::skip_if_not_installed(pkg="tidytext")

path="test_data/gvc_lda/vocab_draft_movie_review.rda"
testthat::skip_if_not(condition=file.exists(testthat::test_path(path)),
Expand All @@ -10,10 +14,7 @@ load(testthat::test_path(path))
#------------------------------------------------------------------------------
test_that("bow_pp_create_vocab_draft", {

example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id1,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
example_data<-imdb_movie_reviews

res<-bow_pp_create_basic_text_rep(
data=example_data$text[1:100],
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-03_text_embedding_model_gvc_lda.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
testthat::skip_on_os("windows")
path="test_data/gvc_lda/basic_text_rep_movie_reviews.rda"
testthat::skip_if_not(condition=file.exists(testthat::test_path(path)),
message = "Necessary dataset not available")
Expand All @@ -10,13 +11,12 @@ if(dir.exists(testthat::test_path("test_artefacts/tmp_full_models"))==FALSE){
dir.create(testthat::test_path("test_artefacts/tmp_full_models"))
}

datasets$disable_progress_bars()

#------------------------------------------------------------------------------
load(testthat::test_path(path))

example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id2,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
example_data<-imdb_movie_reviews
#------------------------------------------------------------------------------

global_vector_clusters_modeling<-TextEmbeddingModel$new(
Expand Down
5 changes: 1 addition & 4 deletions tests/testthat/test-04_transformer_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@ rows_susatainability["longformer"]=2
rows_susatainability["deberta_v2"]=3


example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id1,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
example_data<-imdb_movie_reviews

print(check_aif_py_modules())

Expand Down
9 changes: 3 additions & 6 deletions tests/testthat/test-05_classifier_neural_net.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,10 @@ current_embeddings<-bert_embeddings$clone(deep = TRUE)

for(framework in ml_frameworks){
for (n_classes in 2:3){
example_data<-data.frame(
id=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$id2,
label=quanteda::docvars(quanteda.textmodels::data_corpus_moviereviews)$sentiment)
example_data$text<-as.character(quanteda.textmodels::data_corpus_moviereviews)
example_data$label<-as.character(example_data$label)
example_data<-imdb_movie_reviews

rownames(example_data)<-example_data$id
rownames(example_data)<-rownames(current_embeddings$embeddings)
example_data$id<-rownames(current_embeddings$embeddings)
example_data<-example_data[intersect(
rownames(example_data),rownames(current_embeddings$embeddings)),]

Expand Down
4 changes: 2 additions & 2 deletions vignettes/classification_tasks.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ basic_text_rep<-bow_pp_create_basic_text_rep(
remove_separators = TRUE,
split_hyphens = FALSE,
split_tags = FALSE,
language_stopwords="eng",
language_stopwords="en",
use_lemmata = FALSE,
to_lower=FALSE,
min_termfreq = NULL,
Expand Down Expand Up @@ -585,7 +585,7 @@ directory. The next step is to train your model by calling
train_tune_bert_model(
ml_framework=aifeducation_config$get_framework(),
output_dir = "my_own_transformer_trained",
bert_model_dir_path = "my_own_transformer",
model_dir_path = "my_own_transformer",
raw_texts = example_data$text,
p_mask=0.15,
whole_word=TRUE,
Expand Down

0 comments on commit f1e5883

Please sign in to comment.