
Merge pull request #4 from FBerding/version_0_3_2_dev
Version 0.3.2
FBerding committed Mar 17, 2024
2 parents 60e55d3 + f81e8d3 commit c749c48
Showing 129 changed files with 1,480 additions and 299 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -28,5 +28,6 @@ cran-comments.md
^doc$
^Meta$
gui_aife_studio.Rmd
classification_tasks.Rmd
intel_power_gadget_log.csv
reference
2 changes: 1 addition & 1 deletion .Rhistory
@@ -1,4 +1,3 @@
#Callback-------------------------------------------------------------------
if use_callback==True:
if bacc_val>best_bacc:
if trace>=1:
@@ -510,3 +509,4 @@ install.packages(c("BH", "brew", "brio", "cli", "coda", "commonmark", "curl", "d
install.packages(c("BH", "brew", "brio", "cli", "coda", "commonmark", "curl", "data.table", "DBI", "desc", "digest", "DT", "e1071", "fansi", "float", "FNN", "glue", "htmlwidgets", "httpuv", "igraph", "ISOcodes", "later", "LiblineaR", "listenv", "lpSolve", "markdown", "Matrix", "MatrixExtra", "mgcv", "MplusAutomation", "pak", "parallelly", "pkgbuild", "pkgload", "processx", "progress", "ps", "psych", "ragg", "Rcpp", "RcppArmadillo", "RCurl", "readODS", "readr", "reticulate", "rJava", "roxygen2", "s2", "sass", "sf", "shinyWidgets", "stringi", "survival", "tensorflow", "tfruns", "tidyr", "topicmodels", "vroom", "withr", "xfun", "yaml", "zip"))
install.packages("brew")
install.packages(c("BH", "brio", "cli", "coda", "commonmark", "curl", "data.table", "DBI", "desc", "digest", "DT", "e1071", "fansi", "float", "FNN", "glue", "htmlwidgets", "httpuv", "igraph", "ISOcodes", "later", "LiblineaR", "listenv", "lpSolve", "markdown", "Matrix", "MatrixExtra", "mgcv", "MplusAutomation", "pak", "parallelly", "pkgbuild", "pkgload", "processx", "progress", "ps", "psych", "ragg", "Rcpp", "RcppArmadillo", "RCurl", "readODS", "readr", "reticulate", "rJava", "roxygen2", "s2", "sass", "sf", "shinyWidgets", "stringi", "survival", "tensorflow", "tfruns", "tidyr", "topicmodels", "vroom", "withr", "xfun", "yaml", "zip"))
devtools::test()
7 changes: 3 additions & 4 deletions .github/workflows/R-CMD-check.yaml
@@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master, version_0_3_1_dev]
branches: [main, master, version_0_3_2_dev]
pull_request:
branches: [main, master, version_0_3_1_dev]
branches: [main, master, version_0_3_2_dev]

name: R-CMD-check

@@ -66,11 +66,10 @@ jobs:
conda_install(
packages = c(
"tensorflow-cpu",
"tensorflow-cpu<=2.15",
"torch",
"torcheval",
"safetensors",
"keras",
"accelerate"),
envname = envname,
conda = "auto",
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: aifeducation
Title: Artificial Intelligence for Education
Version: 0.3.1
Version: 0.3.2
Authors@R: c(
person("Berding", "Florian", , "florian.berding@uni-hamburg.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-3593-1695")),
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -9,6 +9,7 @@ export(bow_pp_create_basic_text_rep)
export(bow_pp_create_vocab_draft)
export(calc_standard_classification_measures)
export(check_aif_py_modules)
export(clean_pytorch_log_transformers)
export(combine_embeddings)
export(create_bert_model)
export(create_deberta_v2_model)
@@ -20,6 +21,7 @@ export(get_coder_metrics)
export(get_n_chunks)
export(get_synthetic_cases)
export(install_py_modules)
export(is.null_or_na)
export(load_ai_model)
export(matrix_to_array_c)
export(save_ai_model)
42 changes: 41 additions & 1 deletion NEWS.md
@@ -4,6 +4,46 @@ editor_options:
wrap: 72
---

# aifeducation 0.3.2

**TextEmbeddingClassifiers**

- Fixed a bug in GlobalAveragePooling1D_PT. The layer now pools correctly.
**This change affects PyTorch models trained with version 0.3.1.**

**TextEmbeddingModel**

- Replaced the parameter 'aggregation' with three new parameters that allow
explicitly choosing the start and end layers included in the creation of
embeddings. In addition, two pooling options within each layer are now
available ("cls" and "average"); see the sketch after this list.
- Added support for reporting the training and validation loss while training
the corresponding base model.
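
A minimal sketch of how the new parameters could be used when creating a model
interface. Only `emb_layer_min`, `emb_layer_max`, `emb_pool_type`, and the two
pooling choices are taken from this commit; the `TextEmbeddingModel$new()` call
and all other argument names and values are assumptions about a typical
configuration, not the package's confirmed signature.

```r
library(aifeducation)

# Hypothetical configuration; values are illustrative only.
embedding_model <- TextEmbeddingModel$new(
  model_name     = "bert_embedding",   # assumed identifier
  model_label    = "BERT embedding",   # assumed label
  model_language = "english",          # assumed
  method         = "bert",
  ml_framework   = "pytorch",
  max_length     = 512,
  chunks         = 4,
  overlap        = 30,
  model_dir      = "models/my_bert",
  emb_layer_min  = 6,          # first hidden layer included in the embeddings
  emb_layer_max  = 8,          # last hidden layer included in the embeddings
  emb_pool_type  = "average"   # pooling within each layer: "average" or "cls"
)
```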

**Transformer Models**

- Fixed a bug in the creation of all transformer models except funnel. Choosing
the number of layers now works as intended.
- A file 'history.log' is now saved in the model's folder, reporting the
training loss and validation loss for each epoch; see the sketch below.
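
A sketch of inspecting the new log, assuming 'history.log' is a plain-text table
with the same fields the Studio training plot uses (epoch, loss, val_loss); the
exact file format and location are assumptions, as they are not shown here.

```r
# Assumed path and comma-separated format; adjust read.csv() if the log uses
# another separator.
history <- utils::read.csv(file.path("models/my_bert", "history.log"))

plot(history$epoch, history$loss, type = "l", col = "red",
     xlab = "epoch", ylab = "loss")
lines(history$epoch, history$val_loss, col = "blue")
legend("topright", legend = c("train", "validation"),
       col = c("red", "blue"), lty = 1)
```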

**EmbeddedText**

- Changed the process for validating whether EmbeddedTexts are compatible. Only
the model's unique name is now used for validation.
- Added new fields and updated methods to account for the new options for
creating embeddings (layer selection and pooling type).

**Graphical User Interface Aifeducation Studio**

- Adapted the interface according to the changes made in this version.
- Improved the reading of raw texts. Reading now collapses runs of whitespace
characters into a single space and removes hyphenation; see the sketch below.
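
A minimal sketch of the cleaning now applied when raw texts are read, mirroring
the two stringr calls added to R/aif_gui.R further down in this commit; the
helper function name is hypothetical.

```r
library(stringr)

clean_raw_text <- function(text) {
  # Collapse runs of whitespace (including line breaks) into a single space.
  text <- str_replace_all(text, pattern = "[:space:]{1,}", replacement = " ")
  # Remove hyphens that are directly followed by whitespace (de-hyphenation).
  str_replace_all(text, pattern = "-(?=[:space:])", replacement = "")
}

clean_raw_text("A hyphen-  ated   word\nacross  lines")
#> [1] "A hyphen ated word across lines"
```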

**Python Installation**

- Updated the installation routine to account for the new version of keras; see
the sketch below.
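
A sketch of the updated Python environment setup, mirroring the conda_install()
call changed in .github/workflows/R-CMD-check.yaml in this commit; the package
list and the tensorflow-cpu pin come from that workflow, while the environment
name "aifeducation" is an assumption.

```r
reticulate::conda_install(
  packages = c(
    "tensorflow-cpu<=2.15",  # pin taken from the CI workflow in this commit
    "torch",
    "torcheval",
    "safetensors",
    "keras",
    "accelerate"),
  envname = "aifeducation",  # assumed environment name
  conda = "auto"
)
```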


# aifeducation 0.3.1

**Graphical User Interface Aifeducation Studio**
@@ -63,7 +103,7 @@ editor_options:
- Added an argument to 'install_py_modules',
allowing users to choose which machine learning framework should be
installed.
- Updated 'check_aif_py_modules'.
- Updated 'check_aif_py_modules'.

**Further Changes**

159 changes: 138 additions & 21 deletions R/aif_gui.R
@@ -1186,6 +1186,8 @@ start_aifeducation_studio<-function(){
total=n_files,
title = as.character(all_paths[i]))
tmp_document=readtext::readtext(file=all_paths[i])
tmp_document$text=stringr::str_replace_all(tmp_document$text,pattern = "[:space:]{1,}",replacement = " ")
tmp_document$text=stringr::str_replace_all(tmp_document$text,pattern = "-(?=[:space:])",replacement = "")
#File name without extension
#text_corpus[counter,"id"]=stringi::stri_split_fixed(tmp_document$doc_id,pattern=".")[[1]][1]
tmp_string=stringr::str_split_fixed(tmp_document$doc_id,pattern="\\.",n=Inf)
@@ -1751,12 +1753,16 @@
})

train_tune_model_architecture<-shiny::eventReactive(model_path_train_LM(),{
shinyWidgets::show_alert(title="Loading",
text = "Please wait",
type="info",
closeOnClickOutside = FALSE,
showCloseButton = FALSE)
model_path<-model_path_train_LM()
print(model_path)
if(!is.null(model_path) &
!identical(model_path,character(0))){
shinyWidgets::show_alert(title="Loading",
text = "Please wait",
type="info",
closeOnClickOutside = FALSE,
showCloseButton = FALSE)
}
if(!is.null(model_path)){
if(file.exists(paste0(model_path,
"/",
@@ -2257,23 +2263,39 @@ start_aifeducation_studio<-function(){
model<-transformers$TFAutoModel$from_pretrained(model_path)
model_architecture<-model$config$architectures
max_position_embeddings=model$config$max_position_embeddings
if(model_architecture=="FunnelForMaskedLM"){
max_layer=sum(model$config$block_repeats*model$config$block_sizes)
} else {
max_layer=model$config$num_hidden_layers
}
} else if(file.exists(paste0(model_path,
"/",
"pytorch_model.bin"))){
model<-transformers$AutoModel$from_pretrained(model_path)
model_architecture<-model$config$architectures
max_position_embeddings=model$config$max_position_embeddings
if(model_architecture=="FunnelForMaskedLM"){
max_layer=sum(model$config$block_repeats*model$config$block_sizes)
} else {
max_layer=model$config$num_hidden_layers
}
} else if(file.exists(paste0(model_path,
"/",
"model.safetensors"))){
model<-transformers$AutoModel$from_pretrained(model_path)
model_architecture<-model$config$architectures
max_position_embeddings=model$config$max_position_embeddings
if(model_architecture=="FunnelForMaskedLM"){
max_layer=sum(model$config$block_repeats*model$config$block_sizes)
} else {
max_layer=model$config$num_hidden_layers
}
} else {
model_architecture=NULL
max_position_embeddings=NULL
max_layer=NULL
}
return(list(model_architecture,max_position_embeddings))
return(list(model_architecture,max_position_embeddings,max_layer))
})

shiny::observe({
@@ -2290,6 +2312,16 @@ start_aifeducation_studio<-function(){

output$lm_interface_setting<-shiny::renderUI({
if(length(interface_architecture()[[2]])>0){

max_layer_transformer=interface_architecture()[[3]]

if(interface_architecture()[[1]]=="FunnelForMaskedLM"|
interface_architecture()[[1]]=="FunnelModel"){
pool_type_choices=c("cls")
} else {
pool_type_choices=c("average","cls")
}

ui<-shinydashboard::box(title = "Interface Setting",
width = 12,
solidHeader = TRUE,
@@ -2324,13 +2356,19 @@
min = 0,
max= interface_architecture()[[2]],
step = 1),
shiny::selectInput(inputId = "lm_aggregation",
label = "Aggregation Hidden States",
choices = c("last",
"second_to_last",
"fourth_to_last",
"all",
"last_four"))
shiny::sliderInput(inputId = "lm_emb_layers",
label = "Layers for Embeddings",
value=c(
max(1,floor(0.5*max_layer_transformer)),
max(1,floor(2/3*max_layer_transformer))),
min=1,
max=max_layer_transformer,
step=1),
shiny::selectInput(inputId = "lm_emb_pool_type",
label=paste("Pooling Type"),
choices=pool_type_choices,
multiple=FALSE
),
)
)
)
@@ -2372,17 +2410,22 @@

#Create the interface
shiny::observeEvent(input$lm_save_interface,{
model_architecture=interface_architecture()[1]
model_architecture=interface_architecture()[[1]]
print(model_architecture)
if(model_architecture=="BertForMaskedLM"){
if(model_architecture=="BertForMaskedLM"|
model_architecture=="BertModel"){
method="bert"
} else if(model_architecture=="FunnelForMaskedLM"){
} else if(model_architecture=="FunnelForMaskedLM"|
model_architecture=="FunnelModel"){
method="funnel"
} else if(model_architecture=="LongformerForMaskedLM"){
} else if(model_architecture=="LongformerForMaskedLM"|
model_architecture=="LongformerModel"){
method="longformer"
} else if(model_architecture=="RobertaForMaskedLM"){
} else if(model_architecture=="RobertaForMaskedLM"|
model_architecture=="RobertaModel"){
method="roberta"
} else if(model_architecture=="DebertaV2ForMaskedLM"){
} else if(model_architecture=="DebertaV2ForMaskedLM"|
model_architecture=="DebertaV2Model"){
method="deberta_v2"
}

@@ -2407,7 +2450,9 @@
max_length = input$lm_max_length,
overlap = input$lm_overlap,
chunks = input$lm_chunks,
aggregation = input$lm_aggregation,
emb_layer_min=input$lm_emb_layers[1],
emb_layer_max=input$lm_emb_layers[2],
emb_pool_type=input$lm_emb_pool_type,
ml_framework = input$config_ml_framework,
model_dir = model_path_interface_LM(),
method = method)
@@ -2764,7 +2809,16 @@
shiny::tags$p("Token Overlap: ",model$get_transformer_components()$overlap),
shiny::tags$p("Max Tokens: ",(model$get_model_info()$model_max_size-model$get_transformer_components()$overlap)
*model$get_transformer_components()$chunks+model$get_model_info()$model_max_size),
shiny::tags$p("Hidden States Aggregation: ",model$get_transformer_components()$aggregation),
if(!is.null(model$get_transformer_components()$aggregation)){
shiny::tags$p("Hidden States Aggregation: ",model$get_transformer_components()$aggregation)
},
if(!is.null(model$get_transformer_components()$emb_pool_type)){
shiny::tags$div(
shiny::tags$p("Pool Type: ",model$get_transformer_components()$emb_pool_type),
shiny::tags$p("Embedding Layers - Min: ",model$get_transformer_components()$emb_layer_min),
shiny::tags$p("Embedding Layers - Max: ",model$get_transformer_components()$emb_layer_max)
)
},
shiny::tags$h3("Sustainability"),
if(methods::isClass(Class="data.frame",where = model$get_sustainability_data())){
if(is.na(model$get_sustainability_data()[1,1])==FALSE){
@@ -2787,6 +2841,37 @@
)
)

),
#Language Model Training------------------------------------------
shiny::tabPanel("Training",
shiny::fluidRow(
shinydashboard::box(title = "Training",
solidHeader = TRUE,
status = "primary",
width = 12,
shiny::sidebarLayout(
position="right",
sidebarPanel=shiny::sidebarPanel(
shiny::sliderInput(inputId = "lm_performance_text_size",
label = "Text Size",
min = 1,
max = 20,
step = 0.5,
value = 12),
shiny::numericInput(inputId = "lm_performance_y_min",
label = "Y Min",
value = 0),
shiny::numericInput(inputId = "lm_performance_y_max",
label = "Y Max",
value = 20),
),
mainPanel =shiny::mainPanel(
shiny::plotOutput(outputId = "lm_performance_training_loss")
)
)
)
)

),
#Create Text Embeddings---------------------------------------------
shiny::tabPanel("Create Text Embeddings",
@@ -2940,6 +3025,38 @@
}
})

output$lm_performance_training_loss<-shiny::renderPlot({
plot_data=LanguageModel_for_Use()$last_training$history

if(!is.null(plot_data)){
y_min=input$lm_performance_y_min
y_max=input$lm_performance_y_max

val_loss_min=min(plot_data$val_loss)
best_model_epoch=which(x=(plot_data$val_loss)==val_loss_min)

plot<-ggplot2::ggplot(data=plot_data)+
ggplot2::geom_line(ggplot2::aes(x=.data$epoch,y=.data$loss,color="train"))+
ggplot2::geom_line(ggplot2::aes(x=.data$epoch,y=.data$val_loss,color="validation"))+
ggplot2::geom_vline(xintercept = best_model_epoch,
linetype="dashed")

plot=plot+ggplot2::theme_classic()+
ggplot2::ylab("value")+
ggplot2::coord_cartesian(ylim=c(y_min,y_max))+
ggplot2::xlab("epoch")+
ggplot2::scale_color_manual(values = c("train"="red",
"validation"="blue",
"test"="darkgreen"))+
ggplot2::theme(text = ggplot2::element_text(size = input$lm_performance_text_size),
legend.position="bottom")
return(plot)
} else {
return(NULL)
}
},res = 72*2)


#Document Page--------------------------------------------------------------
shinyFiles::shinyDirChoose(input=input,
id="lm_db_select_model_for_documentation",
