Version 0 3 2 #4

Merged
13 commits merged on Mar 17, 2024
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -28,5 +28,6 @@ cran-comments.md
^doc$
^Meta$
gui_aife_studio.Rmd
classification_tasks.Rmd
intel_power_gadget_log.csv
reference
2 changes: 1 addition & 1 deletion .Rhistory
@@ -1,4 +1,3 @@
#Callback-------------------------------------------------------------------
if use_callback==True:
if bacc_val>best_bacc:
if trace>=1:
@@ -510,3 +509,4 @@ install.packages(c("BH", "brew", "brio", "cli", "coda", "commonmark", "curl", "d
install.packages(c("BH", "brew", "brio", "cli", "coda", "commonmark", "curl", "data.table", "DBI", "desc", "digest", "DT", "e1071", "fansi", "float", "FNN", "glue", "htmlwidgets", "httpuv", "igraph", "ISOcodes", "later", "LiblineaR", "listenv", "lpSolve", "markdown", "Matrix", "MatrixExtra", "mgcv", "MplusAutomation", "pak", "parallelly", "pkgbuild", "pkgload", "processx", "progress", "ps", "psych", "ragg", "Rcpp", "RcppArmadillo", "RCurl", "readODS", "readr", "reticulate", "rJava", "roxygen2", "s2", "sass", "sf", "shinyWidgets", "stringi", "survival", "tensorflow", "tfruns", "tidyr", "topicmodels", "vroom", "withr", "xfun", "yaml", "zip"))
install.packages("brew")
install.packages(c("BH", "brio", "cli", "coda", "commonmark", "curl", "data.table", "DBI", "desc", "digest", "DT", "e1071", "fansi", "float", "FNN", "glue", "htmlwidgets", "httpuv", "igraph", "ISOcodes", "later", "LiblineaR", "listenv", "lpSolve", "markdown", "Matrix", "MatrixExtra", "mgcv", "MplusAutomation", "pak", "parallelly", "pkgbuild", "pkgload", "processx", "progress", "ps", "psych", "ragg", "Rcpp", "RcppArmadillo", "RCurl", "readODS", "readr", "reticulate", "rJava", "roxygen2", "s2", "sass", "sf", "shinyWidgets", "stringi", "survival", "tensorflow", "tfruns", "tidyr", "topicmodels", "vroom", "withr", "xfun", "yaml", "zip"))
devtools::test()
7 changes: 3 additions & 4 deletions .github/workflows/R-CMD-check.yaml
@@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master, version_0_3_1_dev]
branches: [main, master, version_0_3_2_dev]
pull_request:
branches: [main, master, version_0_3_1_dev]
branches: [main, master, version_0_3_2_dev]

name: R-CMD-check

@@ -66,11 +66,10 @@ jobs:

conda_install(
packages = c(
"tensorflow-cpu",
"tensorflow-cpu<=2.15",
"torch",
"torcheval",
"safetensors",
"keras",
"accelerate"),
envname = envname,
conda = "auto",
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: aifeducation
Title: Artificial Intelligence for Education
Version: 0.3.1
Version: 0.3.2
Authors@R: c(
person("Berding", "Florian", , "florian.berding@uni-hamburg.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-3593-1695")),
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -9,6 +9,7 @@ export(bow_pp_create_basic_text_rep)
export(bow_pp_create_vocab_draft)
export(calc_standard_classification_measures)
export(check_aif_py_modules)
export(clean_pytorch_log_transformers)
export(combine_embeddings)
export(create_bert_model)
export(create_deberta_v2_model)
@@ -20,6 +21,7 @@ export(get_coder_metrics)
export(get_n_chunks)
export(get_synthetic_cases)
export(install_py_modules)
export(is.null_or_na)
export(load_ai_model)
export(matrix_to_array_c)
export(save_ai_model)
42 changes: 41 additions & 1 deletion NEWS.md
@@ -4,6 +4,46 @@ editor_options:
wrap: 72
---

# aifeducation 0.3.2

**TextEmbeddingClassifiers**

- Fixed a bug in GlobalAveragePooling1D_PT. The layer now performs the pooling correctly
(see the sketch below). **This change affects PyTorch models trained with version 0.3.1.**
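
For illustration, here is a minimal sketch in plain R (not the package's PyTorch layer) of what global average pooling over the time dimension is supposed to compute: the mean of the embeddings across the valid, non-padded time steps of each case. The array shape (cases, times, features) and the `seq_lengths` vector are assumptions made for this example.

```r
# Minimal sketch in plain R, not the package's PyTorch layer: global average
# pooling over the time dimension of an array with shape (cases, times, features),
# averaging only over the valid (non-padded) time steps of each case.
global_average_pooling <- function(x, seq_lengths) {
  n_cases    <- dim(x)[1]
  n_features <- dim(x)[3]
  pooled <- matrix(0, nrow = n_cases, ncol = n_features)
  for (i in seq_len(n_cases)) {
    valid <- seq_len(seq_lengths[i])  # non-padded time steps of case i
    slice <- matrix(x[i, valid, ], nrow = length(valid), ncol = n_features)
    pooled[i, ] <- colMeans(slice)    # mean over valid time steps only
  }
  pooled
}
```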

**TextEmbeddingModel**

- Replaced the parameter 'aggregation' with three new parameters that allow explicitly
choosing the first and the last layer to be included in the creation of embeddings. In
addition, two options for the pooling method within each layer were added ("cls" and
"average"); see the usage sketch below.
- Added support for reporting the training and validation loss during training of
the corresponding base model.
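
A rough usage sketch: the parameter names emb_layer_min, emb_layer_max and emb_pool_type are taken from the GUI call further down in this pull request, while the constructor name TextEmbeddingModel$new(), the remaining arguments, and the example values are assumptions.

```r
# Hypothetical sketch only; emb_layer_min, emb_layer_max and emb_pool_type
# mirror the arguments passed in the GUI code of this pull request, everything
# else (constructor name, paths, values) is illustrative.
interface <- aifeducation::TextEmbeddingModel$new(
  ml_framework  = "pytorch",
  method        = "bert",
  model_dir     = "models/my_bert",  # hypothetical path
  max_length    = 512,
  chunks        = 4,
  overlap       = 30,
  emb_layer_min = 6,                 # first hidden layer included in the embeddings
  emb_layer_max = 8,                 # last hidden layer included in the embeddings
  emb_pool_type = "average"          # pooling within each layer: "average" or "cls"
)
```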

**Transformer Models**

- Fixed a bug in the creation of all transformer models except funnel. Choosing the
number of layers now works as intended.
- A file 'history.log' is now saved within the model's folder, reporting the loss
and the validation loss for every training epoch; see the sketch below for inspecting it.
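
A small sketch of how the new log could be inspected. The exact format of 'history.log' is not shown in this diff, so a plain-text table with columns epoch, loss and val_loss (matching the plot code added to the GUI in this pull request) is assumed, and the path is illustrative.

```r
# Assumption: 'history.log' is a plain-text table with one row per epoch and
# columns epoch, loss and val_loss; the path below is a hypothetical example.
history <- utils::read.table("models/my_bert/history.log", header = TRUE)
best_epoch <- which.min(history$val_loss)  # epoch with the lowest validation loss
history[best_epoch, ]
```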

**EmbeddedText**

- Changed the process for validating whether EmbeddedTexts are compatible. Only
the model's unique name is now used for this validation.
- Added new fields and updated methods to account for the new options for creating embeddings (layer
selection and pooling type).

**Graphical User Interface Aifeducation Studio**

- Adapted the interface to the changes made in this version.
- Improved the reading of raw texts. Reading now reduces runs of multiple space characters to
a single space character and removes hyphenation; see the sketch below.
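
The two stringr replacements below are the ones added to the GUI's reading routine in this pull request; the example string is illustrative only.

```r
library(stringr)

raw_text <- "An exam-  ple   text  with   extra   spaces."
text <- str_replace_all(raw_text, pattern = "[:space:]{1,}", replacement = " ")  # collapse whitespace runs
text <- str_replace_all(text, pattern = "-(?=[:space:])", replacement = "")      # drop hyphens before a space
text
#> [1] "An exam ple text with extra spaces."
```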

**Python Installation**

- Updated the installation to account for the new version of Keras; see the sketch below.
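
The corresponding change in the CI workflow pins TensorFlow below 2.16 (which bundles Keras 3); a comparable manual installation via reticulate might look like the following sketch, with an illustrative environment name.

```r
# Sketch mirroring the package pins of the updated GitHub Actions workflow in
# this pull request; the environment name "aifeducation" is illustrative.
reticulate::conda_install(
  envname  = "aifeducation",
  packages = c(
    "tensorflow-cpu<=2.15",  # stay below TensorFlow 2.16 / Keras 3
    "torch",
    "torcheval",
    "safetensors",
    "accelerate"
  ),
  conda = "auto"
)
```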


# aifeducation 0.3.1

**Graphical User Interface Aifeducation Studio**
@@ -63,7 +103,7 @@ editor_options:
- Added an argument to 'install_py_modules',
allowing to choose which machine learning framework should be
installed.
- Updated 'check_aif_py_modules'.
- Updated 'check_aif_py_modules'.

**Further Changes**

159 changes: 138 additions & 21 deletions R/aif_gui.R
@@ -1186,6 +1186,8 @@ start_aifeducation_studio<-function(){
total=n_files,
title = as.character(all_paths[i]))
tmp_document=readtext::readtext(file=all_paths[i])
tmp_document$text=stringr::str_replace_all(tmp_document$text,pattern = "[:space:]{1,}",replacement = " ")
tmp_document$text=stringr::str_replace_all(tmp_document$text,pattern = "-(?=[:space:])",replacement = "")
#File name without extension
#text_corpus[counter,"id"]=stringi::stri_split_fixed(tmp_document$doc_id,pattern=".")[[1]][1]
tmp_string=stringr::str_split_fixed(tmp_document$doc_id,pattern="\\.",n=Inf)
@@ -1751,12 +1753,16 @@ start_aifeducation_studio<-function(){
})

train_tune_model_architecture<-shiny::eventReactive(model_path_train_LM(),{
shinyWidgets::show_alert(title="Loading",
text = "Please wait",
type="info",
closeOnClickOutside = FALSE,
showCloseButton = FALSE)
model_path<-model_path_train_LM()
print(model_path)
if(!is.null(model_path) &
!identical(model_path,character(0))){
shinyWidgets::show_alert(title="Loading",
text = "Please wait",
type="info",
closeOnClickOutside = FALSE,
showCloseButton = FALSE)
}
if(!is.null(model_path)){
if(file.exists(paste0(model_path,
"/",
@@ -2257,23 +2263,39 @@ start_aifeducation_studio<-function(){
model<-transformers$TFAutoModel$from_pretrained(model_path)
model_architecture<-model$config$architectures
max_position_embeddings=model$config$max_position_embeddings
if(model_architecture=="FunnelForMaskedLM"){
max_layer=sum(model$config$block_repeats*model$config$block_sizes)
} else {
max_layer=model$config$num_hidden_layers
}
} else if(file.exists(paste0(model_path,
"/",
"pytorch_model.bin"))){
model<-transformers$AutoModel$from_pretrained(model_path)
model_architecture<-model$config$architectures
max_position_embeddings=model$config$max_position_embeddings
if(model_architecture=="FunnelForMaskedLM"){
max_layer=sum(model$config$block_repeats*model$config$block_sizes)
} else {
max_layer=model$config$num_hidden_layers
}
} else if(file.exists(paste0(model_path,
"/",
"model.safetensors"))){
model<-transformers$AutoModel$from_pretrained(model_path)
model_architecture<-model$config$architectures
max_position_embeddings=model$config$max_position_embeddings
if(model_architecture=="FunnelForMaskedLM"){
max_layer=sum(model$config$block_repeats*model$config$block_sizes)
} else {
max_layer=model$config$num_hidden_layers
}
} else {
model_architecture=NULL
max_position_embeddings=NULL
max_layer=NULL
}
return(list(model_architecture,max_position_embeddings))
return(list(model_architecture,max_position_embeddings,max_layer))
})

shiny::observe({
@@ -2290,6 +2312,16 @@ start_aifeducation_studio<-function(){

output$lm_interface_setting<-shiny::renderUI({
if(length(interface_architecture()[[2]])>0){

max_layer_transformer=interface_architecture()[[3]]

if(interface_architecture()[[1]]=="FunnelForMaskedLM"|
interface_architecture()[[1]]=="FunnelModel"){
pool_type_choices=c("cls")
} else {
pool_type_choices=c("average","cls")
}

ui<-shinydashboard::box(title = "Interface Setting",
width = 12,
solidHeader = TRUE,
@@ -2324,13 +2356,19 @@ start_aifeducation_studio<-function(){
min = 0,
max= interface_architecture()[[2]],
step = 1),
shiny::selectInput(inputId = "lm_aggregation",
label = "Aggregation Hidden States",
choices = c("last",
"second_to_last",
"fourth_to_last",
"all",
"last_four"))
shiny::sliderInput(inputId = "lm_emb_layers",
label = "Layers for Embeddings",
value=c(
max(1,floor(0.5*max_layer_transformer)),
max(1,floor(2/3*max_layer_transformer))),
min=1,
max=max_layer_transformer,
step=1),
shiny::selectInput(inputId = "lm_emb_pool_type",
label=paste("Pooling Type"),
choices=pool_type_choices,
multiple=FALSE
),
)
)
)
@@ -2372,17 +2410,22 @@ start_aifeducation_studio<-function(){

#Create the interface
shiny::observeEvent(input$lm_save_interface,{
model_architecture=interface_architecture()[1]
model_architecture=interface_architecture()[[1]]
print(model_architecture)
if(model_architecture=="BertForMaskedLM"){
if(model_architecture=="BertForMaskedLM"|
model_architecture=="BertModel"){
method="bert"
} else if(model_architecture=="FunnelForMaskedLM"){
} else if(model_architecture=="FunnelForMaskedLM"|
model_architecture=="FunnelModel"){
method="funnel"
} else if(model_architecture=="LongformerForMaskedLM"){
} else if(model_architecture=="LongformerForMaskedLM"|
model_architecture=="LongformerModel"){
method="longformer"
} else if(model_architecture=="RobertaForMaskedLM"){
} else if(model_architecture=="RobertaForMaskedLM"|
model_architecture=="RobertaModel"){
method="roberta"
} else if(model_architecture=="DebertaV2ForMaskedLM"){
} else if(model_architecture=="DebertaV2ForMaskedLM"|
model_architecture=="DebertaV2Model"){
method="deberta_v2"
}

@@ -2407,7 +2450,9 @@ start_aifeducation_studio<-function(){
max_length = input$lm_max_length,
overlap = input$lm_overlap,
chunks = input$lm_chunks,
aggregation = input$lm_aggregation,
emb_layer_min=input$lm_emb_layers[1],
emb_layer_max=input$lm_emb_layers[2],
emb_pool_type=input$lm_emb_pool_type,
ml_framework = input$config_ml_framework,
model_dir = model_path_interface_LM(),
method = method)
@@ -2764,7 +2809,16 @@ start_aifeducation_studio<-function(){
shiny::tags$p("Token Overlap: ",model$get_transformer_components()$overlap),
shiny::tags$p("Max Tokens: ",(model$get_model_info()$model_max_size-model$get_transformer_components()$overlap)
*model$get_transformer_components()$chunks+model$get_model_info()$model_max_size),
shiny::tags$p("Hidden States Aggregation: ",model$get_transformer_components()$aggregation),
if(!is.null(model$get_transformer_components()$aggregation)){
shiny::tags$p("Hidden States Aggregation: ",model$get_transformer_components()$aggregation)
},
if(!is.null(model$get_transformer_components()$emb_pool_type)){
shiny::tags$div(
shiny::tags$p("Pool Type: ",model$get_transformer_components()$emb_pool_type),
shiny::tags$p("Embedding Layers - Min: ",model$get_transformer_components()$emb_layer_min),
shiny::tags$p("Embedding Layers - Max: ",model$get_transformer_components()$emb_layer_max)
)
},
shiny::tags$h3("Sustainability"),
if(methods::isClass(Class="data.frame",where = model$get_sustainability_data())){
if(is.na(model$get_sustainability_data()[1,1])==FALSE){
Expand All @@ -2787,6 +2841,37 @@ start_aifeducation_studio<-function(){
)
)

),
#Language Model Training------------------------------------------
shiny::tabPanel("Training",
shiny::fluidRow(
shinydashboard::box(title = "Training",
solidHeader = TRUE,
status = "primary",
width = 12,
shiny::sidebarLayout(
position="right",
sidebarPanel=shiny::sidebarPanel(
shiny::sliderInput(inputId = "lm_performance_text_size",
label = "Text Size",
min = 1,
max = 20,
step = 0.5,
value = 12),
shiny::numericInput(inputId = "lm_performance_y_min",
label = "Y Min",
value = 0),
shiny::numericInput(inputId = "lm_performance_y_max",
label = "Y Max",
value = 20),
),
mainPanel =shiny::mainPanel(
shiny::plotOutput(outputId = "lm_performance_training_loss")
)
)
)
)

),
#Create Text Embeddings---------------------------------------------
shiny::tabPanel("Create Text Embeddings",
@@ -2940,6 +3025,38 @@ start_aifeducation_studio<-function(){
}
})

output$lm_performance_training_loss<-shiny::renderPlot({
plot_data=LanguageModel_for_Use()$last_training$history

if(!is.null(plot_data)){
y_min=input$lm_performance_y_min
y_max=input$lm_performance_y_max

val_loss_min=min(plot_data$val_loss)
best_model_epoch=which(x=(plot_data$val_loss)==val_loss_min)

plot<-ggplot2::ggplot(data=plot_data)+
ggplot2::geom_line(ggplot2::aes(x=.data$epoch,y=.data$loss,color="train"))+
ggplot2::geom_line(ggplot2::aes(x=.data$epoch,y=.data$val_loss,color="validation"))+
ggplot2::geom_vline(xintercept = best_model_epoch,
linetype="dashed")

plot=plot+ggplot2::theme_classic()+
ggplot2::ylab("value")+
ggplot2::coord_cartesian(ylim=c(y_min,y_max))+
ggplot2::xlab("epoch")+
ggplot2::scale_color_manual(values = c("train"="red",
"validation"="blue",
"test"="darkgreen"))+
ggplot2::theme(text = ggplot2::element_text(size = input$lm_performance_text_size),
legend.position="bottom")
return(plot)
} else {
return(NULL)
}
},res = 72*2)


#Document Page--------------------------------------------------------------
shinyFiles::shinyDirChoose(input=input,
id="lm_db_select_model_for_documentation",