Skip to content

Commit

Permalink
Draft 0.2.1.9002
Browse files Browse the repository at this point in the history
  • Loading branch information
FBerding committed Sep 30, 2023
1 parent 5459dc9 commit ac96018
Show file tree
Hide file tree
Showing 78 changed files with 1,147 additions and 615 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: aifeducation
Title: Artificial Intelligence for Education
Version: 0.2.1.9001
Version: 0.2.1.9002
Authors@R: c(
person("Berding", "Florian", , "florian.berding@uni-hamburg.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-3593-1695")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(EmbeddedText)
export(TextEmbeddingClassifierNeuralNet)
export(TextEmbeddingModel)
export(aifeducation_config)
export(array_to_matrix)
export(bow_pp_create_basic_text_rep)
export(bow_pp_create_vocab_draft)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# aifeducation (Development Version)
# aifeducation 0.2.1.9002

- Added DeBERTa and Funnel-Transformer support
- Fixed issues in training transformer models
Expand Down
47 changes: 31 additions & 16 deletions R/install_and_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#'@param envname \code{string} Name of the environment where the packages should
#'be installed.
#'@param tf_version \code{string} determining the desired version of 'tensorflow'.
#'@param remove_first \code{bool} If \code{TRUE} removes the environment completely before
#'recreating the environment and installing the packages. If \code{FALSE} the packages
#'are installed in the existing environment without any prior changes.
#'@return Returns no values or objects. Function is used for installing the
#'necessary python libraries in a conda environment.
#'@importFrom reticulate conda_create
Expand All @@ -13,24 +16,26 @@
#'@family Installation and Configuration
#'@export
install_py_modules<-function(envname="aifeducation",
tf_version="<=2.14"){
tf_version="<=2.14",
remove_first=FALSE){
relevant_modules<-c("transformers",
"tokenizers",
"datasets",
"codecarbon",
"accelerate")

conda_environments<-reticulate::conda_list()
if(remove_first==TRUE){
conda_environments<-reticulate::conda_list()
if((envname %in% conda_environments$name)==TRUE){
reticulate::conda_remove(envname = envname)
}

if((envname %in% conda_environments$name)==TRUE){
reticulate::conda_remove(envname = envname)
reticulate::conda_create(
envname = envname,
channel=c("conda-forge")
)
}

reticulate::conda_create(
envname = envname,
channel=c("conda-forge")
)

reticulate::conda_install(
packages = c(
paste0("tensorflow",tf_version),
Expand Down Expand Up @@ -180,8 +185,6 @@ set_config_os_environ_logger<-function(level="ERROR"){
#'at least version 3. If you have an older version 'tensorflow' is used.
#'
#'@family Installation and Configuration
#'@keywords internal
#'
AifeducationConfiguration<-R6::R6Class(
classname = "aifeducationConfiguration",
private = list(
Expand Down Expand Up @@ -210,23 +213,27 @@ AifeducationConfiguration<-R6::R6Class(
stop("backend must be 'tensorflow' or 'pytorch'.")
}


py_package_list<-reticulate::py_list_packages()
keras_version<-as.character(py_package_list[which(py_package_list$package=="keras"),"version"])

if(private$TextEmbeddingFramework=="not_specified"){
if(utils::compareVersion(keras["__version__"],"2.4.0")>=0 &
utils::compareVersion(keras["__version__"],"3.0.0")<0){
if(utils::compareVersion(keras_version,"2.4.0")>=0 &
utils::compareVersion(keras_version,"3.0.0")<0){
private$TextEmbeddingFramework=backend
private$ClassifierFramework="tensorflow"
os$environ$setdefault("KERAS_BACKEND","tensorflow")

cat("keras Version:",keras["__version__"],"\n")
cat("keras Version:",keras_version,"\n")
cat("Backend for TextEmbeddingModels:",private$TextEmbeddingFramework,"\n")
cat("Backend for Classifiers:",private$ClassifierFramework,"\n")

} else if(utils::compareVersion(keras["__version__"],"3.0.0")>=0){
} else if(utils::compareVersion(keras_version,"3.0.0")>=0){
private$TextEmbeddingFramework=backend
private$ClassifierFramework=backend
os$environ$setdefault("KERAS_BACKEND",backend)

cat("keras Version:",keras["__version__"],"\n")
cat("keras Version:",keras_version,"\n")
cat("Backend for TextEmbeddingModels:",private$TextEmbeddingFramework,"\n")
cat("Backend for Classifiers:",private$ClassifierFramework,"\n")

Expand All @@ -251,3 +258,11 @@ AifeducationConfiguration<-R6::R6Class(
}
)
)

#' R6 object of class AifeducationConfiguration
#'
#' Object for managing and setting the machine learning framework of a session.
#'
#'@family Installation and Configuration
#'@export
aifeducation_config<-NULL
1 change: 1 addition & 0 deletions R/onLoad.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ codecarbon<-NULL
torch<-NULL
os<-NULL
keras<-NULL

aifeducation_config<-NULL


Expand Down
13 changes: 13 additions & 0 deletions R/te_classifier_neuralnet_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -1717,6 +1717,19 @@ TextEmbeddingClassifierNeuralNet<-R6::R6Class(
if(sustain_track==TRUE){
sustainability_tracker$stop()
private$sustainability<-summarize_tracked_sustainability(sustainability_tracker)
} else {
private$sustainability=list(
sustainability_tracked=FALSE,
date=NA,
sustainability_data=list(
duration_sec=NA,
co2eq_kg=NA,
cpu_energy_kwh=NA,
gpu_energy_kwh=NA,
ram_energy_kwh=NA,
total_energy_kwh=NA
)
)
}

if(trace==TRUE){
Expand Down
16 changes: 14 additions & 2 deletions R/text_embedding_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ TextEmbeddingModel<-R6::R6Class(
#'@param model_dir \code{string} path to the directory where the
#'BERT model is stored.
#'@param bow_basic_text_rep object of class \code{basic_text_rep} created via
#'the function \link{bow_pp_create_basic_text_rep}. Only relevant for \code{method="glove"}
#'the function \link{bow_pp_create_basic_text_rep}. Only relevant for \code{method="glove_cluster"}
#'and \code{method="lda"}.
#'@param bow_n_dim \code{int} Number of dimensions of the GlobalVector or
#'number of topics for LDA.
Expand Down Expand Up @@ -170,7 +170,7 @@ TextEmbeddingModel<-R6::R6Class(
model_version=NULL,
model_language=NULL,
method=NULL,
ml_framework="tensorflow",
ml_framework=aifeducation_config$get_framework()$TextEmbeddingFramework,
max_length=0,
chunks=1,
overlap=0,
Expand Down Expand Up @@ -229,6 +229,18 @@ TextEmbeddingModel<-R6::R6Class(
private$r_package_versions$aifeducation<-packageVersion("aifeducation")
private$r_package_versions$reticulate<-packageVersion("reticulate")

#Load Sustainability Data-----------------------------------------------
if(!(method %in% c("lda","glove_cluster"))==TRUE){
sustainability_datalog_path=paste0(model_dir,"/","sustainability.csv")
if(file.exists(sustainability_datalog_path)){
tmp_sustainability_data<-read.csv(sustainability_datalog_path)
private$sustainability$sustainability_tracked=TRUE
private$sustainability$track_log=tmp_sustainability_data
} else {
private$sustainability$sustainability_tracked=FALSE
private$sustainability$track_log=NA
}
}

#basic_components-------------------------------------------------------
private$basic_components$method=method
Expand Down
36 changes: 22 additions & 14 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ educational and social sciences.
social and educational sciences.
- Supporting both 'PyTorch' and 'Tensorflow' as machine learning
frameworks.
- Supporting pre-trained language models from Hugging Face.
- Supporting the usage of trained models on both frameworks providing
a high level of flexibility (supported models are BERT, RoBERTa,
DeBERTa, Longformer, and Funnel Transformer).
a high level of flexibility.
- Supporting pre-trained language models from Hugging Face.
- Supporting BERT, RoBERTa, DeBERTa, Longformer, and Funnel Transformer for
creating context-sensitive text embeddings.
- Making sharing pre-trained models very easy.
- Integrating sustainability tracking.
- Integrating special statistical techniques for dealing with data
structures common in the social and educational sciences.
- Supporting the classification of long textual documents.

Currently, the package focuses on classification tasks which can either
be used to diagnose characteristics of learners from written material or
Expand All @@ -70,11 +72,18 @@ You can install the development version of aifeducation from
[GitHub](https://github.com/) with:

``` r
# install.packages("devtools")
devtools::install_github("FBerding/aifeducation",
dependencies = TRUE)
install.packages("devtools")
devtools::install_github(repo="FBerding/aifeducation",
ref="master",
dependencies = TRUE)
```

> **Note:** There is currently a bug in reticulate 1.32.0 that prevents loading
Python packages such as tensorflow, keras, or torch. The bug has been
fixed in the development version of reticulate. You can install this version with
`install.packages("pak")` and `pak::pak("rstudio/reticulate")`.
Once reticulate 1.33.0 is released, you can ignore this note.

## Sustainability

Training AI models consumes time and energy. To help researchers
Expand All @@ -90,7 +99,7 @@ computer is located.
This package allows all supported models based either on 'PyTorch' or
'tensorflow', providing a high level of flexibility. Even pre-trained
models can be used with both frameworks. In general, users only have to
decide which framework they would like to use when loading a model.
decide which framework they would like to use when starting a new session.

At the moment, using 'PyTorch' for classifier objects requires 'keras-core'
or 'keras3'. If your system does not support these python packages you can
Expand Down Expand Up @@ -226,13 +235,12 @@ AI/Models](https://fberding.github.io/aifeducation/articles/sharing_and_publishi
you can find a detailed guide on how to document and share your models.

## Tutorial and Guides

A guide on how to install and configure this package can be found via
[Get
started](https://fberding.github.io/aifeducation/articles/aifeducation.html).
A short introduction into the package with examples for classification
tasks can be found in vignette [02 classification
tasks](https://fberding.github.io/aifeducation/articles/classification_tasks.html).
- Installation and configuration of the package:
[01 get started](https://fberding.github.io/aifeducation/articles/aifeducation.html).
- A short introduction to the package with examples for classification
tasks: [02 classification tasks](https://fberding.github.io/aifeducation/articles/classification_tasks.html).
- A description for sharing models:
[03 sharing and using trained AI/models](https://fberding.github.io/aifeducation/articles/sharing_and_publishing.html)

## References

Expand Down
38 changes: 25 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ educational and social sciences.
educational sciences.
- Supporting both ‘PyTorch’ and ‘Tensorflow’ as machine learning
frameworks.
- Supporting pre-trained language models from Hugging Face.
- Supporting the usage of trained models on both frameworks providing a
high level of flexibility (supported models are BERT, RoBERTa,
DeBERTa, Longformer, and Funnel Transformer).
high level of flexibility.
- Supporting pre-trained language models from Hugging Face.
- Supporting BERT, RoBERTa, DeBERTa, Longformer, and Funnel Transformer
for creating context-sensitive text embeddings.
- Making sharing pre-trained models very easy.
- Integrating sustainability tracking.
- Integrating special statistical techniques for dealing with data
structures common in the social and educational sciences.
- Supporting the classification of long textual documents.

Currently, the package focuses on classification tasks which can either
be used to diagnose characteristics of learners from written material or
Expand All @@ -55,11 +57,19 @@ You can install the development version of aifeducation from
[GitHub](https://github.com/) with:

``` r
# install.packages("devtools")
devtools::install_github("FBerding/aifeducation",
dependencies = TRUE)
install.packages("devtools")
devtools::install_github(repo="FBerding/aifeducation",
ref="master",
dependencies = TRUE)
```

> **Note:** There is currently a bug in reticulate 1.32.0 that prevents
> loading Python packages such as tensorflow, keras, or torch. The bug
> has been fixed in the development version of reticulate. You can
> install this version with `install.packages("pak")` and
> `pak::pak("rstudio/reticulate")`. Once reticulate 1.33.0 is released,
> you can ignore this note.
## Sustainability

Training AI models consumes time and energy. To help researchers
Expand All @@ -75,7 +85,8 @@ computer is located.
This package allows all supported models based either on ‘PyTorch’ or
‘tensorflow’, providing a high level of flexibility. Even pre-trained
models can be used with both frameworks. In general, users only have to
decide which framework they would like to use when loading a model.
decide which framework they would like to use when starting a new
session.

At the moment, using ‘PyTorch’ for classifier objects requires
‘keras-core’ or ‘keras3’. If your system does not support these python
Expand Down Expand Up @@ -210,12 +221,13 @@ you can find a detailed guide on how to document and share your models.

## Tutorial and Guides

A guide on how to install and configure this package can be found via
[Get
started](https://fberding.github.io/aifeducation/articles/aifeducation.html).
A short introduction into the package with examples for classification
tasks can be found in vignette [02 classification
tasks](https://fberding.github.io/aifeducation/articles/classification_tasks.html).
- Installation and configuration of the package: [01 get
started](https://fberding.github.io/aifeducation/articles/aifeducation.html).
- A short introduction to the package with examples for classification
tasks: [02 classification
tasks](https://fberding.github.io/aifeducation/articles/classification_tasks.html).
- A description for sharing models: [03 sharing and using trained
AI/models](https://fberding.github.io/aifeducation/articles/sharing_and_publishing.html)

## References

Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ac96018

Please sign in to comment.