<h2>Relis R code analysis</h2>

<h4><u>Parsing and beautifying data</u></h4>

In [None]:
# Install and load the necessary packages
packgs <- c("tidyverse", "qdapRegex", "data.table", "janitor", "dplyr", "ggplot2", "cowplot", "psych")
install.packages(setdiff(packgs, unique(data.frame(installed.packages())$Package)))
lapply(packgs, library, character.only = TRUE)

<i>Xtend will need to read the *RSC* *ReliS* project models with the ecore API</i>

In [36]:
# Import the classification data from its CSV file.
# Replace the path below with the name of your imported data file.
relis_data <- read.csv(
    file = "../data/relis_classification_CV.csv",
    header = TRUE
)
print(relis_data)

  nbr Key Title Publication.year Author.s Venue Source Search.Type Reviewer.s
1   1  NA    NA             2016       NA    NA     NA          NA         NA
2   2  NA    NA             2018       NA    NA     NA          NA         NA
3   3  NA    NA             2016       NA    NA     NA          NA         NA
4   4  NA    NA             2016       NA    NA     NA          NA         NA
5   5  NA    NA             2019       NA    NA     NA          NA         NA
6   6  NA    NA             2016       NA    NA     NA          NA         NA
7   7  NA    NA             2017       NA    NA     NA          NA         NA
8 943  NA    NA             2016       NA    NA     NA          NA         NA
            Transformation.name                  Domain Transformation.Language
1 Model-Driven Data Migration 2 Artificial Intelligence           ATL | Henshin
2                         Test1    Collaborative system                 Henshin
3                        testtt    Collaborative system   

In [None]:
## Config file
# One row per analysed column of the data: its name in the data frame
# (Column_name), its measurement scale (Scale) and a display title (Title).
config_file <- data.frame(
    Column_name = c("Transformation.name", "Domain", "Transformation.Language", "Source.language", "Target.language", "Scope", "Industrial", "Bidirectional", "Targeted.year", "Note", "Publication.year", "Venue", "Search.Type"),
    Scale = c("Text", "Nominal", "Nominal", "Nominal", "Nominal", "Nominal", "Nominal", "Nominal", "Continuous", "Text", "Continuous", "Nominal", "Nominal")
)

# Beautifying Title
# The plotting functions read `config_file$Title` for plot titles and axis
# labels, so derive a readable title from each column name by turning the
# dots into spaces (e.g. "Search.Type" -> "Search Type").
config_file$Title <- gsub(".", " ", config_file$Column_name, fixed = TRUE)

print(config_file)

In [None]:
# Partition the config rows by measurement scale.
# which() discards rows whose Scale is NA, exactly as subset() does.
nominal_df <- config_file[which(config_file$Scale == "Nominal"), , drop = FALSE]
continuous_df <- config_file[which(config_file$Scale == "Continuous"), , drop = FALSE]
print(nominal_df)
print(continuous_df)

<h4><u>DESCRIPTIVE STATS</u></h4>

<h5 style="color:orange">Functions</h5>

In [25]:
# Function to extract current column and organize data
#
# Builds the frequency table of one column whose cells may hold several
# values separated by "|".
#
# Args:
#   data:        data frame holding the column to summarise.
#   config_file: data frame whose `Column_name` column names columns of `data`.
#   i:           row index in `config_file` of the column to summarise.
#
# Returns:
#   The tabyl() frequency table with exactly three columns: "Value",
#   "n" (count) and "Percentage" (share of all values, scaled to 0-100).
beautify_data_desc <- function(data, config_file, i) {
    # as.character() keeps the lookup by name even if Column_name is a factor
    # (indexing with a factor would select by its integer code instead)
    column_name <- as.character(config_file$Column_name[i])

    # Split the values by the "|" character
    split_values <- str_split(data[[column_name]], "\\|")

    # Flatten the split values into a single vector and remove leading and trailing whitespaces
    flattened_values <- str_trim(unlist(split_values))

    # Generate the frequency table
    table_to_add <- tabyl(flattened_values)

    # tabyl() appends a `valid_percent` column whenever the input contains NA;
    # drop it so the three names assigned below always match the columns
    # (otherwise a fourth column named NA is left behind)
    table_to_add$valid_percent <- NULL

    # Express the share as a percentage instead of a proportion
    table_to_add$percent <- table_to_add$percent * 100
    colnames(table_to_add) <- c("Value", "n", "Percentage")

    return(table_to_add)
}

# Extract one continuous column as a single-column data frame.
#
# Args:
#   data:        data frame holding the column to extract.
#   config_file: data frame whose `Column_name` column names columns of `data`.
#   i:           row index in `config_file` of the column to extract.
#
# Returns:
#   A data frame with a single column named "data" holding the column values.
beautify_data_desc_cont <- function(data, config_file, i) {
    # as.character() keeps the lookup by name even if Column_name is a factor
    # (indexing with a factor would select by its integer code instead)
    column_name <- as.character(config_file$Column_name[i])

    # `[[` returns NULL for an unknown name, so fail explicitly instead
    if (!column_name %in% names(data)) {
        stop("Column '", column_name, "' not found in data", call. = FALSE)
    }

    # `[[` always yields the bare column vector, also for tibbles where
    # `data[, name]` would remain a data frame
    column_values <- data[[column_name]]

    return(data.frame(data = column_values))
}

# Function to generate bar plots
#
# Draws the percentage of each value of one column as a bar plot, with the
# bars filled by their count.
#
# Args:
#   data:        data frame holding the column to plot.
#   config_file: data frame with a `Column_name` column and, optionally, a
#                `Title` column holding the display title of each column.
#   i:           row index in `config_file` of the column to plot.
#
# Returns:
#   The ggplot object.
generate_bar_plot <- function(data, config_file, i) {
    table_to_add <- beautify_data_desc(data, config_file, i)

    # Use the configured display title; fall back to the column name when the
    # config has no `Title` column, so the labels still name the variable
    plot_label <- config_file$Title[[i]]
    if (is.null(plot_label)) {
        plot_label <- as.character(config_file$Column_name[[i]])
    }

    p <- ggplot(data = table_to_add, aes(x = Value, y = Percentage, fill = n)) +
        geom_bar(stat = "identity") +
        labs(title = paste(plot_label, "~ Bar plot"), x = plot_label, y = "Percentage") +
        theme_minimal()

    return(p)
}

# Function to generate box plots
#
# Draws one continuous column as a single box plot and marks its mean with a
# red star-shaped point.
#
# Args:
#   data:        data frame holding the column to plot.
#   config_file: data frame with a `Column_name` column and, optionally, a
#                `Title` column holding the display title of each column.
#   i:           row index in `config_file` of the column to plot.
#
# Returns:
#   The ggplot object.
generate_box_plot <- function(data, config_file, i) {
    table_to_add <- beautify_data_desc_cont(data, config_file, i)

    # Use the configured display title; fall back to the column name when the
    # config has no `Title` column, so the labels still name the variable
    plot_label <- config_file$Title[[i]]
    if (is.null(plot_label)) {
        plot_label <- as.character(config_file$Column_name[[i]])
    }

    # x is a constant so that all values fall into one box
    p <- ggplot(table_to_add, aes(x = "x", y = data)) +
        geom_boxplot() +
        stat_summary(fun = "mean", geom = "point", shape = 8, size = 2, color = "red") +
        labs(title = paste(plot_label, "~ Box plot"), y = plot_label, x = "") +
        theme_minimal()

    return(p)
}

# Function to generate violin plots
#
# Draws one continuous column as a single violin plot and marks its mean with
# a red star-shaped point.
#
# Args:
#   data:        data frame holding the column to plot.
#   config_file: data frame with a `Column_name` column and, optionally, a
#                `Title` column holding the display title of each column.
#   i:           row index in `config_file` of the column to plot.
#
# Returns:
#   The ggplot object.
generate_violin_plot <- function(data, config_file, i) {
    table_to_add <- beautify_data_desc_cont(data, config_file, i)

    # Use the configured display title; fall back to the column name when the
    # config has no `Title` column, so the labels still name the variable
    plot_label <- config_file$Title[[i]]
    if (is.null(plot_label)) {
        plot_label <- as.character(config_file$Column_name[[i]])
    }

    # x is a constant so that all values fall into one violin
    p <- ggplot(table_to_add, aes(x = "x", y = data)) +
        geom_violin() +
        stat_summary(fun = "mean", geom = "point", shape = 8, size = 2, color = "red") +
        labs(title = paste(plot_label, "~ Violin plot"), y = plot_label, x = "") +
        theme_minimal()

    return(p)
}

<h5 style="color:#F0F8FF">Execution</h5>

In [None]:
# Initialize the list that stores the frequency table of each nominal variable,
# keyed by column name
desc_distr_vector <- list()

# Generate the frequency table for each nominal variable.
# seq_len() makes the loop a no-op when there is no nominal variable
# (1:nrow() would iterate over 1 and 0 instead).
for (i in seq_len(nrow(nominal_df))) {
    # Frequency table
    desc_distr_vector[[as.character(nominal_df$Column_name[i])]] <- beautify_data_desc(relis_data, nominal_df, i)
}

print(desc_distr_vector)


In [None]:
# Initialize the list that stores the bar plot of each nominal variable,
# keyed by column name
bar_plot_vector <- list()

# seq_len() makes the loop a no-op when there is no nominal variable
# (1:nrow() would iterate over 1 and 0 instead).
for (i in seq_len(nrow(nominal_df))) {
    # Bar plot
    bar_plot_vector[[as.character(nominal_df$Column_name[i])]] <- generate_bar_plot(relis_data, nominal_df, i)
}

print(bar_plot_vector)

In [39]:
# Initialize the list that stores the descriptive statistics of each
# continuous variable, keyed by column name
statistics_vector <- list()

# seq_len() makes the loop a no-op when there is no continuous variable
# (1:nrow() would iterate over 1 and 0 instead).
for (i in seq_len(nrow(continuous_df))) {
    # Calculate descriptive statistics
    statistics_vector[[as.character(continuous_df$Column_name[i])]] <- describe(beautify_data_desc_cont(relis_data, continuous_df, i))
}

print(statistics_vector)

In [31]:
# Initialize the list that stores the box plot of each continuous variable,
# keyed by column name
box_plot_vector <- list()

# seq_len() makes the loop a no-op when there is no continuous variable
# (1:nrow() would iterate over 1 and 0 instead).
for (i in seq_len(nrow(continuous_df))) {
    # Generate the box plot for each continuous variable
    box_plot_vector[[as.character(continuous_df$Column_name[i])]] <- generate_box_plot(relis_data, continuous_df, i)
}

print(box_plot_vector)

In [None]:
# Initialize the list that stores the violin plot of each continuous variable,
# keyed by column name
violin_plot_vector <- list()

# seq_len() makes the loop a no-op when there is no continuous variable
# (1:nrow() would iterate over 1 and 0 instead).
for (i in seq_len(nrow(continuous_df))) {
    # Generate the violin plot for each continuous variable
    violin_plot_vector[[as.character(continuous_df$Column_name[i])]] <- generate_violin_plot(relis_data, continuous_df, i)
}

print(violin_plot_vector)

<h4><u>EVOLUTION STATS</u></h4>

<h5 style="color:orange">Functions</h5>

In [42]:
# Available functions
# Function to extract current column and organize data
#
# Counts how often each value of one column occurs per publication year.
# Cells may hold several values separated by "|"; each one is counted.
#
# Args:
#   data:        data frame with a `Publication.year` column and the column
#                to summarise.
#   config_file: data frame whose `Column_name` column names columns of `data`.
#   i:           row index in `config_file` of the column to summarise.
#
# Returns:
#   A data frame with the columns "Year", "Value" and "Frequency", one row per
#   (Year, Value) combination that occurs in the data.
beautify_data_evo <- function(data, config_file, i) {
    # as.character() keeps the lookup by name even if Column_name is a factor
    column_name <- as.character(config_file$Column_name[i])

    table_to_add <- data.frame(data$Publication.year, data[[column_name]])
    colnames(table_to_add) <- c("Year", "Value")
    # Drop rows without a value (subset() also discards NA)
    table_to_add <- subset(table_to_add, Value != "")

    table_to_add <- table_to_add %>%
        separate_rows(Value, sep = "\\s*\\|\\s*") %>%
        # Trim the outer whitespace the separator pattern cannot reach, so that
        # " ATL" and "ATL" are counted together (as beautify_data_desc does)
        mutate(Value = str_trim(Value)) %>%
        # A leading or trailing "|" leaves empty pieces after the split
        subset(Value != "") %>%
        count(Year, Value, name = "Frequency")

    return(table_to_add)
}

# Function to generate distribution table
#
# Reshapes the per-year value counts of one column into wide form: the "Year"
# column is kept, every distinct value becomes its own column holding the
# frequency, and any NA in the result is replaced by 0.
#
# Args:
#   data:        data frame passed on to beautify_data_evo().
#   config_file: config data frame passed on to beautify_data_evo().
#   i:           row index in `config_file` of the column to summarise.
#
# Returns:
#   The wide distribution table.
expand_data <- function(data, config_file, i) {
    long_counts <- beautify_data_evo(data, config_file, i)

    wide_counts <- pivot_wider(long_counts, names_from = "Value", values_from = "Frequency")

    # Replace NA with 0
    filled_counts <- mutate_all(wide_counts, ~ replace(., is.na(.), 0))

    return(filled_counts)
}

# Function to generate evolution plots
#
# Draws the yearly frequency of each value of one column as one line per
# value; values are told apart by colour, point shape and line type.
#
# Args:
#   data:        data frame with a `Publication.year` column and the column
#                to plot.
#   config_file: data frame with a `Column_name` column and, optionally, a
#                `Title` column holding the display title of each column.
#   i:           row index in `config_file` of the column to plot.
#
# Returns:
#   The ggplot object.
generate_evo_plot <- function(data, config_file, i) {
    table_to_add <- beautify_data_evo(data, config_file, i)

    # Use the configured display title; fall back to the column name when the
    # config has no `Title` column, so the plot title still names the variable
    plot_label <- config_file$Title[[i]]
    if (is.null(plot_label)) {
        plot_label <- as.character(config_file$Column_name[[i]])
    }

    # Cycle through the shapes 1 to 6 so that every distinct value gets one
    shape_vector <- rep(1:6, length.out = length(unique(table_to_add$Value)))

    p <- ggplot(data = table_to_add, aes(x = Year, y = Frequency, color = Value, shape = Value, group = Value, linetype = Value)) +
        geom_line(stat = "identity", size = 1.1) +
        geom_point(size = 2) +
        scale_shape_manual(values = shape_vector) +
        labs(title = paste(plot_label, "~ Evolution plot"), x = "Year", y = "Frequency") +
        theme_minimal()

    return(p)
}