-
Notifications
You must be signed in to change notification settings - Fork 5
/
graph_monthly_quality_scores.R
112 lines (99 loc) · 4.9 KB
/
graph_monthly_quality_scores.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(lubridate)
library(readr)
library(magrittr)
# Plot mean quality scores by month
# This program is dispatched (called) by the MetaDIG Grapher class. Several
# variables are injected by metadig-engine Dispatcher
# - title: the graph title
# - inFile: the CSV file containing quality scores, which has been prepared by Grapher
# - outFile: the graphics output file to create
# Variables read by metadig-engine Dispatcher after execution
# - mdq_result, output, status
# Define these variables ("inFile", "outFile") for local testing only
#inFile <- "toolik.csv"
#outFile <- "toolik-monthly.png"
# Font sizes for the plot's text elements; each value is handed to
# element_text(size = ...) in the theme() call that builds the graph.
# Axis tick labels
axisTextFontSize <- 7
# Legend entry labels
legendTextFontSize <- 8
# Axis titles
axisTitleFontSize <- 9
# Legend title
legendTitleFontSize <- 9
# Read the quality-score CSV whose path is injected as `inFile`
fsr <- read_csv(inFile)
#fsr <- read_csv(inFile) %>% filter(grepl("*eml*", formatId))
# Add `ym` (the first day of the month of dateUploaded) and the four FAIR
# score columns multiplied by 100 (presumably fractions turned into
# percentages, since the graph's y axis runs from 0 to 100).
scores <- fsr %>%
  mutate(
    ym = as.Date(sprintf("%4s-%02d-01", year(dateUploaded), month(dateUploaded))),
    scoreF = scoreFindable * 100.0,
    scoreA = scoreAccessible * 100.0,
    scoreI = scoreInteroperable * 100.0,
    scoreR = scoreReusable * 100.0
  )
# Within every (ym, sequenceId) group keep the row(s) with the latest
# dateUploaded. Rows tied on that value are all kept, and the result remains
# grouped by ym and sequenceId.
most_recent <- scores %>%
  arrange(ym, sequenceId, dateUploaded) %>%
  group_by(ym, sequenceId) %>%
  filter(min_rank(desc(dateUploaded)) <= 1)
#head(most_recent)
# Calculate the per-month mean of each FAIR score (f, a, i, r) together with
# the running mean of those monthly means (fc, ac, ic, rc), reshape to one row
# per (ym, metric) and turn the metric keys into a labelled factor.
score_cumulative <- most_recent %>%
  arrange(ym) %>%
  group_by(ym) %>%
  summarise(
    f = mean(scoreF),
    a = mean(scoreA),
    i = mean(scoreI),
    r = mean(scoreR)
  ) %>%
  mutate(
    fc = cummean(f),
    ac = cummean(a),
    ic = cummean(i),
    rc = cummean(r)
  ) %>%
  select(ym, f, a, i, r, fc, ac, ic, rc) %>%
  gather(metric, mean, -ym) %>%
  mutate(
    metric = factor(
      metric,
      levels = c("f", "a", "i", "r", "fc", "ac", "ic", "rc"),
      labels = c(
        "Findable", "Accessible", "Interoperable", "Reusable",
        "Cum. Findable", "Cum. Accessible", "Cum. Interoperable", "Cum. Reusable"
      )
    )
  )
# Only the (non-cumulative) monthly series are kept for plotting
score_monthly <- filter(
  score_cumulative,
  metric %in% c("Findable", "Accessible", "Interoperable", "Reusable")
)
# Overall mean per FAIR category: the average of that category's monthly
# means, ignoring missing values. These feed the legend labels.
mf <- mean(score_cumulative$mean[score_cumulative$metric %in% c("Findable")], na.rm = TRUE)
ma <- mean(score_cumulative$mean[score_cumulative$metric %in% c("Accessible")], na.rm = TRUE)
mi <- mean(score_cumulative$mean[score_cumulative$metric %in% c("Interoperable")], na.rm = TRUE)
mr <- mean(score_cumulative$mean[score_cumulative$metric %in% c("Reusable")], na.rm = TRUE)
# Choose the x-axis configuration from the span of plotted months. When the
# data cover more than one calendar year the axis is broken by year; when
# everything falls in a single year it is broken by month instead, because a
# year-based axis would not display for a single year.
minYear <- format(min(score_monthly$ym), "%Y")
maxYear <- format(max(score_monthly$ym), "%Y")
if (minYear != maxYear) {
  xLabel <- "Year"
  dateBreaks <- "year"
  dateMinorBreaks <- "months"
  dateFormat <- "%Y"
} else {
  xLabel <- "Month"
  dateBreaks <- "months"
  dateMinorBreaks <- "day"
  dateFormat <- "%Y-%m"
}
# Plot the monthly mean FAIR scores and write the graph to `outFile`.
# The palette holds eight entries (four colours repeated) although
# score_monthly only carries the four monthly metrics.
d1_colors <- c("#ff582d", "#c70a61", "#1a6379", "#60c5e4", "#ff582d", "#c70a61", "#1a6379", "#60c5e4")
p <- ggplot(data = score_monthly, mapping = aes(x = ym, y = mean, color = metric)) +
  geom_line() +
  geom_point(size = 1) +
  theme_bw() +
  theme(panel.border = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.text = element_text(size = axisTextFontSize),
        axis.title = element_text(size = axisTitleFontSize),
        legend.title = element_text(size = legendTitleFontSize),
        legend.text = element_text(size = legendTextFontSize),
        panel.grid.minor = element_blank(),
        panel.background = element_blank()) +
  #scale_color_manual(name = "Metric", labels = c("Findable", "Accessible", "Interoperable", "Reusable"),
  #                   values=d1_colors) +
  # Legend entries show each category's overall mean as a whole percentage
  scale_color_manual(name = "Metric",
                     labels = c(sprintf("Findable (%.0f%%)", mf),
                                sprintf("Accessible (%.0f%%)", ma),
                                sprintf("Interoperable (%.0f%%)", mi),
                                sprintf("Reusable (%.0f%%)", mr)),
                     values = d1_colors) +
  scale_x_date(date_breaks = dateBreaks, date_minor_breaks = dateMinorBreaks,
               labels = date_format(dateFormat)) +
  xlab(xLabel) +
  scale_y_continuous(limits = c(0, 100)) +
  ylab("Average FAIR Score")
#ggtitle(paste0("DataONE: FAIR scores for ", format(sum(standards$n), big.mark=","), " EML and ISO metadata records"))
#scale_fill_discrete(name = "metric", labels = c("Finabl", "Accessibl", "Interoperabl", "Reusabl")) +

# Save the plot as a separate statement and name the plot explicitly.
# Chaining ggsave() onto the plot with `+` makes it save whatever last_plot()
# returns, and with ggplot2 releases where ggsave() returns the file name the
# addition fails ("Can't add `ggsave(...)` to a ggplot object"), aborting the
# script before the status variables are set.
ggsave(outFile, plot = p, width = 8.0, height = 3.0)
# Publish the outcome through the variables that metadig-engine Dispatcher
# reads after execution: status, output and mdq_result.
status <- "SUCCESS"
output <- sprintf("Created graphics file %s", outFile)
mdq_result <- list(
  status = status,
  output = list(
    list(value = "Plot created successfully.")
  )
)