/
init.R
executable file
·47 lines (36 loc) · 1.05 KB
/
init.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Load packages
library(ggplot2)
library(tidyr)
library(dplyr)
library(stringr)
library(bibliographica)
library(estc)
library(magrittr)
library(sorvi)
library(reshape2)
library(gridExtra)
library(knitr)
library(devtools)
load_all("~/Rpackages/estc")
# I/O definitions ----

# Folder that receives the generated output tables
output.folder <- "output.tables/"

# Create the output directory if it does not exist yet.
# showWarnings = FALSE keeps reruns quiet when the folder is already there.
dir.create(output.folder, showWarnings = FALSE)

# Compressed CSV files found directly under data/unified/.
# `pattern` is a regular expression: the dots are escaped and the expression
# is anchored at the end so that only names ending in ".csv.gz" are listed.
fs <- list.files("data/unified/", full.names = TRUE, pattern = "\\.csv\\.gz$")

# Catalog identifier
catalog <- "cerl"

# Paths of the polished and raw data frames (RDS files)
datafile <- "data/unified/polished/df.Rds"
datafile.orig <- "data/unified/polished/df.raw.Rds"
# Summary info ----
author <- "Leo Lahti"
# NOTE(review): presumably the number of top entries shown in summaries;
# confirm against the scripts that read `ntop`.
ntop <- 20

# Global parameters ----

# Two-element numeric vector; presumably the first and last year of interest
# (TODO confirm against the code that consumes `timespan`).
timespan <- c(1460, 1830)

# Languages to consider in the cleanup.
# TODO: recognize the necessary languages automatically?
languages <- "english"

# Fields to update. Left as NULL because of the FIXME below.
# FIXME: does not work if this is on
# update.fields <- "publication_place"
update.fields <- NULL

# Fields to remove (almost empty and hence rather uninformative); CERL
ignore.fields <- c(
  "publication_frequency",
  "publication_interval"
)
# ---------------------------------