-
Notifications
You must be signed in to change notification settings - Fork 3
/
libreoffice.rb
80 lines (74 loc) · 2.91 KB
/
libreoffice.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
require 'iso-639'
module Heathen
  class Processor
    # Lookup table used by #libreoffice: accepted +format+ value => table of
    # MIME-type pattern => output file suffix. Each pattern is interpolated
    # into a Regexp and matched (unanchored) against the job's MIME type,
    # which is why '.*' acts as a catch-all.
    LIBREOFFICE_SUFFIXES = {
      'pdf' => {
        '.*' => 'pdf',
      }.freeze,
      'msoffice' => {
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx',
        'application/vnd.oasis.opendocument.text' => 'docx',
        'application/vnd.oasis.opendocument.spreadsheet' => 'xlsx',
        'application/vnd.oasis.opendocument.presentation' => 'pptx',
        'application/zip' => 'docx',
      }.freeze,
      'ooffice' => {
        'application/msword' => 'odt',
        'application/vnd.ms-word' => 'odt',
        'application/vnd.ms-excel' => 'ods',
        'application/vnd.ms-office' => 'odt',
        'application/vnd.ms-powerpoint' => 'odp',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'odt',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'ods',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'odp',
      }.freeze,
      'txt' => {
        '.*' => 'txt',
      }.freeze,
    }.freeze

    # Converts office documents to their counterpart (e.g. MS Word -> LibreOffice word,
    # or MS Excel -> LibreOffice Sheet), to PDF, or to plain text. Calls the external
    # 'libreoffice' utility to achieve this (plus 'pdftotext' for plain text, which is
    # produced by converting to PDF first and then extracting the text).
    #
    # On success the job's content is replaced by the contents of the converted file,
    # and the converted file (and any intermediate PDF) is removed.
    #
    # @param format [String, Symbol] output format. Must be one of:
    #   pdf      - convert to PDF (any MIME type is accepted)
    #   msoffice - corresponding Microsoft format (docx / xlsx / pptx)
    #   ooffice  - corresponding LibreOffice format (odt / ods / odp)
    #   txt      - plain text (any MIME type is accepted)
    # @raise [InvalidParameterInStep] if +format+ is not one of the values above
    # @raise [InvalidMimeTypeInStep] if the job's MIME type has no mapping for +format+
    # @raise [ConversionFailed] if an external command exits non-zero or the
    #   converted file cannot be found afterwards
    def libreoffice(format:)
      suffixes = LIBREOFFICE_SUFFIXES[format.to_s]
      raise InvalidParameterInStep.new('format', format) unless suffixes

      # Every entry is tested and the last matching one wins.
      to_suffix = nil
      suffixes.each do |pattern, suffix|
        to_suffix = suffix if job.mime_type =~ /#{pattern}/
      end
      raise InvalidMimeTypeInStep.new('(various document formats)', job.mime_type) unless to_suffix

      # NOTE(review): assumes the converter writes its output next to
      # job.content_file as "<content_file>.<suffix>", i.e. the content file
      # lives in sandbox_dir and has no extension of its own - confirm against caller.
      target_file = "#{job.content_file}.#{to_suffix}"
      intermediate_pdf = nil

      begin
        if to_suffix == 'txt'
          # Plain text is produced in two steps: document -> PDF -> text.
          intermediate_pdf = "#{job.content_file}.pdf"
          executioner.execute(
            'libreoffice',
            '--convert-to', 'pdf',
            '--outdir', sandbox_dir,
            job.content_file,
            '--headless',
          )
          # Stop here on failure so the reported messages are LibreOffice's own
          # rather than those of a pdftotext run against a missing PDF.
          raise ConversionFailed.new(executioner.last_messages) if executioner.last_exit_status != 0

          executioner.execute(
            'pdftotext',
            intermediate_pdf,
            target_file
          )
        else
          executioner.execute(
            'libreoffice',
            '--convert-to', to_suffix,
            '--outdir', sandbox_dir,
            job.content_file,
            '--headless',
          )
        end

        raise ConversionFailed.new(executioner.last_messages) if executioner.last_exit_status != 0
        unless File.exist?(target_file)
          raise ConversionFailed.new("Cannot find converted file (looking for #{File.basename(target_file)})")
        end

        job.content = File.read(target_file)
        File.unlink(target_file)
      ensure
        # The intermediate PDF is only a stepping stone for 'txt'; never leave it behind.
        File.unlink(intermediate_pdf) if intermediate_pdf && File.exist?(intermediate_pdf)
      end
    end
  end
end