This repository has been archived by the owner on Mar 21, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 15
/
readme_manipulator.rb
236 lines (188 loc) · 7.32 KB
/
readme_manipulator.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# Takes a readme path and makes some changes, then passes that
# back to the system as a rendered version of the README
class ReadmeManipulator
include HashInit
attr_accessor :spec, :readme_path
# Post-processes the rendered README for CocoaDocs.
#
# Does nothing (returns nil) unless the spec reports a GitHub origin and the
# README file exists. Otherwise a copy of the untouched file is saved first
# and the clean-up passes below run in order, each one reading and rewriting
# the file at @readme_path.
def run_for_cocoadocs
  return unless @spec.or_is_github? && File.exist?(@readme_path)

  make_dupe
  fix_relative_links_in_gfm
  remove_known_badges
  remove_named_header
  fix_header_anchors
  remove_dependency_manager_cues
end
# Post-processes a jazzy-generated page that embeds the README.
#
# Does nothing (returns nil) unless the spec reports a GitHub origin and the
# file at @readme_path exists. Link fixing and dependency-manager clean-up
# are scoped to the 'article.main-content .section' element; unlike
# run_for_cocoadocs, no backup copy of the file is made.
def run_for_jazzy
  return unless @spec.or_is_github? && File.exist?(@readme_path)

  readme_section = 'article.main-content .section'
  fix_relative_links_in_gfm(readme_section)
  remove_known_badges
  remove_named_header
  fix_header_anchors
  remove_dependency_manager_cues(readme_section)
end
# Saves a pristine copy of the README next to the original, with the
# trailing ".html" replaced by "_original.html".
#
# Only the extension at the very end of the path is rewritten, so a ".html"
# that appears earlier in the path (for example in a directory name) is left
# alone and the copy always lands beside the source file.
def make_dupe
  FileUtils.cp @readme_path, @readme_path.sub(/\.html\z/, "_original.html")
end
# Converts a README-relative link into an absolute raw.github.com URL.
#
# Returned untouched:
# * in-page anchors ("#usage") and protocol-relative URLs ("//host/path")
# * anything that already carries a URI scheme ("http://...", "https://...",
#   "data:...", "ftp://...")
# * strings containing "@" (presumably e-mail style links; kept from the
#   previous behaviour)
#
# A relative path that merely starts with the letters "http" (such as
# "httpd/setup.md") has no scheme and is therefore rewritten like any other
# relative path.
#
# @param link_string [String] value of an href/src attribute
# @return [String] the input itself, or an absolute URL built from the spec's
#   or_user, or_repo and or_git_ref
def fix_relative_link(link_string)
  return link_string if link_string.start_with?("#", "//")
  return link_string if link_string =~ /\A[a-z][a-z0-9+.\-]*:/i
  return link_string if link_string.include?("@")

  # CGI.escape performs form encoding: turn the escaped forward slashes (%2F)
  # back into forward slashes, and turn the "+" it emits for a space into
  # "%20", because a "+" inside a URL path is a literal plus sign. A literal
  # "+" in the input is already encoded as %2B, so every "+" left is a space.
  link_path = CGI.escape(link_string).gsub('%2F', '/').gsub('+', '%20')
  "https://raw.github.com/#{@spec.or_user}/#{@spec.or_repo}/#{@spec.or_git_ref}/#{link_path}"
end
# Rewrites every <a href> and <img src> in the README through
# fix_relative_link and writes the document back to @readme_path.
#
# @param parent_selector [String, nil] CSS selector of the element to limit
#   the rewrite to (its first match is used); nil processes the whole document
# @return nil when parent_selector matches nothing (the file is left
#   untouched), otherwise the result of the final File.open call
def fix_relative_links_in_gfm(parent_selector = nil)
  vputs "Fixing relative URLs in github flavoured markdown"

  doc = Nokogiri::HTML(File.read(@readme_path))
  main = parent_selector ? doc.css(parent_selector).first : doc
  # The scoping element is absent: there is nothing to rewrite
  return unless main

  { "a" => "href", "img" => "src" }.each do |tag, attribute_name|
    main.css(tag).each do |element|
      attribute = element.attributes[attribute_name]
      attribute.value = fix_relative_link(attribute.value) if attribute
    end
  end

  # Delete-then-recreate as before, but without handing the path to a shell,
  # where quotes or $() inside the path would be interpreted
  File.delete(@readme_path) if File.exist?(@readme_path)
  File.open(@readme_path, 'w') { |f| f.write(doc) }
end
# Don't have a redundant header under the site's provided one.
#
# Looks at the first meaningful child of <body>; if that is an empty
# paragraph it is removed and the next meaningful child is considered
# instead. The node under consideration is removed when its stripped text
# equals @spec.name. The document is always written back to @readme_path,
# including when <body> holds nothing to inspect.
def remove_named_header
  doc = Nokogiri::HTML(File.read(@readme_path))

  # Nokogiri counts whitespace as a text node, so ignore all of them for this case
  real_elements = doc.xpath('/html/body/child::node()').select do |node|
    node.type != Nokogiri::XML::Node::TEXT_NODE || node.content =~ /\S/
  end

  header_anchor = real_elements[0]

  # Is it an empty paragraph (because we removed all the badges)?
  if header_anchor && header_anchor.name == "p" && header_anchor.text.strip == ""
    header_anchor.remove
    header_anchor = real_elements[1]
  end

  # header_anchor is nil when the body was empty or held only that paragraph
  header_anchor.remove if header_anchor && header_anchor.text.strip == @spec.name

  # Delete-then-recreate as before, but without handing the path to a shell
  File.delete(@readme_path) if File.exist?(@readme_path)
  File.open(@readme_path, 'w') { |f| f.write(doc) }
end
# Strips well-known status badges from the README and writes the document
# back to @readme_path.
#
# Two passes are made:
# 1. links to https://travis-ci.org whose inner HTML mentions ".svg" or
#    ".png?branch" are removed;
# 2. for every image whose data-canonical-src starts with one of the badge
#    hosts listed below, the image's parent node is removed.
def remove_known_badges
  vputs "Fixing Travis links in markdown"
  doc = Nokogiri::HTML(File.read(@readme_path))

  doc.css('a[href^="https://travis-ci.org"]').each do |link|
    markup = link.inner_html
    link.remove if markup.include?(".svg") || markup.include?(".png?branch")
  end

  urls_to_delete = ['http://cocoapod-badges.herokuapp',
                    'https://cocoapod-badges.herokuapp',
                    'https://img.shields.io',
                    'http://img.shields.io',
                    'https://reposs.herokuapp.com',
                    'https://secure.travis-ci.org',
                    'https://kiwiirc.com',
                    'https://badges.gitter.im',
                    'https://coveralls.io',
                    'https://landscape.io',
                    'https://badge.waffle.io',
                    'https://codecov.io/',
                    'https://versioneye.com/']

  urls_to_delete.each do |prefix|
    doc.css(%(img[data-canonical-src^="#{prefix}"])).each do |image|
      # The parent is removed, not just the image, so the wrapping node goes too
      image.parent.remove
    end
  end

  # Delete-then-recreate as before, but without handing the path to a shell,
  # where quotes or $() inside the path would be interpreted
  File.delete(@readme_path) if File.exist?(@readme_path)
  File.open(@readme_path, 'w') { |f| f.write(doc) }
end
# Drops the leading "user-content-" prefix from the name attribute of the
# first link inside every h1, h2 and h3, then writes the document back to
# @readme_path.
#
# Only the prefix at the start of the name is stripped, so a name that
# legitimately contains "user-content-" further in keeps it.
# Does nothing unless the spec reports a GitHub origin and the file exists.
def fix_header_anchors
  vputs "Fixing header anchor names"
  return unless @spec.or_is_github?
  return unless File.exist? @readme_path

  doc = Nokogiri::HTML(File.read(@readme_path))

  doc.css('h1, h2, h3').each do |heading|
    anchor = heading.css('a').first
    name_attribute = anchor && anchor.attributes["name"]
    next unless name_attribute

    name_attribute.value = name_attribute.value.sub(/\Auser-content-/, "")
  end

  # Delete-then-recreate as before, but without handing the path to a shell
  File.delete(@readme_path) if File.exist?(@readme_path)
  File.open(@readme_path, 'w') { |f| f.write(doc) }
end
# Removes Carthage / CocoaPods installation hints from the README and writes
# the document back to @readme_path.
#
# Steps:
# 1. links to the Carthage repo and to cocoapods.org (anywhere in the
#    document) are replaced by their plain text;
# 2. inside the main element, sections and paragraph/pre pairs about either
#    dependency manager are removed by the two helper methods;
# 3. if the first non-blank node after an "Install" h2 is another h2, the
#    now-empty "Install" heading is removed as well.
#
# @param parent_selector [String, nil] CSS selector of the element to clean
#   (its first match is used); nil uses the document's body. When the
#   selector matches nothing, steps 2 and 3 are skipped.
def remove_dependency_manager_cues(parent_selector = nil)
  doc = Nokogiri::HTML(File.read(@readme_path))
  main = parent_selector ? doc.css(parent_selector).first : doc.css("body")

  # replace all clickable Carthage and CP links with just the text
  ['https://github.com/Carthage/Carthage', 'https://cocoapods.org'].each do |prefix|
    doc.css(%(a[href^="#{prefix}"])).each do |link|
      link.replace(Nokogiri::XML::Text.new(link.text, doc))
    end
  end

  if main
    # replace sections beginning with a header
    main = remove_dependency_manager_section main, "carthage"
    main = remove_dependency_manager_section main, "cocoapods"

    # look for just a paragraph then a pre with 'github "' or 'pod "
    main = remove_two_linked_paragraphs main, "carthage", 'github "'
    main = remove_two_linked_paragraphs main, "cocoapods", 'pod "'

    # Look for an empty installation section and remove the 'installation' h2
    install = doc.at('h2:contains("Install")')
    siblings = main.children.to_a
    install_index = install && siblings.find_index(install)
    if install_index
      # First node after the heading that has any visible text
      first_sibling = siblings.drop(install_index + 1).find do |child|
        !child.text.to_s.strip.empty?
      end
      install.remove if first_sibling && first_sibling.name == "h2"
    end
  end

  File.open(@readme_path, 'w') { |f| f.write(doc) }
end
# Removes every section whose heading mentions +name+.
#
# Walks the direct children of +main_element+, skipping text nodes. A child
# whose tag name starts with "h" switches removal on when its lower-cased
# text contains +name+ and off when it does not; every non-text child seen
# while removal is on (the matching heading included) is removed.
#
# @param main_element the node (or node set) whose children are scanned
# @param name [String] lower-case dependency manager name to look for
# @return the same main_element that was passed in
def remove_dependency_manager_section(main_element, name)
  removing = false

  main_element.children.each do |node|
    next if node.type == Nokogiri::XML::Node::TEXT_NODE

    # Each heading decides afresh whether the section it opens gets dropped
    removing = node.text.downcase.include?(name) if node.name.start_with?("h")
    node.remove if removing
  end

  main_element
end
# Removes one "<p>…before…</p>" + "<pre>…after…</pre>" pair.
#
# Scans the direct children of +main_element+, skipping text nodes, and
# remembers the most recent <p> whose lower-cased text contains +before+.
# When a <pre> whose lower-cased text contains +after+ sits exactly two
# positions after that paragraph in the child list (one node, normally the
# whitespace text node, in between), both are removed and the scan stops.
#
# @param main_element the node (or node set) whose children are scanned
# @param before [String] lower-case text the paragraph must contain
# @param after [String] lower-case text the pre block must contain
# @return the same main_element that was passed in
def remove_two_linked_paragraphs(main_element, before, after)
  siblings = main_element.children
  last_paragraph = nil

  siblings.each do |node|
    next if node.type == Nokogiri::XML::Node::TEXT_NODE

    last_paragraph = node if node.name == "p" && node.text.downcase.include?(before)
    next unless last_paragraph
    next unless node.name == "pre" && node.text.downcase.include?(after)

    # We have to include the skipped text node, thus 2
    next unless siblings.index(node) - siblings.index(last_paragraph) == 2

    last_paragraph.remove
    node.remove
    break
  end

  main_element
end
end