Permalink
Fetching contributors…
Cannot retrieve contributors at this time
1651 lines (1454 sloc) 45.7 KB
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="referrer" content="never">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta http-equiv="x-dns-prefetch-control" content="off">
<base target="_blank">
<link rel="alternate" type="application/rss+xml" title="RSS Feed" href="/feed/archivebot.rss">
<link rel="alternate" type="application/atom+xml" title="Atom Feed" href="/feed/archivebot.atom">
<link rel="icon" type="image/png" href="/assets/favicon.png">
<title>ArchiveBot dashboard</title>
<script>
(function() {
  // Nothing to do when this page is already the top-level document
  if(self == top) {
    return;
  }
  // Framebust: send the top-level window to this page's own URL.
  // Ugly archivebot.com-specific hack: rewrite the port-4567 URL first.
  // NOTE(review): "arshboard" in the first URL looks like a typo for another
  // host name; left untouched because the intended host cannot be confirmed here.
  var target = self.location.href.replace(
    "http://arshboard.at.ninjawedding.org:4567/",
    "http://dashboard.at.ninjawedding.org/");
  top.location = target;
})();
</script>
</head>
<body>
<style>
html {
/* Always show scrollbar to prevent jumpiness when filtering */
overflow-y: scroll;
}
html, body {
background-color: #D0C0AE;
font-family: Tahoma, Arial, sans-serif;
font-size: 13px;
/**
* Opt out of Chrome's Scroll Anchoring, which causes this page to
* incorrectly scroll up as new lines appear in log windows.
* https://bugs.chromium.org/p/chromium/issues/detail?id=650017
* https://github.com/WICG/interventions/blob/0063fe5d3d0e086d4f963c8bd612d12b57db0784/scroll-anchoring/explainer.md#exclusion--opt-out-api
*/
overflow-anchor: none;
}
#filter-box {
background-color: #eee;
border: 1px solid #999;
padding: 1px 3px 1px 3px;
font-size: 18px;
border-radius: 3px;
}
.button {
font-size: 18px;
}
.padded-page {
padding: 20px 27px 20px 27px;
}
@media all and (min-width: 1440px) {
.padded-page {
padding: 20px 54px 47px 54px;
}
}
.header {
font-family: Arial, sans-serif;
font-weight: bold;
font-size: 18px;
margin: 0 0 20px 0;
display: flex;
align-items: flex-end;
justify-content: space-between;
flex-flow: row nowrap;
}
.job-header {
display: flex;
align-items: flex-end;
justify-content: space-between;
flex-flow: row nowrap;
cursor: default;
}
.stats-elements:hover {
background-color: #D8CBBC;
}
.job-info {
white-space: nowrap;
overflow: hidden;
}
.job-info-done {
color: #767676;
}
.job-info-aborted {
color: #9B00D7 !important;
}
.job-info-fatal {
color: #DD0000 !important;
}
.inline-stat {
/* Needed for 'Align!' feature */
display: inline-block;
/* Needed to avoid extra vertical padding */
vertical-align: bottom;
/* Needed to avoid collapsing of leading space */
white-space: pre;
}
.job-url {
font-family: Arial, sans-serif;
font-size: 14px;
font-weight: bold;
text-decoration: none;
color: inherit;
}
.job-url-aligned {
width: 260px;
overflow: hidden;
text-overflow: ellipsis;
}
.job-note-aligned {
display: none;
}
/*
 * Fixed-width columns used by the 'Align!' feature.
 * Overflowing text is simply cut off; `clip` is the valid keyword for that
 * (`hidden` is not a text-overflow value and was being dropped by browsers).
 */
.job-nick-aligned {
  width: 60px;
  overflow: hidden;
  text-overflow: clip;
}
.job-mb-aligned {
  width: 70px;
  overflow: hidden;
  text-overflow: clip;
  text-align: right;
}
.job-responses-aligned {
  width: 92px;
  overflow: hidden;
  text-overflow: clip;
  text-align: right;
}
.job-in-queue-aligned {
  width: 92px;
  overflow: hidden;
  text-overflow: clip;
  text-align: right;
}
.job-delay-aligned {
  width: 130px;
  overflow: hidden;
  text-overflow: clip;
  text-align: right;
}
.job-ignores {
font-weight: bold;
}
.job-igoff {
font-weight: normal !important;
}
.job-ident {
font-family: Tahoma, Arial, sans-serif;
margin: 0 1px 0 0;
padding: 0;
border: 0;
background-color: #D0C0AE;
color: #645444;
text-align: right;
}
.log-window {
transition: height 0.18s ease;
background-color: #FFF7E1;
overflow-y: scroll;
height: 192px;
border: 1px solid #999;
margin: 0 0 1em 0;
border-radius: 3px;
}
.log-window-hidden {
height: 0;
border-width: 0 1px 0 1px;
margin: 0;
}
.log-window-stopped {
border: 1px solid #222;
box-shadow: 2px 2px 4px #888;
}
.log-window-expanded {
height: 384px;
}
.line-normal {
display: block;
white-space: pre;
width: 100%;
padding: 0 0 0 5px;
box-sizing: border-box;
}
.line-error {
display: block;
white-space: pre;
width: 100%;
background-color: #FFB9B9;
padding: 0 0 0 5px;
box-sizing: border-box;
}
.line-warning {
display: block;
white-space: pre;
width: 100%;
background-color: #F7DB7D;
padding: 0 0 0 5px;
box-sizing: border-box;
}
.line-redirect {
display: block;
white-space: pre;
width: 100%;
background-color: #E7CEEA;
padding: 0 0 0 5px;
box-sizing: border-box;
}
.line-ignore {
white-space: pre;
width: 100%;
color: #999;
padding: 0 0 0 5px;
box-sizing: border-box;
}
.line-stdout {
white-space: pre;
width: 100%;
background-color: #DCD8CB;
padding: 0 0 0 5px;
box-sizing: border-box;
}
a {
color: #000;
text-decoration: none;
}
a.ignore {
color: #999 !important;
}
.underlined-a {
text-decoration: underline;
}
.bold {
font-weight: bold;
}
#help {
background-color: #FFF7E1;
font-family: Arial, sans-serif;
font-size: 14px;
border-radius: 5px;
padding: 0.01em 1em 0.01em 1em;
margin-bottom: 1em;
}
#help p {
padding: 0.20em 0 0.20em 0;
}
#help p a {
text-decoration: underline;
}
.undisplayed {
display: none;
}
#context-menu {
padding: 2px 0 2px 0;
background-color: white;
border: 1px solid #BABABA;
box-shadow: 2px 2px 3px #8E8E8E;
position: fixed;
left: 0;
top: 0;
display: none;
cursor: default;
}
.context-menu-entry {
display: block;
white-space: nowrap;
overflow: hidden;
max-width: 960px;
height: 26px;
line-height: 26px;
padding-left: 26px;
padding-right: 26px;
font-family: 'Segoe UI', 'Helvetica Neue', sans-serif;
font-size: 12px;
cursor: default;
-webkit-touch-callout: none;
-webkit-user-select: none;
-khtml-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
.context-menu-entry:hover {
background-color: #4281F4;
color: #fff;
}
#clipboard-scratchpad {
height: 1px;
width: 1px;
padding: 0;
border: 0;
position: absolute;
top: 0;
}
</style>
<div id="context-menu"></div>
<div class="padded-page">
<div class="header">
  <div>
    <a href="http://archiveteam.org/index.php?title=ArchiveBot" class="underlined-a">ArchiveBot</a>
    tracking <span id="num-crawls">0</span> crawls.
    See also <a href="pipelines" class="underlined-a">pipeline</a> or <a href="logs/recent" class="underlined-a">job</a> reports.
    <!-- "Show:" is the visible label of the filter box; tie them together for assistive technology -->
    <label for="filter-box">Show:</label> <input id="filter-box" type="text" size="21">
    <input onclick="ds.setFilter('');" type="button" value=" All " class="button">
    <input onclick="ds.setFilter('^$');" type="button" value="None" class="button">
  </div>
  <div>
    <input type="button" onclick="ds.toggleAlign()" class="button" value="Align!">
    <input type="button" onclick="ds.toggleHelp()" class="button" value="Help!">
  </div>
</div>
<div id="critical-info">
<noscript>
Need JavaScript (ES5+) and WebSocket -&gt; TCP:4567
</noscript>
<div id="help" class="undisplayed">
<p>
This page shows all of the crawls that <a href="http://archiveteam.org/index.php?title=ArchiveBot">ArchiveBot</a> is currently running.
</p>
<p>
To pause scrolling, move your mouse inside a log window.
</p>
<p>
To show just one job, click anywhere on its stats line.
</p>
<p>
To clear all finished jobs, reload the page.
</p>
<p>
Keyboard shortcuts:
</p>
<ul>
<li><kbd>j</kbd> - show next job window
<li><kbd>k</kbd> - show previous job window
<li><kbd>a</kbd> - show all job windows
<li><kbd>n</kbd> - hide all job windows
<li><kbd>f</kbd> - move focus to filter box
<li><kbd>v</kbd> - open the job URL of the first-shown job window
<li><kbd>?</kbd> - show/hide help text
</ul>
<p>
The color coding for the job stats line is:
in progress,
<span class="job-info-done">finished normally</span>,
<span class="job-info-aborted">finished with abort</span>,
<span class="job-info-fatal">finished with fatal exception</span>.
</p>
<p>
Mouse over the job start date or the response count for additional information.
</p>
<p>
If your adblocker is enabled for this domain, you will see slower performance, and some URLs will not be displayed.
</p>
<p>
In Chrome 42+, Firefox 41+, and IE10+, a custom context menu is enabled when right-clicking URLs in the log windows below. It can be disabled by adding <kbd><span class="url-q-or-amp">?</span>contextMenu=0</kbd> to the dashboard URL. Firefox users: if you see both the normal and custom context menu, make sure <code>dom.event.contextmenu.enabled</code> is set to <code>true</code> in <code>about:config</code>.
</p>
<p>
For easier text selection in the log windows, add <kbd><span class="url-q-or-amp">?</span>moreDom=1</kbd> to the dashboard URL. (This uses ~25% more memory.)
</p>
<p>
To use ArchiveBot, drop by <a href="http://chat.efnet.org:9090/?nick=&amp;channels=%23archivebot&amp;Login=Login">#archivebot</a> on EFNet. <a href="http://archivebot.readthedocs.org/en/latest/">Issue commands</a> by typing them into the channel. You will need channel operator (@) or voice (+) status to issue archiving jobs; just ask for help or leave a message with the website you want to archive.
</p>
<p>
These <a href="https://github.com/ArchiveTeam/ArchiveBot/tree/master/db/ignore_patterns">ignore sets</a> are available for crawls. The <a href="https://github.com/ArchiveTeam/ArchiveBot/blob/master/db/ignore_patterns/global.json">global</a> ignore set automatically applies to all crawls.
</p>
<p>
On GitHub at <a href="https://github.com/ArchiveTeam/ArchiveBot">ArchiveTeam/ArchiveBot</a>.
</p>
<p>
You can <a href="/beta">try the alternate dashboard (beta) here.</a>
</p>
</div>
</div>
<div id="traffic"></div>
<div id="logs"></div>
</div>
<script>
"use strict";
var assert = function(condition, message) {
if(!condition) {
throw message || "Assertion failed";
}
};
/**
 * Shorthand for document.getElementById.
 */
var byId = function(id) {
  var element = document.getElementById(id);
  return element;
};
/**
 * Shorthand for document.createTextNode.
 */
var text = function(s) {
  var node = document.createTextNode(s);
  return node;
};
/**
 * Adaptation of ActiveSupport's #blank?.
 *
 * Returns true if the object is falsy (undefined, null, "", 0, false, NaN)
 * or is a string whose post-trim length is zero. Otherwise, returns false.
 *
 * Truthy non-string values (numbers, objects, ...) are never blank; they
 * previously caused a TypeError because .trim() was called on them.
 */
var isBlank = function(o) {
  if(!o) {
    return true;
  }
  return typeof o == "string" && o.trim().length === 0;
};
/**
 * appendChild that also accepts strings (appended as text nodes) and
 * arbitrarily nested arrays of nodes|strings (appended in order).
 *
 * Throws an Error for null/undefined children.
 */
var appendAny = function(e, thing) {
  if(typeof thing == "string") {
    e.appendChild(text(thing));
    return;
  }
  if(!Array.isArray(thing)) {
    if(thing == null) {
      throw Error("thing is " + JSON.stringify(thing));
    }
    e.appendChild(thing);
    return;
  }
  for(var idx = 0; idx < thing.length; idx++) {
    appendAny(e, thing[idx]);
  }
};
/**
 * Create a DOM element with attributes and children.
 *
 * @param elem   Tag name.
 * @param attrs  Optional object. "spellcheck" and "readonly" are set as
 *               attributes; every other key is assigned as a property on the
 *               element. Use "className", not "class" (which throws).
 * @param thing  Optional children: Array<node|string>|node|string.
 */
var h = function(elem, attrs, thing) {
  var e = document.createElement(elem);
  if(attrs != null) {
    for(var attr in attrs) {
      switch(attr) {
        case "class":
          throw new Error("Did you mean className?");
        case "spellcheck":
        case "readonly":
          e.setAttribute(attr, attrs[attr]);
          break;
        default:
          e[attr] = attrs[attr];
      }
    }
  }
  if(thing != null) {
    appendAny(e, thing);
  }
  return e;
};
/**
 * Create an <a> element pointing at `href` whose text content is `text`.
 */
var href = function(href, text) {
  // h() assigns non-special keys as properties, in this order
  return h("a", {"href": href, "textContent": text});
};
/**
 * Remove every child node of `elem`, first child first.
 */
var removeChildren = function(elem) {
  for(var child = elem.firstChild; child; child = elem.firstChild) {
    elem.removeChild(child);
  }
};
/**
 * Serialize `obj` as JSON indented with two spaces.
 */
var prettyJson = function(obj) {
  var indent = 2;
  return JSON.stringify(obj, null, indent);
};
// Copied from Coreweb/js_coreweb/cw/string.js
/**
 * Like Python's s.split(delim, num) and s.split(delim)
 * This does *NOT* implement Python's no-argument s.split()
 *
 * @param {string} s The string to split.
 * @param {string} sep The separator to split by.
 * @param {number} maxsplit Maximum number of times to split. When undefined
 *   or negative, the string is split at every separator.
 *
 * @return {!Array.<string>} The split string, as an array. With a maxsplit,
 *   everything after the maxsplit-th separator is kept as one final element.
 */
var split = function(s, sep, maxsplit) {
  assert(typeof sep == "string",
    "arguments[1] of split must be a separator string");
  var pieces = s.split(sep);
  if(maxsplit === undefined || maxsplit < 0) {
    return pieces;
  }
  var head = pieces.slice(0, maxsplit);
  var rest = pieces.slice(maxsplit);
  if(rest.length > 0) {
    // Glue the unsplit remainder back together as the last element
    head.push(rest.join(sep));
  }
  return head;
};
// Copied from closure-library's goog.string.startsWith
/**
 * Returns true if `str` begins with `prefix`.
 */
var startsWith = function(str, prefix) {
  // Searching backwards from index 0 can only ever match at index 0
  var foundAt = str.lastIndexOf(prefix, 0);
  return foundAt == 0;
};
// Copied from closure-library's goog.string.endsWith
/**
 * Returns true if `str` ends with `suffix`.
 */
var endsWith = function(str, suffix) {
  var start = str.length - suffix.length;
  if(!(start >= 0)) {
    // Suffix is longer than the string
    return false;
  }
  return str.indexOf(suffix, start) == start;
};
// Based on closure-library's goog.string.regExpEscape
/**
 * Escape `s` so that it can be embedded in a RegExp source and match itself
 * literally.
 *
 * @param s Value to escape; coerced to a string first, so non-string input
 *          (e.g. a number) is accepted.
 * @return {string} The escaped string.
 */
var regExpEscape = function(s) {
  // Coerce once and use the coerced value everywhere; previously indexOf was
  // called on the raw argument and threw for non-strings.
  var str = String(s);
  var escaped = str.replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1').
    replace(/\x08/g, '\\x08');
  if(str.indexOf('[') == -1 && str.indexOf(']') == -1) {
    // If there were no character classes, there can't have been any need
    // to escape -, so unescape them.
    escaped = escaped.replace(/\\-/g, "-");
  }
  return escaped;
};
/**
 * Build an object from an array of [key, value] pairs:
 * [[1, 2], [3, 4]] -> {1: 2, 3: 4}
 * Later pairs overwrite earlier ones with the same key.
 */
var intoObject = function(arr) {
  return arr.reduce(function(acc, pair) {
    acc[pair[0]] = pair[1];
    return acc;
  }, {});
};
/**
 * Parse location.search into an object of key -> value strings.
 *
 * Each pair is split at its first "=" only; keys and values are returned
 * as-is (no URL decoding). A key without "=" maps to undefined.
 */
var getQueryArgs = function() {
  var query = location.search.replace("?", "");
  if(query == "") {
    return {};
  }
  var keyValues = query.split("&").map(function(pair) {
    return split(pair, "=", 1);
  });
  return intoObject(keyValues);
};
/**
 * Major version from the "Chrome/<digits>" token of navigator.userAgent.
 *
 * @return {number} The version, or NaN when the user agent has no such token
 *   (instead of throwing a TypeError on the failed match).
 */
var getChromeMajorVersion = function() {
  var match = navigator.userAgent.match(/Chrome\/(\d+)/);
  return match ? Number(match[1]) : NaN;
};
/**
 * Major version from the "Firefox/<digits>" token of navigator.userAgent.
 *
 * @return {number} The version, or NaN when the user agent has no such token
 *   (instead of throwing a TypeError on the failed match).
 */
var getFirefoxMajorVersion = function() {
  var match = navigator.userAgent.match(/Firefox\/(\d+)/);
  return match ? Number(match[1]) : NaN;
};
/**
 * Major version from the "Trident/<digits>" token of navigator.userAgent.
 *
 * @return {number} The version, or NaN when the user agent has no such token
 *   (instead of throwing a TypeError on the failed match).
 */
var getTridentMajorVersion = function() {
  var match = navigator.userAgent.match(/Trident\/(\d+)/);
  return match ? Number(match[1]) : NaN;
};
// Browser detection flags, computed once from substrings of navigator.userAgent.
var isChrome = navigator.userAgent.indexOf("Chrome/") != -1;
// "Safari" only counts when "Chrome/" is absent
// (presumably because Chrome's user agent also contains "Safari" - TODO confirm).
var isSafari = !isChrome && navigator.userAgent.indexOf("Safari") != -1;
var isFirefox = navigator.userAgent.indexOf("Firefox") != -1;
var isTrident = navigator.userAgent.indexOf("Trident/") != -1;
/**
 * Call `func` whenever the value of `elem` may have changed: on change,
 * keydown, paste and input.
 */
var addAnyChangeListener = function(elem, func) {
  // DOM0 handler for convenient use by Clear button
  elem.onchange = func;
  ["keydown", "paste", "input"].forEach(function(eventName) {
    elem.addEventListener(eventName, func, false);
  });
};
/**
 * Copy an array-like object (e.g. a NodeList) into a real Array.
 */
var arrayFrom = function(arrayLike) {
  return [].slice.call(arrayLike);
};
/**
 * Returns a getter function: given any object, it returns that object's
 * `name` property.
 */
var prop = function(name) {
  var getter = function(obj) {
    return obj[name];
  };
  return getter;
};
/**
 * Returns a function that adds the CSS class `name` to any element passed in.
 */
var classAdder = function(name) {
  var addTo = function(elem) {
    elem.classList.add(name);
  };
  return addTo;
};
/**
 * Returns a function that removes the CSS class `name` from any element
 * passed in.
 */
var classRemover = function(name) {
  var removeFrom = function(elem) {
    elem.classList.remove(name);
  };
  return removeFrom;
};
/**
 * Remove the first occurrence of `item` from `arr`, in place.
 * Does nothing when `item` is not present.
 */
var removeFromArray = function(arr, item) {
  var position = arr.indexOf(item);
  if(position == -1) {
    return;
  }
  arr.splice(position, 1);
};
// Based on http://stackoverflow.com/a/18520276
/**
 * Returns the index of the first element for which `test` returns a truthy
 * value, or null when there is none. `test` is called as
 * test.call(ctx, element, index, arr).
 */
var findInArray = function(arr, test, ctx) {
  var foundIndex = null;
  arr.some(function(el, i) {
    if(!test.call(ctx, el, i, arr)) {
      return false;
    }
    foundIndex = i;
    // Returning true stops the iteration at the first hit
    return true;
  });
  return foundIndex;
};
/*** End of utility code ***/
/**
 * Keeps track of which jobs have been seen, their display order, and which
 * of them are finished or have hit a fatal exception.
 *
 * Fields:
 *   known             - ident -> true for every job seen so far
 *   sorted            - job data objects, most recently started first
 *   finishedArray     - idents of finished jobs, in the order they finished
 *   finishedSet       - ident -> true for finished jobs
 *   fatalExceptionSet - ident -> true for jobs with a fatal exception
 *
 * Membership tests use hasOwnProperty rather than `in`, so that an ident that
 * happens to equal an Object.prototype member name (e.g. "toString") is not
 * mistaken for a known job.
 */
var JobsTracker = function() {
  this.known = {};
  this.sorted = [];
  this.finishedArray = [];
  this.finishedSet = {};
  this.fatalExceptionSet = {};
};

/**
 * Number of tracked jobs that are not marked finished.
 */
JobsTracker.prototype.countActive = function() {
  return this.sorted.length - this.finishedArray.length;
};

/**
 * Sort jobs by start time, newest first.
 *
 * "started_at" is compared numerically (parseFloat) so that string
 * timestamps with a different number of digits still order correctly, and
 * the comparator returns 0 for equal timestamps as Array#sort requires.
 */
JobsTracker.prototype.resort = function() {
  this.sorted.sort(function(a, b) {
    return parseFloat(b["started_at"]) - parseFloat(a["started_at"]);
  });
};

/**
 * Registers the job described by jobData (keyed by its "ident") if it has
 * not been seen before.
 *
 * Returns true if a new job was added
 */
JobsTracker.prototype.handleJobData = function(jobData) {
  var ident = jobData["ident"];
  var alreadyKnown = Object.prototype.hasOwnProperty.call(this.known, ident);
  if(!alreadyKnown) {
    this.known[ident] = true;
    this.sorted.push(jobData);
    this.resort();
  }
  return !alreadyKnown;
};

/**
 * Marks a job as finished; marking it again has no effect.
 */
JobsTracker.prototype.markFinished = function(ident) {
  if(!Object.prototype.hasOwnProperty.call(this.finishedSet, ident)) {
    this.finishedSet[ident] = true;
    this.finishedArray.push(ident);
  }
};

/**
 * Clears the finished and fatal-exception marks of a job.
 */
JobsTracker.prototype.markUnfinished = function(ident) {
  if(Object.prototype.hasOwnProperty.call(this.finishedSet, ident)) {
    delete this.finishedSet[ident];
    removeFromArray(this.finishedArray, ident);
  }
  // Job was restarted, so unmark fatal exception
  if(Object.prototype.hasOwnProperty.call(this.fatalExceptionSet, ident)) {
    delete this.fatalExceptionSet[ident];
  }
};

/**
 * Records that a job hit a fatal exception.
 */
JobsTracker.prototype.markFatalException = function(ident) {
  this.fatalExceptionSet[ident] = true;
};

/**
 * Returns true if markFatalException was called for this job and it has not
 * been marked unfinished since.
 */
JobsTracker.prototype.hasFatalException = function(ident) {
  return Object.prototype.hasOwnProperty.call(this.fatalExceptionSet, ident);
};
/**
 * Plain record of the DOM nodes and line counters kept per rendered job.
 *
 * logWindow         - the job's log window element
 * logSegment        - the segment element new log lines are appended to
 * statsElements     - object holding the stat elements of the job header
 * jobNote           - element that displays the job note
 * lineCountWindow   - number of lines currently counted in the log window
 * lineCountSegments - array of per-segment line counts, oldest segment first
 */
var JobRenderInfo = function(logWindow, logSegment, statsElements, jobNote, lineCountWindow, lineCountSegments) {
this.logWindow = logWindow;
this.logSegment = logSegment;
this.statsElements = statsElements;
this.jobNote = jobNote;
this.lineCountWindow = lineCountWindow;
this.lineCountSegments = lineCountSegments;
};
/**
 * Shared attribute objects for h(), so that a fresh object literal does not
 * have to be created for every rendered log line.
 */
var Reusable = {
  obj_className_line_normal: {className: "line-normal"},
  obj_className_line_error: {className: "line-error"},
  obj_className_line_warning: {className: "line-warning"},
  obj_className_line_redirect: {className: "line-redirect"},
  obj_className_line_ignore: {className: "line-ignore"},
  obj_className_line_stdout: {className: "line-stdout"},
  obj_className_bold: {className: "bold"}
};
// http://stackoverflow.com/questions/2901102/how-to-print-a-number-with-commas-as-thousands-separators-in-javascript
/**
 * Insert "," thousands separators into the digit runs of a number or string.
 */
var numberWithCommas = function(s_or_n) {
  var asString = "" + s_or_n;
  return asString.replace(/\B(?=(\d{3})+(?!\d))/g, ",");
};
/**
 * Format `n` rounded to one decimal place, always showing the decimal
 * ("3" becomes "3.0").
 */
var toStringTenths = function(n) {
  var rounded = Math.round(10 * n) / 10;
  var s = "" + rounded;
  return s.indexOf(".") == -1 ? s + ".0" : s;
};
/**
 * Sum of the response counters of a job: r1xx, r2xx, r3xx, r4xx, r5xx and
 * runk.
 *
 * Counters are parsed as base-10 integers. A counter that is missing or not
 * parseable is counted as 0 instead of turning the whole total into NaN.
 *
 * @return {number}
 */
var getTotalResponses = function(jobData) {
  var counters = ["r1xx", "r2xx", "r3xx", "r4xx", "r5xx", "runk"];
  var total = 0;
  for(var i = 0; i < counters.length; i++) {
    var count = parseInt(jobData[counters[i]], 10);
    if(!isNaN(count)) {
      total += count;
    }
  }
  return total;
};
/**
 * Multi-line breakdown of a job's response counters, one "label: count"
 * line per counter, with thousands separators.
 */
var getSummaryResponses = function(jobData) {
  var rows = [
    ["1xx", "r1xx"],
    ["2xx", "r2xx"],
    ["3xx", "r3xx"],
    ["4xx", "r4xx"],
    ["5xx", "r5xx"],
    ["Unknown", "runk"]
  ];
  return rows.map(function(row) {
    return row[0] + ": " + numberWithCommas(jobData[row[1]]);
  }).join("\n");
};
/**
 * Renders job log windows into `container` and keeps them filtered by the
 * regular expression typed into `filterBox`.
 *
 * @param container            Element that receives one log container per job.
 * @param filterBox            Input element holding the filter.
 * @param historyLines         Number of log lines to keep per job; segments
 *                             hold about a tenth of that (at least 1 line).
 * @param showNicks            Whether the header shows who started a job.
 * @param contextMenuRenderer  Context menu renderer consulted by the log
 *                             windows' mouseleave handling.
 */
var JobsRenderer = function(container, filterBox, historyLines, showNicks, contextMenuRenderer) {
  this.container = container;
  this.filterBox = filterBox;
  var refilter = this.applyFilter.bind(this);
  addAnyChangeListener(this.filterBox, refilter);
  this.filterBox.onkeypress = function(keyEvent) {
    // So that j or k in input box does not result in job window switching
    keyEvent.stopPropagation();
  };
  this.historyLines = historyLines;
  this.showNicks = showNicks;
  this.contextMenuRenderer = contextMenuRenderer;
  var tenthOfHistory = Math.round(this.historyLines / 10);
  this.linesPerSegment = Math.max(1, tenthOfHistory);
  this.jobs = new JobsTracker();
  // ident -> JobRenderInfo
  this.renderInfo = {};
  // ident of the job whose log window the mouse is inside, or null
  this.mouseInside = null;
  this.numCrawls = byId('num-crawls');
  this.aligned = false;
};
/**
 * Returns the job that follows the job with the given ident in
 * this.jobs.sorted. The result is undefined when that job is the last one,
 * and null when no job has that ident.
 */
JobsRenderer.prototype._getNextJobInSorted = function(ident) {
  var sorted = this.jobs.sorted;
  var i = 0;
  while(i < sorted.length) {
    if(sorted[i]["ident"] == ident) {
      return sorted[i + 1];
    }
    i++;
  }
  return null;
};
/**
 * Creates an empty segment element; log lines are appended to segments, and
 * whole segments are removed when the scrollback is trimmed.
 */
JobsRenderer.prototype._createLogSegment = function() {
  var segment = h("div");
  return segment;
};
/**
 * Builds the DOM for one job and inserts it into this.container.
 *
 * The container ("log-container-<ident>") holds a header (stats line plus a
 * read-only input showing the ident) and the log window
 * ("log-window-<ident>") with its first log segment. It is inserted before
 * the container of the job that follows this one in this.jobs.sorted, or
 * appended when there is no such job. A JobRenderInfo for the job is stored
 * in this.renderInfo, then alignment classes and the filter are re-applied.
 *
 * @param jobData Job data object; reads "ident", "started_at", "url" and
 *                "started_by".
 */
JobsRenderer.prototype._createLogContainer = function(jobData) {
var ident = jobData["ident"];
var beforeJob = this._getNextJobInSorted(ident);
var beforeElement = beforeJob == null ? null : byId("log-container-" + beforeJob["ident"]);
var logSegment = this._createLogSegment();
var logWindowAttrs = {
"className": "log-window",
"id": "log-window-" + ident,
// While the mouse is inside a log window, remember which job it belongs to
// (handleData skips trimming and auto-scrolling for that job).
"onmouseenter": function(ev) {
this.mouseInside = ident;
ev.target.classList.add('log-window-stopped');
}.bind(this),
"onmouseleave": function(ev) {
var leave = function() {
this.mouseInside = null;
ev.target.classList.remove('log-window-stopped');
}.bind(this);
// When our custom context menu pops up, it causes onmouseleave on the
// log window, so make our leave callback fire only after the context
// menu is closed.
if(this.contextMenuRenderer.visible) {
this.contextMenuRenderer.callAfterBlur(leave);
} else {
leave();
}
}.bind(this)
}
// If you reach the end of a log window, the browser annoyingly
// starts to scroll the page instead. We prevent this behavior here.
// If the user wants to scroll the page, they need to move their
// mouse outside a log window first.
// Note: the handlers below refer to `logWindow`, a var that is assigned
// further down in this function; it is set by the time any event fires.
if(!isSafari) {
logWindowAttrs["onwheel"] = function(ev) {
// Note: offsetHeight is "wrong" by 2px but it doesn't matter
//console.log(ev, logWindow.scrollTop, (logWindow.scrollHeight - logWindow.offsetHeight));
if(ev.deltaY < 0 && logWindow.scrollTop == 0) {
ev.preventDefault();
} else if(ev.deltaY > 0 && logWindow.scrollTop >= (logWindow.scrollHeight - logWindow.offsetHeight)) {
ev.preventDefault();
}
}
} else {
// Safari 7.0.5 can't preventDefault or stopPropagation an onwheel event,
// so use onmousewheel instead.
// wheelDeltaY is compared with the opposite sign of deltaY above.
logWindowAttrs["onmousewheel"] = function(ev) {
//console.log(ev, logWindow.scrollTop, (logWindow.scrollHeight - logWindow.offsetHeight));
if(ev.wheelDeltaY > 0 && logWindow.scrollTop == 0) {
ev.preventDefault();
} else if(ev.wheelDeltaY < 0 && logWindow.scrollTop >= (logWindow.scrollHeight - logWindow.offsetHeight)) {
ev.preventDefault();
}
}
}
// Elements of the stats line; they start out with "?" placeholders and are
// filled in by handleData.
var statsElements = {
mb: h("span", {"className": "inline-stat job-mb"}, "?"),
responses: h("span", {"className": "inline-stat job-responses"}, "?"),
responsesPerSecond: h("span", null, "?"),
queueLength: h("span", {"className": "inline-stat job-in-queue"}, "? in q."),
connections: h("span", null, "?"),
delay: h("span", {"className": "inline-stat job-delay"}, "? ms delay"),
ignores: h("span", {"className": "job-ignores"}, "?"),
jobInfo: null /* set later */
};
// "started_at" is multiplied by 1000 before being handed to Date
var startedISOString = new Date(parseFloat(jobData["started_at"]) * 1000).toISOString();
var jobNote = h("span", {"className": "job-note"}, null);
statsElements.jobInfo = h(
"span", {"className": "job-info"}, [
h("a", {"className": "inline-stat job-url", "href": jobData["url"]}, jobData["url"]),
// Clicking anywhere in this area will set the filter to a regexp that
// matches only this job URL, thus hiding everything but this job.
h("span", {
"className": "stats-elements",
"onclick": function() {
var filter = ds.getFilter();
if(RegExp(filter).test(jobData["url"]) && startsWith(filter, "^") && endsWith(filter, "$")) {
// If we're already showing just this log window, go back
// to showing nothing.
ds.setFilter("^$");
} else {
ds.setFilter("^" + regExpEscape(jobData["url"]) + "$");
}
}
}, [
" on ",
// Shows the ISO date part without its first five characters; the full
// ISO timestamp is the tooltip
h("span", {"title": startedISOString}, startedISOString.split("T")[0].substr(5)),
h("span", {"className": "inline-stat job-nick"}, (this.showNicks ? " by " + jobData["started_by"] : "")),
jobNote,
"; ",
statsElements.mb,
" MB in ",
statsElements.responses,
" at ",
statsElements.responsesPerSecond,
"/s, ",
statsElements.queueLength,
"; ",
statsElements.connections,
" con. w/ ",
statsElements.delay,
"; ",
statsElements.ignores
])
]
);
var logWindow = h('div', logWindowAttrs, logSegment);
var div = h(
'div',
{"id": "log-container-" + ident}, [
h("div", {"className": "job-header"}, [
statsElements.jobInfo,
h("input", {
"className": "job-ident",
"type": "text",
"value": ident,
"size": "28",
"spellcheck": "false",
"readonly": "",
// `this` is the input element here: clicking selects the whole ident
"onclick": function() { this.select(); }
})
]),
logWindow
]
);
// Line counters start at 0 lines in the window and one empty segment
this.renderInfo[ident] = new JobRenderInfo(logWindow, logSegment, statsElements, jobNote, 0, [0]);
this.container.insertBefore(div, beforeElement);
// Set appropriate CSS classes - we might be in aligned mode already
this.updateAlign();
// Filter hasn't changed, but we might need to filter out the new job, or
// add/remove log-window-expanded class
this.applyFilter();
}
/**
 * Appends one download line to logSegment: a single link to the URL whose
 * text is "<response_code> <wget_code> <url>". The line's class is chosen in
 * this order: warning, error, redirect (response code 300-399), normal.
 *
 * @return {number} Number of lines rendered (always 1).
 */
JobsRenderer.prototype._renderDownloadLine = function(data, logSegment) {
  var code = data["response_code"];
  var lineClass;
  if(data["is_warning"]) {
    lineClass = "line-warning";
  } else if(data["is_error"]) {
    lineClass = "line-error";
  } else if(code && code >= 300 && code < 400) {
    lineClass = "line-redirect";
  } else {
    lineClass = "line-normal";
  }
  var label = code + " " + data["wget_code"] + " " + data["url"];
  var anchor = h("a", {"className": lineClass, "href": data["url"]}, label);
  logSegment.appendChild(anchor);
  return 1;
};
/**
 * Like _renderDownloadLine, but renders the status codes as plain text in a
 * wrapping div and only the URL as a link, which makes it easier to start a
 * text selection from the left or right of the URL.
 *
 * @return {number} Number of lines rendered (always 1).
 */
JobsRenderer.prototype._moreDomRenderDownloadLine = function(data, logSegment) {
  var code = data["response_code"];
  var lineAttrs = Reusable.obj_className_line_normal;
  if(data["is_warning"]) {
    lineAttrs = Reusable.obj_className_line_warning;
  } else if(data["is_error"]) {
    lineAttrs = Reusable.obj_className_line_error;
  } else if(code && code >= 300 && code < 400) {
    lineAttrs = Reusable.obj_className_line_redirect;
  }
  var prefix = code + " " + data["wget_code"] + " ";
  var link = h("a", {"href": data["url"], "className": "log-url"}, data["url"]);
  logSegment.appendChild(h("div", lineAttrs, [prefix, link]));
  return 1;
};
/**
 * Appends one "ignored URL" line to logSegment: an IGNOR label (including
 * the source when data["source"] is set), the URL as a link, and the pattern
 * that matched.
 *
 * @return {number} Number of lines rendered (always 1).
 */
JobsRenderer.prototype._renderIgnoreLine = function(data, logSegment) {
  var source = data["source"];
  var label = source != null ? " IGNOR (" + source + "): " : " IGNOR ";
  var row = h("div", Reusable.obj_className_line_ignore, [
    h('span', null, label),
    h('a', {"href": data["url"], "className": "ignore"}, data["url"]),
    h('span', Reusable.obj_className_bold, " by "),
    data["pattern"]
  ]);
  logSegment.appendChild(row);
  return 1;
};
/**
 * Appends the non-empty lines of a stdout message to logSegment and updates
 * the job's status (done / fatal / aborted / restarted) from well-known
 * log lines.
 *
 * @param data       Message; data["message"] may contain several lines.
 * @param logSegment Segment element that receives the lines.
 * @param info       JobRenderInfo of the job.
 * @param ident      Job ident.
 * @return {number} Number of lines rendered.
 */
JobsRenderer.prototype._renderStdoutLine = function(data, logSegment, info, ident) {
  var message = data["message"].replace(/[\r\n]+$/, "");
  if(!message) {
    return 0;
  }
  var self = this;
  var statusClasses = function() {
    return info.statsElements.jobInfo.classList;
  };
  var rendered = 0;
  message.split("\n").forEach(function(line) {
    if(!line) {
      return;
    }
    logSegment.appendChild(h("div", Reusable.obj_className_line_stdout, line));
    rendered += 1;
    // Check for 'Finished RsyncUpload for Item'
    // instead of 'Starting MarkItemAsDone for Item'
    // because the latter is often missing
    if(/^Finished RsyncUpload for Item/.test(line)) {
      statusClasses().add('job-info-done');
      self.jobs.markFinished(ident);
    } else if(/^CRITICAL (Sorry|Please report)|^ERROR Fatal exception|No space left on device|^Fatal Python error:|^(Thread|Current thread) 0x/.test(line)) {
      statusClasses().add('job-info-fatal');
      self.jobs.markFatalException(ident);
    } else if(/Script requested immediate stop|^Adjusted target WARC path to.*-aborted$/.test(line)) {
      // Note: above message can be in:
      // ERROR Script requested immediate stop
      // or after an ERROR Fatal exception:
      // wpull.hook.HookStop: Script requested immediate stop.
      //
      // Also check for "Adjusted target WARC path" because
      // the exception may be entirely missing.
      statusClasses().remove('job-info-fatal');
      statusClasses().add('job-info-aborted');
    } else if(/^Received item /.test(line)) {
      // Clear other statuses if a job restarts with the same job ID
      statusClasses().remove('job-info-done');
      statusClasses().remove('job-info-fatal');
      statusClasses().remove('job-info-aborted');
      self.jobs.markUnfinished(ident);
    }
  });
  return rendered;
};
/**
 * Handles one message: registers the job if it is new, renders the log
 * line(s), refreshes the job's stats line and trims old scrollback.
 *
 * @param data Message with "type" ("download", "stdout" or "ignore"),
 *             "job_data" (the job's current data) and type-specific fields.
 *             Any other type fails an assertion.
 */
JobsRenderer.prototype.handleData = function(data) {
  var jobData = data["job_data"];
  var added = this.jobs.handleJobData(jobData);
  this.numCrawls.textContent = this.jobs.countActive();
  if(added) {
    this._createLogContainer(jobData);
  }
  var type = data["type"];
  var ident = jobData["ident"];
  var info = this.renderInfo[ident];
  if(!info) {
    console.warn("No render info for " + ident);
    return;
  }

  var linesRendered;
  if(type == "download") {
    linesRendered = this._renderDownloadLine(data, info.logSegment);
  } else if(type == "stdout") {
    linesRendered = this._renderStdoutLine(data, info.logSegment, info, ident);
    // A stdout line can mark the job finished or restarted, so refresh the
    // counter now instead of waiting for the next message.
    this.numCrawls.textContent = this.jobs.countActive();
  } else if(type == "ignore") {
    linesRendered = this._renderIgnoreLine(data, info.logSegment);
  } else {
    assert(false, "Unexpected message type " + type);
  }

  // Update stats
  var stats = info.statsElements;
  var totalResponses = getTotalResponses(jobData);
  var megabytes = parseInt(jobData["bytes_downloaded"], 10) / (1024 * 1024);
  stats.mb.textContent = numberWithCommas(toStringTenths(megabytes));
  stats.responses.textContent = numberWithCommas(totalResponses) + " resp.";
  stats.responses.title = getSummaryResponses(jobData);
  // started_at comes from the server while Date.now() is the local clock;
  // guard against a zero or negative duration (clock skew) so the rate never
  // shows up as Infinity, NaN or a negative number.
  var duration = Date.now()/1000 - parseFloat(jobData["started_at"]);
  stats.responsesPerSecond.textContent =
    toStringTenths(duration > 0 ? totalResponses / duration : 0);

  // Only update the queue length when both counters are present and numeric.
  // Checking the parsed values (rather than truthiness of the raw ones) keeps
  // a legitimate count of 0 from suppressing the update.
  var totalQueued = parseInt(jobData["items_queued"], 10);
  var totalDownloaded = parseInt(jobData["items_downloaded"], 10);
  if(!isNaN(totalQueued) && !isNaN(totalDownloaded)) {
    stats.queueLength.textContent =
      numberWithCommas((totalQueued - totalDownloaded) + " in q.");
    stats.queueLength.title =
      numberWithCommas(totalQueued) + " queued\n" +
      numberWithCommas(totalDownloaded) + " downloaded";
  }

  stats.connections.textContent = jobData["concurrency"];

  var delayMin = parseInt(jobData["delay_min"], 10);
  var delayMax = parseInt(jobData["delay_max"], 10);
  stats.delay.textContent =
    (delayMin == delayMax ?
      delayMin :
      delayMin + "-" + delayMax) + " ms delay";

  // classList.add/remove are no-ops when the class is already present/absent
  if(jobData["suppress_ignore_reports"]) {
    stats.ignores.textContent = 'igoff';
    stats.ignores.classList.add('job-igoff');
  } else {
    stats.ignores.textContent = 'igon';
    stats.ignores.classList.remove('job-igoff');
  }

  // Update note
  info.jobNote.textContent =
    isBlank(jobData["note"]) ?
      "" :
      " (" + jobData["note"] + ")";

  // Account for the new lines and start a new segment once the current one
  // is full.
  info.lineCountWindow += linesRendered;
  info.lineCountSegments[info.lineCountSegments.length - 1] += linesRendered;
  if(info.lineCountSegments[info.lineCountSegments.length - 1] >= this.linesPerSegment) {
    var newSegment = this._createLogSegment();
    info.logWindow.appendChild(newSegment);
    info.logSegment = newSegment;
    info.lineCountSegments.push(0);
  }

  // Leave the log window alone while the mouse is inside it.
  if(this.mouseInside != ident) {
    // Don't remove any scrollback information when the job has a fatal exception,
    // so that the user can find the traceback and report a bug.
    if(!this.jobs.hasFatalException(ident)) {
      // We may have to remove more than one segment, if the user
      // has paused the log window for a while.
      while(info.lineCountWindow >= this.historyLines + this.linesPerSegment) {
        var firstLogSegment = info.logWindow.firstChild;
        assert(firstLogSegment != null, "info.logWindow.firstChild is null; " +
          JSON.stringify({
            "lineCountWindow": info.lineCountWindow,
            "lineCountSegments": info.lineCountSegments}));
        info.logWindow.removeChild(firstLogSegment);
        info.lineCountWindow -= info.lineCountSegments[0];
        info.lineCountSegments.shift();
      }
    }
    // Scroll to the bottom
    // To avoid serious performance problems in Firefox, we use a big number
    // instead of info.logWindow.scrollHeight.
    info.logWindow.scrollTop = 999999;
  }
};
/**
 * Shows the log windows of jobs whose URL matches the regular expression in
 * the filter box and hides all others.
 *
 * Also records the first matching job in this.firstFilterMatch, expands the
 * log window when exactly one job matches, and switches aligned mode on when
 * some jobs are hidden and off when all are shown.
 *
 * When the filter box holds an invalid regular expression (typically a
 * pattern that is still being typed, such as "("), nothing is changed; this
 * previously threw a SyntaxError out of the event handler and out of
 * _createLogContainer.
 */
JobsRenderer.prototype.applyFilter = function() {
  var query = this.filterBox.value;
  var queryRegExp;
  try {
    // Compile once instead of once per job
    queryRegExp = new RegExp(query);
  } catch(e) {
    // Invalid pattern: keep the current visibility until it becomes valid
    return;
  }
  var matches = 0;
  var matchedWindows = [];
  var unmatchedWindows = [];
  this.firstFilterMatch = null;
  for(var i=0; i < this.jobs.sorted.length; i++) {
    var job = this.jobs.sorted[i];
    var w = this.renderInfo[job["ident"]].logWindow;
    if(!queryRegExp.test(job["url"])) {
      w.classList.add("log-window-hidden");
      // Firefox exhibits serious performance problems when adding
      // lines to our 0px-high log windows, so add display: none
      // (effectively killing the animation)
      if(isFirefox) {
        w.style.display = "none";
      }
      // Remove this class, else an ugly border may be visible
      w.classList.remove('log-window-stopped');
      unmatchedWindows.push(w);
    } else {
      w.classList.remove("log-window-hidden");
      if(isFirefox) {
        w.style.display = "block";
      }
      matches += 1;
      matchedWindows.push(w);
      if(this.firstFilterMatch == null) {
        this.firstFilterMatch = job;
      }
    }
  }
  // If there's only one visible log window, expand it so that more lines are visible.
  unmatchedWindows.forEach(classRemover('log-window-expanded'));
  matchedWindows.forEach(classRemover('log-window-expanded'));
  if(matches == 1) {
    matchedWindows.forEach(classAdder('log-window-expanded'));
  }
  // If you're not seeing all of the log windows, you're probably seeing very
  // few of them, so you probably want alignment enabled. If you're seeing
  // all of them, alignment doesn't help as much as seeing the full info.
  this.aligned = matches < this.jobs.sorted.length;
  this.updateAlign();
};
/**
 * Step the filter to the job window `offset` positions away from the
 * current first filter match (the j / k keys pass +1 / -1).
 *
 * The position one past the last job stands for "no job window shown";
 * stepping beyond either end wraps around through that empty position.
 */
JobsRenderer.prototype.showNextPrev = function(offset) {
    var jobs = this.jobs.sorted;
    var count = jobs.length;
    var current = this.firstFilterMatch;
    var position = null;
    if(current != null) {
        position = findInArray(jobs, function(candidate) {
            return candidate["ident"] == current["ident"];
        });
    }
    // With no job window shown, start from the "hidden" position so that
    // j lands on the first job window and k on the last one.
    if(position == null) {
        position = count;
    }
    position = position + offset;
    // Reaching either end hides all job windows; going past the end wraps.
    if(position == -1) {
        position = count;
    } else if(position == count + 1) {
        position = 0;
    }
    if(position == count) {
        ds.setFilter("^$");
        return;
    }
    var target = jobs[position];
    ds.setFilter("^" + regExpEscape(target["url"]) + "$");
};
/**
 * Add (when this.aligned is truthy) or remove (otherwise) the "-aligned"
 * class on every job header field currently in the document.
 */
JobsRenderer.prototype.updateAlign = function() {
    var makeToggler = this.aligned ? classAdder : classRemover;
    // [selector of the field, class to add or remove on it]
    var fields = [
        ['.job-url', 'job-url-aligned'],
        ['.job-note', 'job-note-aligned'],
        ['.job-nick', 'job-nick-aligned'],
        ['.job-mb', 'job-mb-aligned'],
        ['.job-responses', 'job-responses-aligned'],
        ['.job-in-queue', 'job-in-queue-aligned'],
        ['.job-delay', 'job-delay-aligned']
    ];
    for(var i=0; i < fields.length; i++) {
        var selector = fields[i][0];
        var alignedClass = fields[i][1];
        arrayFrom(document.querySelectorAll(selector)).map(makeToggler(alignedClass));
    }
};
/**
 * Flip the alignment flag and re-apply the alignment classes.
 */
JobsRenderer.prototype.toggleAlign = function() {
    var wasAligned = this.aligned;
    this.aligned = !wasAligned;
    this.updateAlign();
};
/**
 * Custom context menu that pops up when you right-click on a URL in a
 * log window. It helps you copy an !ig command built from the URL you
 * right-clicked.
 */
var ContextMenuRenderer = function() {
    // Container element of the menu, looked up by its id
    this.element = byId('context-menu');
    // Functions queued with callAfterBlur(); blur() calls and then clears them
    this.callAfterBlurFns = [];
    // The menu starts out hidden
    this.visible = false;
};
/**
 * Tell whether the event target is a URL in a log window, judged by the
 * target's className being exactly one of the known line/URL class names.
 */
ContextMenuRenderer.prototype.clickedOnLogWindowURL = function(ev) {
    var urlClassNames = ["line-normal", "line-error", "line-warning", "line-redirect", "log-url"];
    var cn = ev.target.className;
    for(var i=0; i < urlClassNames.length; i++) {
        if(cn == urlClassNames[i]) {
            return true;
        }
    }
    return false;
};
/**
 * Build a handler that copies `text` to the clipboard when called.
 *
 * The handler puts the text into the #clipboard-scratchpad input, focuses
 * and selects it, then issues the 'copy' command on the document.
 */
ContextMenuRenderer.prototype.makeCopyTextFn = function(text) {
    var copyText = function() {
        var scratchpad = byId('clipboard-scratchpad');
        scratchpad.value = text;
        scratchpad.focus();
        scratchpad.select();
        document.execCommand('copy');
    };
    return copyText.bind(this);
};
/**
 * List `path` followed by each of its parent directories, longest first.
 *
 * Example: "/a/b/c" gives ["/a/b/c", "/a/b/", "/a/", "/"].
 * Every parent directory ends with a slash.
 */
ContextMenuRenderer.prototype.getPathVariants = function(path) {
    var variants = [path];
    // Drop one trailing slash first, so that "/a/b/" is not followed by
    // a duplicate "/a/b/" suggestion.
    var remaining = path.replace(/\/$/, "");
    var cut = remaining.lastIndexOf('/');
    while(remaining && cut != -1) {
        // Chop off the last "/segment"
        remaining = remaining.substring(0, cut);
        variants.push(remaining + '/');
        cut = remaining.lastIndexOf('/');
    }
    return variants;
};
/**
 * Build the list of command strings suggested for a right-clicked URL:
 * one "!ig" command per path variant of the URL (see getPathVariants),
 * followed by fixed "!d" and "!con" commands for the job.
 *
 * @param ident  job ident inserted into every command
 * @param url    absolute URL that was right-clicked
 * @return array of command strings
 */
ContextMenuRenderer.prototype.getSuggestedCommands = function(ident, url) {
    // TODO:
    // !d IDENT 180000 180000 (if !d is currently low)
    // !d IDENT 250 375 (if !d is currently high)
    // !con IDENT 1 (if > 1)
    // !con IDENT 3 (if < 3)
    var schema = url.split(':')[0];
    var domain = url.split('/')[2];
    var withoutQuery = url.split('?')[0];
    // Piece after the third slash, i.e. everything following the host.
    // NOTE(review): presumably split() is a maxsplit-style helper defined
    // elsewhere in this file - confirm. When the URL has nothing after the
    // host there is no such piece, and concatenating undefined would yield
    // the bogus path "/undefined"; fall back to the root path instead.
    var afterHost = split(withoutQuery, '/', 3)[3];
    var path = '/' + (afterHost == null ? '' : afterHost);
    // Any http-like schema gets a pattern matching both http and https
    var reSchema = startsWith(schema, 'http') ? 'https?' : 'ftp';
    return this.getPathVariants(path).map(function(p) {
        return "!ig " + ident + " ^" + reSchema + "://" + regExpEscape(domain + p);
    }).concat([
        "!d " + ident + " 180000 180000"
        ,"!con " + ident + " 1"
    ]);
};
/**
 * Build the context menu entries for a right-clicked URL: a link that
 * opens the URL, a "Copy link address" entry, and one "Copy <command>"
 * entry per suggested command.
 *
 * @param ident  job ident the URL belongs to
 * @param url    URL that was right-clicked
 * @return array of elements created with h()
 */
ContextMenuRenderer.prototype.makeEntries = function(ident, url) {
    var commands = this.getSuggestedCommands(ident, url).map(function(c) {
        return h(
            'span',
            // Clicking copies the full command, ident included
            {'onclick': this.makeCopyTextFn(c)},
            // The label leaves the ident out to stay short. Replace
            // " IDENT " with a single space; replacing it with nothing
            // would glue the command to its argument ("!con1").
            "Copy " + c.replace(" " + ident + " ", " ")
        );
    }.bind(this));
    return [
        // Unfortunately, this does not open it in a background tab
        // like the real context menu does.
        h('a', {'href': url}, "Open link in new tab")
        ,h('span', {'onclick': this.makeCopyTextFn(url)}, "Copy link address")
    ].concat(commands);
};
/**
 * contextmenu handler. When the right-clicked element is a URL in a log
 * window, suppress the native menu and show our own menu at the pointer,
 * filled with entries for that URL. In every other case hide our menu and
 * let the native one appear.
 *
 * The URL and the job ident are resolved before anything is shown, so a
 * target without an href, or one that is not inside a log window, can no
 * longer leave a half-built menu on screen behind an exception.
 */
ContextMenuRenderer.prototype.onContextMenu = function(ev) {
    //console.log(ev);
    if(!this.clickedOnLogWindowURL(ev)) {
        this.blur();
        return;
    }
    var url = ev.target.href;
    // The ident comes from the id ("log-window-IDENT") of the enclosing log
    // window. How many levels up that is depends on the rendering mode
    // (moreDom=1 adds a level), so walk up until an ancestor matches.
    var ident = null;
    for(var node = ev.target.parentNode; node && ident === null; node = node.parentNode) {
        var found = typeof node.id == "string" ? node.id.match(/^log-window-(.*)/) : null;
        if(found) {
            ident = found[1];
        }
    }
    if(!url || ident === null) {
        this.blur();
        return;
    }
    ev.preventDefault();
    this.visible = true;
    this.element.style.display = "block";
    this.element.style.left = ev.clientX + "px";
    this.element.style.top = ev.clientY + "px";
    removeChildren(this.element);
    // We put the clipboard-scratchpad in the fixed-positioned
    // context menu instead of elsewhere on the page, because
    // we must focus the input box to automatically copy its text,
    // and the focus operation scrolls to the element on the page,
    // and we want to avoid such scrolling.
    appendAny(this.element, h('input', {'type': 'text', 'id': 'clipboard-scratchpad'}));
    var entries = this.makeEntries(ident, url);
    for(var i=0; i < entries.length; i++) {
        var entry = entries[i];
        entry.classList.add('context-menu-entry');
        appendAny(this.element, entry);
    }
    // If the bottom of the context menu is outside the viewport, move the context
    // menu up, so that it appears to have opened from its bottom-left corner.
    // + 1 pixel so that the pointer lands inside the element and turns on cursor: default
    if(ev.clientY + this.element.offsetHeight > document.documentElement.clientHeight) {
        this.element.style.top = (ev.clientY - this.element.offsetHeight + 1) + "px";
    }
};
/**
 * Hide the context menu, then call every function queued with
 * callAfterBlur() once, in queue order, and empty the queue.
 */
ContextMenuRenderer.prototype.blur = function() {
    this.element.style.display = "none";
    this.visible = false;
    var pending = this.callAfterBlurFns;
    // Only the functions queued before this point are called; the count is
    // fixed up front, and the old array is replaced afterwards.
    var count = pending.length;
    for(var i=0; i < count; i++) {
        var fn = pending[i];
        fn();
    }
    this.callAfterBlurFns = [];
};
// TODO: decouple - fire an onblur event instead
/**
 * Queue `fn` to be called, without arguments, by the next blur().
 */
ContextMenuRenderer.prototype.callAfterBlur = function(fn) {
    var pending = this.callAfterBlurFns;
    pending.push(fn);
};
/**
 * Collects pushed values and hands them to `callable` in batches.
 *
 * The first push into an idle queue arms a timer of `minInterval`
 * milliseconds; when it fires, `callable` receives an array holding
 * everything pushed in the meantime.
 *
 * @param callable     function called with the array of queued values
 * @param minInterval  delay in ms between the first push and the flush
 */
var BatchingQueue = function(callable, minInterval) {
    this._timeout = null;
    this.queue = [];
    this._minInterval = minInterval;
    this.callable = callable;
    // Bound once here so that every timer can reuse the same function
    this._boundRunCallable = this._runCallable.bind(this);
};
/**
 * Change the delay used by timers armed from now on. A timer that is
 * already pending keeps the delay it was armed with.
 */
BatchingQueue.prototype.setMinInterval = function(minInterval) {
    this._minInterval = minInterval;
};
/**
 * Flush: mark the timer as not pending, swap in an empty queue, and pass
 * the values collected so far to the callable.
 */
BatchingQueue.prototype._runCallable = function() {
    var batch = this.queue;
    this.queue = [];
    this._timeout = null;
    this.callable(batch);
};
/**
 * Flush immediately, cancelling a pending timer if there is one. The
 * callable is invoked even when the queue is empty.
 */
BatchingQueue.prototype.callNow = function() {
    var pendingTimer = this._timeout;
    if(pendingTimer !== null) {
        this._timeout = null;
        clearTimeout(pendingTimer);
    }
    this._runCallable();
};
/**
 * Queue a value, arming the flush timer unless one is already pending.
 */
BatchingQueue.prototype.push = function(v) {
    this.queue.push(v);
    if(this._timeout !== null) {
        return;
    }
    this._timeout = setTimeout(this._boundRunCallable, this._minInterval);
};
/**
 * Produces a delay that grows by `multiplier` on every decay() call,
 * starting at `initial` and never exceeding `max`.
 *
 * @param initial     value returned by the first decay() after a reset
 * @param multiplier  growth factor applied on every decay()
 * @param max         upper bound for the returned value
 */
var Decayer = function(initial, multiplier, max) {
    this.max = max;
    this.multiplier = multiplier;
    this.initial = initial;
    this.reset();
};
/**
 * Go back to the start of the sequence and return the new current value.
 */
Decayer.prototype.reset = function() {
    // decay() multiplies before returning, and the first decay() must
    // return `initial`, so start one step below it.
    var seed = this.initial / this.multiplier;
    this.current = seed;
    return seed;
};
/**
 * Advance one step and return the new current value, capped at `max`.
 */
Decayer.prototype.decay = function() {
    var next = Math.min(this.current * this.multiplier, this.max);
    this.current = next;
    return next;
};
/**
 * Top-level controller of the page. The constructor:
 *  - reads options from the query string (historyLines,
 *    batchTimeWhenVisible, showNicks, contextMenu, moreDom, title, host,
 *    dumpMax);
 *  - creates the context menu renderer and the jobs renderer and installs
 *    the document-level mouse/keyboard handlers;
 *  - fetches /logs/recent, replays those lines, and then starts live
 *    streaming over the WebSocket through a BatchingQueue.
 *
 * Live streaming is started even when the recent-lines request fails or
 * returns something that is not a JSON array, so that a broken
 * /logs/recent no longer leaves the dashboard permanently empty.
 */
var Dashboard = function() {
    this.messageCount = 0;
    var args = getQueryArgs();
    // Default history: 250 lines for user agents containing "Mobi", else 1000
    var historyLines =
        args["historyLines"] ?
            Number(args["historyLines"]) :
            navigator.userAgent.match(/Mobi/) ? 250 : 1000;
    var batchTimeWhenVisible =
        args["batchTimeWhenVisible"] ?
            Number(args["batchTimeWhenVisible"]) :
            125;
    var showNicks =
        args["showNicks"] ?
            Boolean(Number(args["showNicks"])) :
            false;
    var contextMenu =
        args["contextMenu"] ?
            Boolean(Number(args["contextMenu"])) :
            /**
             * The context menu is useless without document.execCommand('copy'),
             * which is available in IE10+, Chrome 42+, and Firefox 41+
             */
            ((isTrident && getTridentMajorVersion() >= 6) ||
             (isChrome && getChromeMajorVersion() >= 42) ||
             (isFirefox && getFirefoxMajorVersion() >= 41));
    var moreDom =
        args["moreDom"] ?
            Boolean(Number(args["moreDom"])) :
            false;
    // Append to page title to make it possible to identify the tab in Chrome's task manager
    if(args["title"]) {
        document.title += " - " + args["title"];
    }
    if(moreDom) {
        JobsRenderer.prototype._renderDownloadLine = JobsRenderer.prototype._moreDomRenderDownloadLine;
    }
    this.host = args["host"] ? args["host"] : location.host;
    // When dumpMax is a positive number, handleData also dumps the first
    // dumpMax messages into #traffic.
    this.dumpTraffic = args["dumpMax"] && Number(args["dumpMax"]) > 0;
    if(this.dumpTraffic) {
        this.dumpMax = Number(args["dumpMax"]);
    }
    this.contextMenuRenderer = new ContextMenuRenderer(document);
    if(contextMenu) {
        document.oncontextmenu = this.contextMenuRenderer.onContextMenu.bind(this.contextMenuRenderer);
        document.onclick = this.contextMenuRenderer.blur.bind(this.contextMenuRenderer);
        // onkeydown picks up ESC, onkeypress doesn't (tested Chrome 44)
        document.onkeydown = function(ev) {
            if(ev.keyCode == 27) { // ESC
                this.contextMenuRenderer.blur();
            }
        }.bind(this);
        // In Chrome, the native context menu disappears when you wheel around, so
        // match that behavior for our own context menu.
        if(isChrome) {
            document.onwheel = function(ev) {
                this.contextMenuRenderer.blur();
            }.bind(this);
        }
    }
    this.jobsRenderer = new JobsRenderer(
        byId('logs'), byId('filter-box'), historyLines, showNicks, this.contextMenuRenderer);
    var batchTimeWhenHidden = 5000;

    // Sets up the batching queue and the reconnect backoff, opens the
    // WebSocket, and slows batching down while the page is hidden.
    // Guarded so that it can only ever run once.
    var streaming = false;
    var startStreaming = function() {
        if(streaming) {
            return;
        }
        streaming = true;
        this.queue = new BatchingQueue(function(queue) {
            //console.log("Queue has ", queue.length, "items");
            for(var i=0; i < queue.length; i++) {
                this.handleData(JSON.parse(queue[i]));
            }
        }.bind(this), batchTimeWhenVisible);
        this.decayer = new Decayer(1000, 1.5, 60000);
        this.connectWebSocket();
        document.addEventListener("visibilitychange", function() {
            if(document.hidden) {
                //console.log("Page has become hidden");
                this.queue.setMinInterval(batchTimeWhenHidden);
            } else {
                //console.log("Page has become visible");
                this.queue.setMinInterval(batchTimeWhenVisible);
                this.queue.callNow();
            }
        }.bind(this), false);
    }.bind(this);

    var xhr = new XMLHttpRequest();
    xhr.onload = function() {
        // onload also fires for error responses, whose body need not be
        // JSON; treat anything that is not a JSON array as "no recent lines".
        var recentLines = [];
        try {
            var parsed = JSON.parse(xhr.responseText);
            if(Array.isArray(parsed)) {
                recentLines = parsed;
            }
        } catch(e) {
            console.log("Could not parse recent log lines:", e);
        }
        // Replay the recent lines before any live message is handled
        for(var i=0; i < recentLines.length; i++) {
            this.handleData(recentLines[i]);
        }
        startStreaming();
    }.bind(this);
    xhr.onerror = function(ev) {
        console.log("Could not fetch recent log lines:", ev);
        startStreaming();
    };
    // The cb parameter makes every request URL unique to defeat caching
    xhr.open("GET", "//" + this.host + "/logs/recent?cb=" + Date.now() + Math.random());
    xhr.setRequestHeader('Accept', 'application/json');
    xhr.send("");
    document.onkeypress = this.keyPress.bind(this);
    // Adjust help text based on URL
    Array.prototype.slice.call(document.querySelectorAll('.url-q-or-amp')).map(function(elem) {
        if(window.location.search.indexOf("?") != -1) {
            elem.textContent = "&";
        }
    });
    if(!showNicks) {
        // NOTE: document.write is only safe here because this constructor
        // runs while the document is still being parsed.
        document.write('<style>.job-nick-aligned { width: 0; }</style>');
    }
};
/**
 * Keyboard shortcuts, installed as document.onkeypress:
 *   j / k  show the next / previous job window
 *   a      show all job windows (empty filter)
 *   n      show no job windows (filter "^$")
 *   f      focus and select the filter box
 *   v      open the URL of the first job matching the filter
 *   ?      toggle the help text
 */
Dashboard.prototype.keyPress = function(ev) {
    //console.log(ev);
    // Some browsers deliver keypress for combinations such as Ctrl+F or
    // Alt+F as well. Those belong to the browser, so don't treat them as
    // our single-letter shortcuts.
    if(ev.ctrlKey || ev.altKey || ev.metaKey) {
        return;
    }
    if(ev.which == 106) { // j
        this.jobsRenderer.showNextPrev(1);
    } else if(ev.which == 107) { // k
        this.jobsRenderer.showNextPrev(-1);
    } else if(ev.which == 97) { // a
        ds.setFilter('');
    } else if(ev.which == 110) { // n
        ds.setFilter('^$');
    } else if(ev.which == 102) { // f
        // Keep the typed "f" out of the filter box
        ev.preventDefault();
        byId('filter-box').focus();
        byId('filter-box').select();
    } else if(ev.which == 118) { // v
        // firstFilterMatch is null when the filter matches no job
        var match = this.jobsRenderer.firstFilterMatch;
        if(match != null) {
            window.open(match["url"]);
        }
    } else if(ev.which == 63) { // ?
        ds.toggleHelp();
    }
};
/**
 * Handle one decoded message: count it, dump it into #traffic while
 * traffic dumping is on and fewer than dumpMax messages were seen, and
 * pass it on to the jobs renderer.
 */
Dashboard.prototype.handleData = function(data) {
    var seen = this.messageCount + 1;
    this.messageCount = seen;
    var shouldDump = this.dumpTraffic && seen <= this.dumpMax;
    if(shouldDump) {
        var sink = byId('traffic');
        sink.appendChild(h("pre", null, prettyJson(data)));
    }
    this.jobsRenderer.handleData(data);
};
/**
 * Open the /stream WebSocket on this.host (wss: when the page itself was
 * loaded over https:, ws: otherwise) and store it in this.ws.
 *
 * Incoming messages go into this.queue. When the socket closes, a
 * reconnect is scheduled after the delay given by this.decayer; a
 * successful open resets that delay.
 */
Dashboard.prototype.connectWebSocket = function() {
    var self = this;
    var scheme = "ws:";
    if(window.location.protocol === "https:") {
        scheme = "wss:";
    }
    var socket = new WebSocket(scheme + "//" + self.host + "/stream");
    self.ws = socket;
    socket.onopen = function(ev) {
        console.log("WebSocket opened:", ev);
        self.decayer.reset();
    };
    socket.onmessage = function(ev) {
        self.queue.push(ev["data"]);
    };
    socket.onclose = function(ev) {
        console.log("WebSocket closed:", ev);
        var delay = self.decayer.decay();
        console.log("Reconnecting in", delay, "ms");
        setTimeout(self.connectWebSocket.bind(self), delay);
    };
};
/**
 * Toggle alignment of the job header fields; delegates to the jobs renderer.
 */
Dashboard.prototype.toggleAlign = function() {
    var renderer = this.jobsRenderer;
    renderer.toggleAlign();
};
/**
 * Show the #help element if it is hidden, hide it if it is shown, by
 * flipping its 'undisplayed' class.
 */
Dashboard.prototype.toggleHelp = function() {
    byId('help').classList.toggle('undisplayed');
};
/**
 * Return the current text of the filter box. The `value` parameter is
 * not used.
 */
Dashboard.prototype.getFilter = function(value) {
    var filterBox = byId('filter-box');
    return filterBox.value;
};
/**
 * Put `value` into the filter box and invoke the box's onchange handler,
 * since assigning .value from script does not fire it.
 */
Dashboard.prototype.setFilter = function(value) {
    var filterBox = byId('filter-box');
    filterBox.value = value;
    filterBox.onchange();
};
// The page's single Dashboard instance. It is a global because code in this
// file refers to it by name (e.g. ds.setFilter(...) and ds.toggleHelp()).
var ds = new Dashboard();
</script>
</body>
</html>