Skip to content

Commit 4ef918e

Browse files
author
epriestley
committed
Add a garbage collector daemon
Summary: Phabricator generates a bunch of data that we don't need to keep around forever; add a GC daemon to get rid of it with some basic configuration options. This needs a couple more diffs to get some of the details right, but I think this is a reasonable start. I also fixed a couple of UI things related to this, e.g. the daemon logs page going crazy when a daemon gets stuck in a loop and dumps tons of data to stdout. Test Plan: - Ran gc daemon in 'phd debug' mode and saw it delete stuff, then sleep once it had cleaned everything up. - Mucked around with TTLs and verified they work correctly. - Viewed gc'd transcripts in the web interface and made sure they displayed okay. - Viewed daemon logs before/after garbage collection. - Running some run-at / run-for tests now, I'll update if the daemon doesn't shut off in ~10-15 minutes. :P Reviewed By: tuomaspelkonen Reviewers: jungejason, tuomaspelkonen, aran CC: aran, tuomaspelkonen, epriestley Differential Revision: 583
1 parent 51de554 commit 4ef918e

File tree

10 files changed

+258
-50
lines changed

10 files changed

+258
-50
lines changed

conf/default.conf.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,37 @@
375375
// silly (but sort of awesome).
376376
'remarkup.enable-embedded-youtube' => false,
377377

378+
379+
// -- Garbage Collection ---------------------------------------------------- //
380+
381+
// Phabricator generates various logs and caches in the database which can
382+
// be garbage collected after a while to make the total data size more
383+
// manageable. To run garbage collection, launch a
384+
// PhabricatorGarbageCollector daemon.
385+
386+
// Since the GC daemon can issue large writes and table scans, you may want to
387+
// run it only during off hours or make sure it is scheduled so it doesn't
388+
// overlap with backups. This determines when the daemon can start running
389+
// each day.
390+
'gcdaemon.run-at' => '12 AM',
391+
392+
// How many seconds after 'gcdaemon.run-at' the daemon may collect garbage
393+
// for. By default it runs continuously, but you can set it to run for a
394+
// limited period of time. For instance, if you do backups at 3 AM, you might
395+
// run garbage collection for an hour beforehand. This is not a high-precision
396+
// limit so you may want to leave some room for the GC to actually stop, and
397+
// if you set it to something like 3 seconds you're on your own.
398+
'gcdaemon.run-for' => 24 * 60 * 60,
399+
400+
// These 'ttl' keys configure how much old data the GC daemon keeps around.
401+
// Objects older than the ttl will be collected. Set any value to 0 to store
402+
// data indefinitely.
403+
404+
'gcdaemon.ttl.herald-transcripts' => 30 * (24 * 60 * 60),
405+
'gcdaemon.ttl.daemon-logs' => 7 * (24 * 60 * 60),
406+
'gcdaemon.ttl.differential-render-cache' => 7 * (24 * 60 * 60),
407+
408+
378409
// -- Customization --------------------------------------------------------- //
379410

380411
// Paths to additional phutil libraries to load.

scripts/install/update_phabricator.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,6 @@ sudo /etc/init.d/httpd start
5858

5959
# $ROOT/phabricator/bin/phd repository-launch-master
6060
# $ROOT/phabricator/bin/phd launch metamta
61+
# $ROOT/phabricator/bin/phd launch garbagecollector
6162
# $ROOT/phabricator/bin/phd launch 4 taskmaster
6263
# $ROOT/phabricator/bin/phd launch ircbot /config/bot.json

src/__phutil_library_map__.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@
352352
'PhabricatorFileURI' => 'applications/files/uri',
353353
'PhabricatorFileUploadController' => 'applications/files/controller/upload',
354354
'PhabricatorFileViewController' => 'applications/files/controller/view',
355+
'PhabricatorGarbageCollectorDaemon' => 'infrastructure/daemon/garbagecollector',
355356
'PhabricatorGoodForNothingWorker' => 'infrastructure/daemon/workers/worker/goodfornothing',
356357
'PhabricatorHandleObjectSelectorDataView' => 'applications/phid/handle/view/selector',
357358
'PhabricatorHelpController' => 'applications/help/controller/base',
@@ -844,6 +845,7 @@
844845
'PhabricatorFileTransformController' => 'PhabricatorFileController',
845846
'PhabricatorFileUploadController' => 'PhabricatorFileController',
846847
'PhabricatorFileViewController' => 'PhabricatorFileController',
848+
'PhabricatorGarbageCollectorDaemon' => 'PhabricatorDaemon',
847849
'PhabricatorGoodForNothingWorker' => 'PhabricatorWorker',
848850
'PhabricatorHelpController' => 'PhabricatorController',
849851
'PhabricatorHelpKeyboardShortcutController' => 'PhabricatorHelpController',

src/applications/daemon/view/daemonlogevents/PhabricatorDaemonLogEventsView.php

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,46 @@ public function render() {
4343
}
4444

4545
foreach ($this->events as $event) {
46+
47+
// Limit display log size. If a daemon gets stuck in an output loop this
48+
// page can be like >100MB if we don't truncate stuff. Try to do cheap
49+
// line-based truncation first, and fall back to expensive UTF-8 character
50+
// truncation if that doesn't get things short enough.
51+
52+
$message = $event->getMessage();
53+
54+
$more_lines = null;
55+
$more_chars = null;
56+
$line_limit = 12;
57+
if (substr_count($message, "\n") > $line_limit) {
58+
$message = explode("\n", $message);
59+
$more_lines = count($message) - $line_limit;
60+
$message = array_slice($message, 0, $line_limit);
61+
$message = implode("\n", $message);
62+
}
63+
64+
$char_limit = 8192;
65+
if (strlen($message) > $char_limit) {
66+
$message = phutil_utf8v($message);
67+
$more_chars = count($message) - $char_limit;
68+
$message = array_slice($message, 0, $char_limit);
69+
$message = implode('', $message);
70+
}
71+
72+
$more = null;
73+
if ($more_chars) {
74+
$more = number_format($more_chars);
75+
$more = "\n<... {$more} more characters ...>";
76+
} else if ($more_lines) {
77+
$more = number_format($more_lines);
78+
$more = "\n<... {$more} more lines ...>";
79+
}
80+
4681
$row = array(
4782
phutil_escape_html($event->getLogType()),
4883
phabricator_date($event->getEpoch(), $this->user),
4984
phabricator_time($event->getEpoch(), $this->user),
50-
str_replace("\n", '<br />', phutil_escape_html($event->getMessage())),
85+
str_replace("\n", '<br />', phutil_escape_html($message.$more)),
5186
);
5287

5388
if ($this->combinedLog) {

src/applications/daemon/view/daemonlogevents/__init__.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
phutil_require_module('phabricator', 'view/utils');
1212

1313
phutil_require_module('phutil', 'markup');
14+
phutil_require_module('phutil', 'utils');
1415

1516

1617
phutil_require_source('PhabricatorDaemonLogEventsView.php');

src/applications/herald/controller/transcript/HeraldTranscriptController.php

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -42,40 +42,45 @@ public function processRequest() {
4242
throw new Exception('Uknown transcript!');
4343
}
4444

45-
$field_names = HeraldFieldConfig::getFieldMap();
46-
$condition_names = HeraldConditionConfig::getConditionMap();
47-
$action_names = HeraldActionConfig::getActionMap();
48-
4945
require_celerity_resource('herald-test-css');
5046

51-
$filter = $this->getFilterPHIDs();
52-
$this->filterTranscript($xscript, $filter);
53-
$phids = array_merge($filter, $this->getTranscriptPHIDs($xscript));
54-
$phids = array_unique($phids);
55-
$phids = array_filter($phids);
56-
57-
$handles = id(new PhabricatorObjectHandleData($phids))
58-
->loadHandles();
59-
$this->handles = $handles;
47+
$nav = $this->buildSideNav();
6048

6149
$object_xscript = $xscript->getObjectTranscript();
50+
if (!$object_xscript) {
51+
$notice = id(new AphrontErrorView())
52+
->setSeverity(AphrontErrorView::SEVERITY_NOTICE)
53+
->setTitle('Old Transcript')
54+
->appendChild(
55+
'<p>Details of this transcript have been garbage collected.</p>');
56+
$nav->appendChild($notice);
57+
} else {
58+
$filter = $this->getFilterPHIDs();
59+
$this->filterTranscript($xscript, $filter);
60+
$phids = array_merge($filter, $this->getTranscriptPHIDs($xscript));
61+
$phids = array_unique($phids);
62+
$phids = array_filter($phids);
63+
64+
$handles = id(new PhabricatorObjectHandleData($phids))
65+
->loadHandles();
66+
$this->handles = $handles;
6267

63-
$nav = $this->buildSideNav();
64-
65-
$apply_xscript_panel = $this->buildApplyTranscriptPanel(
66-
$xscript);
67-
$nav->appendChild($apply_xscript_panel);
68+
$apply_xscript_panel = $this->buildApplyTranscriptPanel(
69+
$xscript);
70+
$nav->appendChild($apply_xscript_panel);
6871

69-
$action_xscript_panel = $this->buildActionTranscriptPanel(
70-
$xscript);
71-
$nav->appendChild($action_xscript_panel);
72+
$action_xscript_panel = $this->buildActionTranscriptPanel(
73+
$xscript);
74+
$nav->appendChild($action_xscript_panel);
7275

73-
$object_xscript_panel = $this->buildObjectTranscriptPanel(
74-
$xscript);
75-
$nav->appendChild($object_xscript_panel);
76+
$object_xscript_panel = $this->buildObjectTranscriptPanel(
77+
$xscript);
78+
$nav->appendChild($object_xscript_panel);
79+
}
7680

7781
/*
7882
83+
TODO
7984
8085
$notice = null;
8186
if ($xscript->getDryRun()) {
@@ -84,30 +89,6 @@ public function processRequest() {
8489
This was a dry run to test Herald rules, no actions were executed.
8590
</tools:notice>;
8691
}
87-
88-
if (!$object_xscript) {
89-
$notice =
90-
<x:frag>
91-
<tools:notice title="Old Transcript">
92-
Details of this transcript have been discarded. Full transcripts
93-
are retained for 30 days.
94-
</tools:notice>
95-
{$notice}
96-
</x:frag>;
97-
}
98-
99-
100-
return
101-
<herald:standard-page title="Transcript">
102-
<div style="padding: 1em;">
103-
<tools:side-nav items={$this->renderNavItems()}>
104-
{$notice}
105-
{$apply_xscript_markup}
106-
{$rule_table}
107-
{$object_xscript_table}
108-
</tools:side-nav>
109-
</div>
110-
</herald:standard-page>;
11192
*/
11293

11394
return $this->buildStandardPageResponse(
@@ -264,7 +245,7 @@ protected function filterTranscript($xscript, $filter_phids) {
264245
foreach ($xscript->getApplyTranscripts() as $id => $apply_xscript) {
265246
$rule_id = $apply_xscript->getRuleID();
266247
if ($filter_owned) {
267-
if (!$rule_xscripts[$rule_id]) {
248+
if (empty($rule_xscripts[$rule_id])) {
268249
// No associated rule so you can't own this effect.
269250
continue;
270251
}

src/applications/herald/controller/transcript/__init__.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
phutil_require_module('phabricator', 'applications/phid/handle/data');
1515
phutil_require_module('phabricator', 'infrastructure/celerity/api');
1616
phutil_require_module('phabricator', 'view/control/table');
17+
phutil_require_module('phabricator', 'view/form/error');
1718
phutil_require_module('phabricator', 'view/layout/panel');
1819
phutil_require_module('phabricator', 'view/layout/sidenav');
1920

src/docs/configuration/managing_daemons.diviner

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,4 @@ You can get a list of launchable daemons with **phd list**:
7070
- **PhabricatorTaskmasterDaemon** runs a generic task queue; and
7171
- **PhabricatorRepository** daemons track repositories, descriptions are
7272
available in the @{article:Diffusion User Guide}.
73+
- **PhabricatorGarbageCollectorDaemon** cleans up old logs and caches.
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
<?php
2+
3+
/*
4+
* Copyright 2011 Facebook, Inc.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/**
20+
* Collects old logs and caches to reduce the amount of data stored in the
21+
* database.
22+
*
23+
* @group daemon
24+
*/
25+
class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {

  /**
   * Maximum number of rows each collector touches in a single pass. The same
   * value is used as the "are we caught up?" threshold in the main loop so
   * the two can never drift apart.
   */
  const BATCH_SIZE = 100;

  /**
   * Main daemon loop.
   *
   * On every iteration the 'gcdaemon.run-at' and 'gcdaemon.run-for' settings
   * are re-read. Outside the configured window the daemon sleeps; inside it,
   * each collector runs once and the number of collected objects is echoed.
   *
   * @return void This method loops forever and does not return normally.
   * @throws Exception If 'gcdaemon.run-at' can not be parsed by strtotime().
   */
  public function run() {

    // Keep track of when we start and stop the GC so we can emit useful log
    // messages.
    $just_ran = false;

    do {
      $run_at = PhabricatorEnv::getEnvConfig('gcdaemon.run-at');
      $run_for = PhabricatorEnv::getEnvConfig('gcdaemon.run-for');

      // Just use the default timezone, we don't need to get fancy and try
      // to localize this.
      $start = strtotime($run_at);
      if ($start === false) {
        throw new Exception(
          "Configuration 'gcdaemon.run-at' could not be parsed: '{$run_at}'.");
      }

      if (!$this->isInsideWindow(time(), $start, $run_for)) {
        if ($just_ran) {
          echo "Stopped garbage collector.\n";
          $just_ran = false;
        }
        // The configuration says we can't collect garbage right now, so
        // just sleep until we can.
        $this->sleep(300);
        continue;
      }

      if (!$just_ran) {
        echo "Started garbage collector.\n";
        $just_ran = true;
      }

      $n_herald = $this->collectHeraldTranscripts();
      $n_daemon = $this->collectDaemonLogs();
      $n_render = $this->collectRenderCaches();

      $collected = array(
        'Herald Transcript' => $n_herald,
        'Daemon Log'        => $n_daemon,
        'Render Cache'      => $n_render,
      );
      // Drop collectors which did not collect anything so they are not
      // reported.
      $collected = array_filter($collected);

      foreach ($collected as $thing => $count) {
        $count = number_format($count);
        echo "Garbage collected {$count} '{$thing}' objects.\n";
      }

      $total = array_sum($collected);
      if ($total < self::BATCH_SIZE) {
        // Fewer objects than one full batch were collected in total, so we're
        // basically caught up. Ease off the GC loop so we don't keep doing
        // table scans just to delete a handful of rows.
        $this->sleep(300);
      } else {
        $this->stillWorking();
      }
    } while (true);

  }

  /**
   * Decide whether a timestamp falls inside a collection window.
   *
   * The configured start time is resolved against the current day, so a
   * window which crosses midnight (say, starting at 11 PM and running for two
   * hours) would otherwise be cut short at 00:00 when the start time jumps to
   * the coming evening. To cover that case the window which opened on the
   * previous calendar day is tested as well.
   *
   * @param int $now     Timestamp to test.
   * @param int $start   Start of the window resolved for the current day.
   * @param int $run_for Length of the window, in seconds.
   * @return bool True if $now lies in today's window or in the tail of the
   *              window that opened one day earlier.
   */
  private function isInsideWindow($now, $start, $run_for) {
    $window_starts = array(
      $start,
      // Use a calendar-relative offset rather than a fixed 86400 seconds so
      // the previous start stays at the same wall-clock time across DST
      // changes.
      strtotime('-1 day', $start),
    );

    foreach ($window_starts as $window_start) {
      if ($now >= $window_start && $now <= ($window_start + $run_for)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Blank out the transcript detail columns of old Herald transcripts, one
   * batch at a time. Rows are kept; only rows with `time` older than the TTL
   * and a non-empty objectTranscript are touched.
   *
   * @return int Number of rows updated, or 0 if the TTL is zero or negative.
   */
  private function collectHeraldTranscripts() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.herald-transcripts');
    if ($ttl <= 0) {
      return 0;
    }

    $table = new HeraldTranscript();
    $conn_w = $table->establishConnection('w');

    queryfx(
      $conn_w,
      'UPDATE %T SET
          objectTranscript     = "",
          ruleTranscripts      = "",
          conditionTranscripts = "",
          applyTranscripts     = ""
        WHERE `time` < %d AND objectTranscript != ""
        LIMIT %d',
      $table->getTableName(),
      time() - $ttl,
      self::BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

  /**
   * Delete daemon log events whose epoch is older than the TTL, one batch at
   * a time.
   *
   * @return int Number of rows deleted, or 0 if the TTL is zero or negative.
   */
  private function collectDaemonLogs() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.daemon-logs');
    if ($ttl <= 0) {
      return 0;
    }

    $table = new PhabricatorDaemonLogEvent();
    $conn_w = $table->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE epoch < %d LIMIT %d',
      $table->getTableName(),
      time() - $ttl,
      self::BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

  /**
   * Placeholder collector for render caches.
   *
   * @return int Always 0 for now.
   */
  private function collectRenderCaches() {
    // TODO: Implement this, no epoch column on the table right now.
    return 0;
  }

}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?php
2+
/**
3+
* This file is automatically generated. Lint this module to rebuild it.
4+
* @generated
5+
*/
6+
7+
8+
9+
phutil_require_module('phabricator', 'applications/herald/storage/transcript/base');
10+
phutil_require_module('phabricator', 'infrastructure/daemon/base');
11+
phutil_require_module('phabricator', 'infrastructure/daemon/storage/event');
12+
phutil_require_module('phabricator', 'infrastructure/env');
13+
phutil_require_module('phabricator', 'storage/queryfx');
14+
15+
16+
phutil_require_source('PhabricatorGarbageCollectorDaemon.php');

0 commit comments

Comments
 (0)