Skip to content

Commit 966db4d

Browse files
author
epriestley
committed
Add an intracluster synchronization log for cluster repositories
Summary: Depends on D19778. Ref T13216. See PHI943, PHI889, et al. We currently have a push log and a pull log, but do not separately log intracluster synchronization events. We've encountered several specific cases where having this kind of log would be helpful: - In PHI943, an install was accidentally aborting locks early. Having timing information in the sync log would let us identify this more quickly. - In PHI889, an install hit an issue with `MaxStartups` configuration in `sshd`. A log would let us identify when this is an issue. - In PHI889, I floated a "push the linux kernel + fetch timeout" theory. A sync log would let us see sync/fetch timeouts to confirm this being a problem in practice. - A sync log will help us assess, develop, test, and monitor intracluster routing sync changes (likely those in T13211) in the future. Some of these events are present in the pull log already, but only if they make it as far as running a `git upload-pack` subprocess (not the case with `MaxStartups` problems) -- and they can't record end-to-end timing. No UI yet, I'll add that in a future change. Test Plan: - Forced all operations to synchronize by adding `|| true` to the version check. - Pulled, got a sync log in the database. Reviewers: amckinley Reviewed By: amckinley Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam Maniphest Tasks: T13216 Differential Revision: https://secure.phabricator.com/D19779
1 parent e09d29f commit 966db4d

File tree

6 files changed

+338
-6
lines changed

6 files changed

+338
-6
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
CREATE TABLE {$NAMESPACE}_repository.repository_syncevent (
2+
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
3+
phid VARBINARY(64) NOT NULL,
4+
repositoryPHID VARBINARY(64) NOT NULL,
5+
epoch INT UNSIGNED NOT NULL,
6+
devicePHID VARBINARY(64) NOT NULL,
7+
fromDevicePHID VARBINARY(64) NOT NULL,
8+
deviceVersion INT UNSIGNED,
9+
fromDeviceVersion INT UNSIGNED,
10+
resultType VARCHAR(32) NOT NULL COLLATE {$COLLATE_TEXT},
11+
resultCode INT UNSIGNED NOT NULL,
12+
syncWait BIGINT UNSIGNED NOT NULL,
13+
properties LONGTEXT NOT NULL COLLATE {$COLLATE_TEXT}
14+
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};

src/__phutil_library_map__.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4163,6 +4163,9 @@
41634163
'PhabricatorRepositorySvnCommitChangeParserWorker' => 'applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php',
41644164
'PhabricatorRepositorySvnCommitMessageParserWorker' => 'applications/repository/worker/commitmessageparser/PhabricatorRepositorySvnCommitMessageParserWorker.php',
41654165
'PhabricatorRepositorySymbol' => 'applications/repository/storage/PhabricatorRepositorySymbol.php',
4166+
'PhabricatorRepositorySyncEvent' => 'applications/repository/storage/PhabricatorRepositorySyncEvent.php',
4167+
'PhabricatorRepositorySyncEventPHIDType' => 'applications/repository/phid/PhabricatorRepositorySyncEventPHIDType.php',
4168+
'PhabricatorRepositorySyncEventQuery' => 'applications/repository/query/PhabricatorRepositorySyncEventQuery.php',
41664169
'PhabricatorRepositoryTestCase' => 'applications/repository/storage/__tests__/PhabricatorRepositoryTestCase.php',
41674170
'PhabricatorRepositoryTransaction' => 'applications/repository/storage/PhabricatorRepositoryTransaction.php',
41684171
'PhabricatorRepositoryTransactionQuery' => 'applications/repository/query/PhabricatorRepositoryTransactionQuery.php',
@@ -10111,6 +10114,12 @@
1011110114
'PhabricatorRepositorySvnCommitChangeParserWorker' => 'PhabricatorRepositoryCommitChangeParserWorker',
1011210115
'PhabricatorRepositorySvnCommitMessageParserWorker' => 'PhabricatorRepositoryCommitMessageParserWorker',
1011310116
'PhabricatorRepositorySymbol' => 'PhabricatorRepositoryDAO',
10117+
'PhabricatorRepositorySyncEvent' => array(
10118+
'PhabricatorRepositoryDAO',
10119+
'PhabricatorPolicyInterface',
10120+
),
10121+
'PhabricatorRepositorySyncEventPHIDType' => 'PhabricatorPHIDType',
10122+
'PhabricatorRepositorySyncEventQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
1011410123
'PhabricatorRepositoryTestCase' => 'PhabricatorTestCase',
1011510124
'PhabricatorRepositoryTransaction' => 'PhabricatorApplicationTransaction',
1011610125
'PhabricatorRepositoryTransactionQuery' => 'PhabricatorApplicationTransactionQuery',

src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,10 @@ public function synchronizeWorkingCopyBeforeRead() {
206206
}
207207
}
208208

209-
$this->synchronizeWorkingCopyFromDevices($fetchable);
209+
$this->synchronizeWorkingCopyFromDevices(
210+
$fetchable,
211+
$this_version,
212+
$max_version);
210213
} else {
211214
$this->synchronizeWorkingCopyFromRemote();
212215
}
@@ -653,7 +656,11 @@ private function synchronizeWorkingCopyFromRemote() {
653656
/**
654657
* @task internal
655658
*/
656-
private function synchronizeWorkingCopyFromDevices(array $device_phids) {
659+
private function synchronizeWorkingCopyFromDevices(
660+
array $device_phids,
661+
$local_version,
662+
$remote_version) {
663+
657664
$repository = $this->getRepository();
658665

659666
$service = $repository->loadAlmanacService();
@@ -694,7 +701,10 @@ private function synchronizeWorkingCopyFromDevices(array $device_phids) {
694701
$caught = null;
695702
foreach ($fetchable as $binding) {
696703
try {
697-
$this->synchronizeWorkingCopyFromBinding($binding);
704+
$this->synchronizeWorkingCopyFromBinding(
705+
$binding,
706+
$local_version,
707+
$remote_version);
698708
$caught = null;
699709
break;
700710
} catch (Exception $ex) {
@@ -711,14 +721,17 @@ private function synchronizeWorkingCopyFromDevices(array $device_phids) {
711721
/**
712722
* @task internal
713723
*/
714-
private function synchronizeWorkingCopyFromBinding($binding) {
724+
private function synchronizeWorkingCopyFromBinding(
725+
AlmanacBinding $binding,
726+
$local_version,
727+
$remote_version) {
728+
715729
$repository = $this->getRepository();
716730
$device = AlmanacKeys::getLiveDevice();
717731

718732
$this->logLine(
719733
pht(
720-
'Synchronizing this device ("%s") from cluster leader ("%s") before '.
721-
'read.',
734+
'Synchronizing this device ("%s") from cluster leader ("%s").',
722735
$device->getName(),
723736
$binding->getDevice()->getName()));
724737

@@ -746,17 +759,60 @@ private function synchronizeWorkingCopyFromBinding($binding) {
746759

747760
$future->setCWD($local_path);
748761

762+
$log = PhabricatorRepositorySyncEvent::initializeNewEvent()
763+
->setRepositoryPHID($repository->getPHID())
764+
->setEpoch(PhabricatorTime::getNow())
765+
->setDevicePHID($device->getPHID())
766+
->setFromDevicePHID($binding->getDevice()->getPHID())
767+
->setDeviceVersion($local_version)
768+
->setFromDeviceVersion($remote_version);
769+
770+
$sync_start = microtime(true);
771+
749772
try {
750773
$future->resolvex();
751774
} catch (Exception $ex) {
775+
$sync_end = microtime(true);
776+
$log->setSyncWait((int)(1000000 * ($sync_end - $sync_start)));
777+
778+
if ($ex instanceof CommandException) {
779+
if ($future->getWasKilledByTimeout()) {
780+
$result_type = PhabricatorRepositorySyncEvent::RESULT_TIMEOUT;
781+
} else {
782+
$result_type = PhabricatorRepositorySyncEvent::RESULT_ERROR;
783+
}
784+
785+
$log
786+
->setResultCode($ex->getError())
787+
->setResultType($result_type)
788+
->setProperty('stdout', $ex->getStdout())
789+
->setProperty('stderr', $ex->getStderr());
790+
} else {
791+
$log
792+
->setResultCode(1)
793+
->setResultType(PhabricatorRepositorySyncEvent::RESULT_EXCEPTION)
794+
->setProperty('message', $ex->getMessage());
795+
}
796+
797+
$log->save();
798+
752799
$this->logLine(
753800
pht(
754801
'Synchronization of "%s" from leader "%s" failed: %s',
755802
$device->getName(),
756803
$binding->getDevice()->getName(),
757804
$ex->getMessage()));
805+
758806
throw $ex;
759807
}
808+
809+
$sync_end = microtime(true);
810+
811+
$log
812+
->setSyncWait((int)(1000000 * ($sync_end - $sync_start)))
813+
->setResultCode(0)
814+
->setResultType(PhabricatorRepositorySyncEvent::RESULT_SYNC)
815+
->save();
760816
}
761817

762818

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<?php
2+
3+
final class PhabricatorRepositorySyncEventPHIDType extends PhabricatorPHIDType {
4+
5+
const TYPECONST = 'SYNE';
6+
7+
public function getTypeName() {
8+
return pht('Sync Event');
9+
}
10+
11+
public function newObject() {
12+
return new PhabricatorRepositorySyncEvent();
13+
}
14+
15+
public function getPHIDTypeApplicationClass() {
16+
return 'PhabricatorDiffusionApplication';
17+
}
18+
19+
protected function buildQueryForObjects(
20+
PhabricatorObjectQuery $query,
21+
array $phids) {
22+
23+
return id(new PhabricatorRepositorySyncEventQuery())
24+
->withPHIDs($phids);
25+
}
26+
27+
public function loadHandles(
28+
PhabricatorHandleQuery $query,
29+
array $handles,
30+
array $objects) {
31+
32+
foreach ($handles as $phid => $handle) {
33+
$event = $objects[$phid];
34+
35+
$handle->setName(pht('Sync Event %d', $event->getID()));
36+
}
37+
}
38+
39+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
<?php
2+
3+
final class PhabricatorRepositorySyncEventQuery
4+
extends PhabricatorCursorPagedPolicyAwareQuery {
5+
6+
private $ids;
7+
private $phids;
8+
private $repositoryPHIDs;
9+
private $epochMin;
10+
private $epochMax;
11+
12+
public function withIDs(array $ids) {
13+
$this->ids = $ids;
14+
return $this;
15+
}
16+
17+
public function withPHIDs(array $phids) {
18+
$this->phids = $phids;
19+
return $this;
20+
}
21+
22+
public function withRepositoryPHIDs(array $repository_phids) {
23+
$this->repositoryPHIDs = $repository_phids;
24+
return $this;
25+
}
26+
27+
public function withEpochBetween($min, $max) {
28+
$this->epochMin = $min;
29+
$this->epochMax = $max;
30+
return $this;
31+
}
32+
33+
public function newResultObject() {
34+
return new PhabricatorRepositoryPullEvent();
35+
}
36+
37+
protected function loadPage() {
38+
return $this->loadStandardPage($this->newResultObject());
39+
}
40+
41+
protected function willFilterPage(array $events) {
42+
$repository_phids = mpull($events, 'getRepositoryPHID');
43+
$repository_phids = array_filter($repository_phids);
44+
45+
if ($repository_phids) {
46+
$repositories = id(new PhabricatorRepositoryQuery())
47+
->setViewer($this->getViewer())
48+
->withPHIDs($repository_phids)
49+
->execute();
50+
$repositories = mpull($repositories, null, 'getPHID');
51+
} else {
52+
$repositories = array();
53+
}
54+
55+
foreach ($events as $key => $event) {
56+
$phid = $event->getRepositoryPHID();
57+
58+
if (empty($repositories[$phid])) {
59+
unset($events[$key]);
60+
$this->didRejectResult($event);
61+
continue;
62+
}
63+
64+
$event->attachRepository($repositories[$phid]);
65+
}
66+
67+
return $events;
68+
}
69+
70+
protected function buildWhereClauseParts(AphrontDatabaseConnection $conn) {
71+
$where = parent::buildWhereClauseParts($conn);
72+
73+
if ($this->ids !== null) {
74+
$where[] = qsprintf(
75+
$conn,
76+
'id IN (%Ld)',
77+
$this->ids);
78+
}
79+
80+
if ($this->phids !== null) {
81+
$where[] = qsprintf(
82+
$conn,
83+
'phid IN (%Ls)',
84+
$this->phids);
85+
}
86+
87+
if ($this->repositoryPHIDs !== null) {
88+
$where[] = qsprintf(
89+
$conn,
90+
'repositoryPHID IN (%Ls)',
91+
$this->repositoryPHIDs);
92+
}
93+
94+
if ($this->epochMin !== null) {
95+
$where[] = qsprintf(
96+
$conn,
97+
'epoch >= %d',
98+
$this->epochMin);
99+
}
100+
101+
if ($this->epochMax !== null) {
102+
$where[] = qsprintf(
103+
$conn,
104+
'epoch <= %d',
105+
$this->epochMax);
106+
}
107+
108+
return $where;
109+
}
110+
111+
public function getQueryApplicationClass() {
112+
return 'PhabricatorDiffusionApplication';
113+
}
114+
115+
}

0 commit comments

Comments
 (0)