Skip to content

Commit b25e4fd

Browse files
committed
Added mysql-based deduping of received SHA1
This is an attempt to remove duplicate emails. These duplicates occur when, for instance, a merge happens and GitHub pushes parts of both parents to us even though we have already seen one of them. This has been annoying a number of us for a while, and this is my first shot at curing the problem. Also split the config out into a separate file.
1 parent 1c0f8f7 commit b25e4fd

File tree

4 files changed

+48
-3
lines changed

4 files changed

+48
-3
lines changed
File renamed without changes.

git_hooks/email_hook.cfg.example

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
debug = 1
2+
ignoreregexp = '^(?:master|fixes\/)'
3+
<db>
4+
database = email_hook
5+
host = localhost
6+
user = email_hook
7+
password = email_hook
8+
</db>

git_hooks/email_hook.pl

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
#! /usr/bin/perl
22
# vim:ts=4:sw=4:ai:et:si:sts=4
3+
34
use strict;
45
use warnings;
56
use Apache2::Const -compile => qw(M_POST HTTP_METHOD_NOT_ALLOWED);
67
use CGI;
78
use JSON;
89
use Mail::Send;
9-
10-
my $debug = 1;
10+
use Config::General;
11+
use DBI;
12+
use File::Basename;
13+
use English;
14+
use Cwd 'abs_path';
1115

1216
my $r = shift;
1317

@@ -17,6 +21,12 @@
1721
return;
1822
}
1923
24+
# The config file sits next to this script; dirname() drops the trailing
# slash, so the path separator has to be added explicitly.
my $conffile = dirname(abs_path($0 or $PROGRAM_NAME)) . "/email_hook.cfg";
25+
# Parse the config file; a direct class-method call avoids the ambiguous
# indirect-object form ("new Config::General(...)").
my $conf = Config::General->new($conffile);
26+
my %config = $conf->getall;
27+
28+
# Debug defaults to 0 when unset.  "||" is required here: the low-precedence
# "or" binds looser than "=", so "... = $config{'debug'} or 0" never applies
# the default.
my $debug = $config{'debug'} || 0;
29+
2030
$r->content_type('text/html');
2131
$r->print();
2232
@@ -35,10 +45,23 @@
3545
my $branch = $payload->{"ref"};
3646
$branch =~ s/^refs\/.*?\///;
3747
38-
if ($branch !~ /^(?:master|fixes\/)/) {
48+
# Compile the branch filter from the config once.  The variable must be
# declared with "my" because the script runs under "use strict".
my $regexp = qr/$config{'ignoreregexp'}/;
49+
if ($branch !~ $regexp) {
3950
exit 0;
4051
}
4152
53+
my $dbh = DBI->connect("dbi:mysql:database=".$config{'db'}{'database'}.
54+
":host=".$config{'db'}{'host'},
55+
$config{'db'}{'user'}, $config{'db'}{'password'})
56+
# $DBI::errstr is the documented place to read the reason for a failed connect.
or die "Cannot connect to database: " . $DBI::errstr . "\n";
57+
58+
my $q = "SELECT sha1 FROM seen WHERE sha1 = ?";
59+
my $select_h = $dbh->prepare($q);
60+
61+
$q = "INSERT INTO seen (sha1, lastseen) VALUES (?, NULL)";
62+
my $insert_h = $dbh->prepare($q);
63+
64+
4265
# These maybe should go into a config file later
4366
my %headers = (
4467
"From" => 'MythTV <noreply@mythtv.org>',
@@ -50,6 +73,10 @@
5073
5174
foreach my $commit ( @{$payload->{"commits"}} ) {
5275
my $longsha = $commit->{"id"};
76+
$select_h->execute($longsha);
77+
my ($resultsha) = $select_h->fetchrow_array;
78+
next if defined $resultsha;
79+
5380
my $shortsha = substr $longsha, 0, 9;
5481
my $changeurl = $commit->{"url"};
5582
$changeurl =~ s/$longsha$/$shortsha/;
@@ -103,5 +130,7 @@
103130
my $fh = $msg->open;
104131
print $fh $email;
105132
$fh->close;
133+
134+
$insert_h->execute($longsha);
106135
}
107136

git_hooks/email_hook.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
create database email_hook;
2+
grant all privileges on email_hook.* to email_hook@localhost identified by "email_hook";
3+
use email_hook;
4+
CREATE TABLE `seen` (
5+
`sha1` VARCHAR( 40 ) NOT NULL ,
6+
`lastseen` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ,
7+
PRIMARY KEY ( `sha1` )
8+
);

0 commit comments

Comments
 (0)