Skip to content

Commit

Permalink
Update github parser to use the YAML feed.
Browse files Browse the repository at this point in the history
This commit also includes an update to the rakudolog test case.
The novemberlog test needs to be updated.
  • Loading branch information
Infinoid committed Aug 17, 2010
1 parent 7a4db20 commit db2d931
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 165 deletions.
87 changes: 43 additions & 44 deletions modules/local/githubparser.pm
Expand Up @@ -2,7 +2,7 @@ package modules::local::githubparser;
use strict;
use warnings;

use XML::Atom::Client;
use YAML::Syck;
use HTML::Entities;

use base 'modules::local::karmalog';
Expand Down Expand Up @@ -105,23 +105,22 @@ sub process_branch {
# allow the testsuite to call us in a slightly different way.
$self = $self->get_self() unless ref $self;
if(!defined($feed)) {
my $atom = XML::Atom::Client->new();
$feed = $atom->getFeed($$self{branches}{$branch}{url});
$feed = get_yaml($$self{branches}{$branch}{url});
}
if(!defined($feed)) {
warn "could not fetch branch $branch feed " . $$self{branches}{$branch}{url};
return;
}

my @items = $feed->entries;
@items = sort { $a->updated cmp $b->updated } @items; # ascending order
my @items = @{$$feed{commits}};
@items = sort { $$a{committed_date} cmp $$b{committed_date} } @items; # ascending order
my $newest = $items[-1];
my $latest = $newest->updated;
my $latest = $$newest{committed_date};

# skip the first run, to prevent new installs from flooding the channel
foreach my $item (@items) {
my $link = $item->link->href;
my ($rev) = $link =~ m|/commit/([a-z0-9]{40})|;
my $link = $$item{url};
my ($rev) = $$item{id};
my ($proj) = $link =~ m|http://github.com/[^/]+/([^/]+)/|;
if(exists($$self{not_first_time})) {
return unless $proj eq $$self{project};
Expand Down Expand Up @@ -232,7 +231,8 @@ sub try_link {
foreach my $branchname (@$branches) {
my $branch = $$self{branches}{$branchname};
if(!defined($branch)) {
my $url = "http://github.com/feeds/$author/commits/$project/$branchname";
# http://github.com/api/v2/yaml/commits/list/rakudo/rakudo/master
my $url = "http://github.com/api/v2/yaml/commits/list/$author/$project/$branchname";
$$self{branches}{$branchname} = $branch = {
url => $url,
targets => [],
Expand Down Expand Up @@ -280,45 +280,23 @@ repository.
sub output_item {
my ($self, $item, $branch, $link, $rev) = @_;
my $prefix = 'unknown';
my $creator = $item->author;
if(defined($creator)) {
$creator = $creator->name;
} else {
$creator = 'unknown';
}
my $desc = $item->content;
if(defined($desc)) {
$desc = $desc->body;
} else {
$desc = '(no commit message)';
}
my $creator = $$item{author}{login};
$creator = $$item{author}{name} unless defined $creator;
$creator = 'unknown' unless defined $creator;
my $desc = $$item{message};
$desc = '(no commit message)' unless defined $desc;

my ($log, $files);
$desc =~ s/^.*<pre>//;
$desc =~ s/<\/pre>.*$//;
my @lines = split("\n", $desc);
my @files;
while($lines[0] =~ /^[+m-] (.+)/) {
push(@files, $1);
shift(@lines);
}
return main::lprint($$self{project}.": error parsing filenames from description")
unless $lines[0] eq '';
shift(@lines);
pop(@lines) if $lines[-1] =~ /^git-svn-id: http:/;
pop(@lines) while scalar(@lines) && $lines[-1] eq '';
$log = join("\n", @lines);

$prefix = longest_common_prefix(@files);
$prefix //= '/';
$prefix =~ s|^/||; # cut off the leading slash
if(scalar @files > 1) {
$prefix .= " (" . scalar(@files) . " files)";
}
# $prefix = "";
# $prefix //= '/';
# $prefix =~ s|^/||; # cut off the leading slash
# if(scalar @files > 1) {
# $prefix .= " (" . scalar(@files) . " files)";
# }

$log =~ s|<br */>||g;
decode_entities($log);
my @log_lines = split(/[\r\n]+/, $log);
$rev = substr($rev, 0, 7);

my $project = $$self{project};
Expand All @@ -330,9 +308,9 @@ sub output_item {
feed => $project,
rev => $rev,
user => $creator,
log => \@log_lines,
log => \@lines,
link => $link,
prefix => $prefix,
# prefix => $prefix,
targets => $$self{branches}{$branch}{targets},
);

Expand Down Expand Up @@ -365,4 +343,25 @@ sub get_self {
return $objects_by_package{$pkg};
}

=head2 fetch_yaml

    my $data = fetch_yaml($url);

Given a URL, fetches its content over HTTP and tries to parse it as a YAML
document.  Returns the parsed data structure on success.  Returns undef if
the request is unsuccessful or if the response body cannot be parsed as YAML.

=head2 get_yaml

Alias for C<fetch_yaml>.  This is the name process_branch calls when it has
to fetch a branch feed itself.

=cut

# NOTE(review): no "use LWP::UserAgent" is visible in this file's import
# block.  Loading it here is harmless if something else already did -- confirm
# whether this line is still needed.
require LWP::UserAgent;

# A single user agent shared by every fetch made from this package.
my $lwp = LWP::UserAgent->new();
$lwp->timeout(60);     # seconds
$lwp->env_proxy();     # pick up proxy settings from the environment

sub fetch_yaml {
    my ($url) = @_;

    my $response = $lwp->get($url);
    return undef unless $response->is_success;

    # The YAML parser raises an exception on malformed input.  Trap it so a
    # bad response honours the documented "undef on error" contract instead
    # of propagating the exception to the caller.
    my $rv;
    my $parsed_ok = eval { $rv = Load($response->content); 1 };
    if(!$parsed_ok) {
        warn "could not parse YAML from $url: $@";
        return undef;
    }
    return $rv;
}

sub get_yaml {
    return fetch_yaml(@_);
}

1;
165 changes: 44 additions & 121 deletions t/rakudolog.t
Expand Up @@ -17,33 +17,33 @@ BEGIN { $tests = 0; };

plan tests => $tests;

my $xml_header = << '__XML__';
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
<id>tag:github.com,2008:/feeds/rakudo/commits/rakudo/master</id>
<link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commits/master/"/>
<link type="application/atom+xml" rel="self" href="http://github.com/feeds/rakudo/commits/rakudo/master"/>
<title>Recent Commits to rakudo:master</title>
__XML__
my $yaml_header = << '__YAML__';
---
commits:
__YAML__

my $xml_footer = << '__XML__';
<entry>
<id>tag:github.com,2008:Grit::Commit/c7d2d7784f80b2c9f05b68d4aa5a6e21a2f2a257</id>
<link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/c7d2d7784f80b2c9f05b68d4aa5a6e21a2f2a257"/>
<title>Merge branch 'master' of git@github.com:rakudo/rakudo</title>
<updated>2009-05-01T09:32:55-07:00</updated>
<content type="html">&lt;pre&gt;
Merge branch 'master' of git@github.com:rakudo/rakudo&lt;/pre&gt;</content>
<author>
<name>pmichaud</name>
</author>
</entry>
</feed>
__XML__
my $yaml_footer = << '__YAML__';
- parents:
- id: 83b2cdfa64becdef052417962cc114e38f5920d8
author:
name: Carl Masak
login: masak
email: cmasak@gmail.com
url: http://github.com/rakudo/rakudo/commit/73a76745a0de01b7361a75ac8347c516532f92aa
id: 73a76745a0de01b7361a75ac8347c516532f92aa
committed_date: "2010-08-15T16:59:44-07:00"
authored_date: "2010-08-15T16:59:44-07:00"
message: "[Buf] added prefix/infix ~^, infix ~& and infix ~|"
tree: c797ca702afa584fa8a28d44376b3e942174c30e
committer:
name: Carl Masak
login: masak
email: cmasak@gmail.com
__YAML__

# initial sync
my $xml = $xml_header . '<updated>2009-05-01T09:32:55-07:00</updated>' . $xml_footer;
my $feed = XML::Atom::Feed->new(\$xml);
my $yaml = $yaml_header . $yaml_footer;
my $feed = YAML::Syck::Load($yaml);
my $rl = modules::local::rakudolog->get_self();
ok(!exists($$rl{lastrev}), "no lastrev by default");
call_func('process_branch', 'master', $feed);
Expand All @@ -55,28 +55,26 @@ BEGIN { $tests += 3 };
# update
reset_output();
$$rl{not_first_time} = 1;
$xml_footer = << '__XML__' . $xml_footer;
<entry>
<id>tag:github.com,2008:Grit::Commit/7f5af50c19baf360dacc5779b9c013fb14db34d3</id>
<link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/7f5af50c19baf360dacc5779b9c013fb14db34d3"/>
<title>Big refactor of Rakudo's enums, making them more compliant with S12, and building them with much less generated code. Track an enum related grammar change from STD.pm too. Also gets rid of various bits of cruft that only hung around because of the previous enums implementation needing them. Bool is no longer sort-of-enum-ish (before we had some curious interactions there). Also an infinite loop in infix:&lt;but&gt; is fixed.</title>
<updated>2009-05-01T09:58:40-07:00</updated>
<content type="html">&lt;pre&gt;m src/builtins/enums.pir
m src/builtins/guts.pir
m src/builtins/op.pir
m src/classes/Abstraction.pir
m src/classes/Bool.pir
m src/parser/actions.pm
m src/parser/grammar.pg
Big refactor of Rakudo's enums, making them more compliant with S12, and building them with much less generated code. Track an enum related grammar change from STD.pm too. Also gets rid of various bits of cruft that only hung around because of the previous enums implementation needing them. Bool is no longer sort-of-enum-ish (before we had some curious interactions there). Also an infinite loop in infix:&amp;lt;but&amp;gt; is fixed.&lt;/pre&gt;</content>
<author>
<name>jnthn</name>
</author>
</entry>
__XML__
$xml = $xml_header . '<updated>2009-05-01T09:58:40-07:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
$yaml_footer = << '__YAML__' . $yaml_footer;
- parents:
- id: 73a76745a0de01b7361a75ac8347c516532f92aa
author:
name: Carl Masak
login: masak
email: cmasak@gmail.com
url: http://github.com/rakudo/rakudo/commit/bef86ee3a3253a8840c077e5d1a089579949a58a
id: bef86ee3a3253a8840c077e5d1a089579949a58a
committed_date: "2010-08-15T17:04:08-07:00"
authored_date: "2010-08-15T17:04:08-07:00"
message: "[t/spectest.data] added S03-operators/buf.t"
tree: c809290826b06fa32adae1a76d1d987ed632b5cc
committer:
name: Carl Masak
login: masak
email: cmasak@gmail.com
__YAML__
$yaml = $yaml_header . $yaml_footer;
$feed = YAML::Syck::Load($yaml);
call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 6, "6 lines of output");
Expand All @@ -87,78 +85,3 @@ is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
like($$output[1]{text}, qr|rakudo: |, "master branch");
BEGIN { $tests += 7 };

# update with multiple commits having the same timestamp
reset_output();
$xml_footer = << '__XML__' . $xml_footer;
<entry>
<id>tag:github.com,2008:Grit::Commit/5bd02be9924c2f6013e4601e55d103b1e1a30a14</id>
<link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/5bd02be9924c2f6013e4601e55d103b1e1a30a14"/>
<title>Small optimizations to signature binding; costs us a PMC creation and a method call less every invocation of something that has a signature, which gives a 7% speed-up in a calling benchmark.</title>
<updated>2009-05-15T06:45:18-07:00</updated>
<content type="html">&lt;pre&gt;m src/classes/Signature.pir
Small optimizations to signature binding; costs us a PMC creation and a method call less every invocation of something that has a signature, which gives a 7% speed-up in a calling benchmark.&lt;/pre&gt;</content>
<author>
<name>jnthn</name>
</author>
</entry>
<entry>
<id>tag:github.com,2008:Grit::Commit/b49cce1a84c1f229d1c542c2dc2556e2912aa960</id>
<link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/b49cce1a84c1f229d1c542c2dc2556e2912aa960"/>
<title>Add some micro-benchmakrs.</title>
<updated>2009-05-15T06:45:18-07:00</updated>
<content type="html">&lt;pre&gt;+ tools/benchmark.pl
Add some micro-benchmakrs.&lt;/pre&gt;</content>
<author>
<name>jnthn</name>
</author>
</entry>
__XML__
$xml = $xml_header . '<updated>2009-05-15T06:45:18-07:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 12, "12 lines of output");
is($$output[0]{net} , 'magnet' , "line to magnet/#parrot");
is($$output[0]{chan}, '#parrot' , "line to magnet/#parrot");
is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
# The module sorts by <updated> time, but the time is the same for these two commits.
# Do it this way so we don't depend on perl's internal sort algorithm details.
my @message_list = ($$output[2]{text}, $$output[8]{text});
is(scalar grep(/Small optimizations/ , @message_list), 1, "log message");
is(scalar grep(/Add some micro-bench/, @message_list), 1, "log message");
BEGIN { $tests += 7 };

# update with their post-2010-02-26 feed format (added rel attribute to the <link> tag)
reset_output();
$xml_footer = << '__XML__' . $xml_footer;
<entry>
<id>tag:github.com,2008:Grit::Commit/b131f6052a181bdad8f7b9e5abe30c9c2c2e360e</id>
<link type="text/html" href="http://github.com/rakudo/rakudo/commit/b131f6052a181bdad8f7b9e5abe30c9c2c2e360e" rel="alternate"/>
<title>Split .defined method and defined vtable method for language interop, as per pmichaud++.</title>
<updated>2010-02-27T16:46:53-08:00</updated>
<content type="html">&lt;pre&gt;m src/builtins/Parcel.pir
Split .defined method and defined vtable method for language interop, as per pmichaud++.&lt;/pre&gt;</content>
<author>
<name>Jonathan Worthington</name>
</author>
</entry>
__XML__
$xml = $xml_header . '<updated>2010-02-27T16:46:53-08:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 6, "6 lines of output");
is($$output[0]{net} , 'magnet' , "line to magnet/#parrot");
is($$output[0]{chan}, '#parrot' , "line to magnet/#parrot");
is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
# The module sorts by <updated> time, but the time is the same for these two commits.
# Do it this way so we don't depend on perl's internal sort algorithm details.
@message_list = ($$output[2]{text});
is(scalar grep(/Split .defined method/ , @message_list), 1, "log message");
BEGIN { $tests += 6 };

0 comments on commit db2d931

Please sign in to comment.