Permalink
Browse files

Update github parser to use the YAML feed.

This commit also includes an update to the rakudolog test case.
The novemberlog test needs to be updated.
  • Loading branch information...
1 parent 7a4db20 commit db2d931328ebe8f04c88ac65b7096d2abe1cdf2d @Infinoid committed Aug 17, 2010
Showing with 87 additions and 165 deletions.
  1. +43 −44 modules/local/githubparser.pm
  2. +44 −121 t/rakudolog.t
@@ -2,7 +2,7 @@ package modules::local::githubparser;
use strict;
use warnings;
-use XML::Atom::Client;
+use YAML::Syck;
use HTML::Entities;
use base 'modules::local::karmalog';
@@ -105,23 +105,22 @@ sub process_branch {
# allow the testsuite to call us in a slightly different way.
$self = $self->get_self() unless ref $self;
if(!defined($feed)) {
- my $atom = XML::Atom::Client->new();
- $feed = $atom->getFeed($$self{branches}{$branch}{url});
+ $feed = get_yaml($$self{branches}{$branch}{url});
}
if(!defined($feed)) {
warn "could not fetch branch $branch feed " . $$self{branches}{$branch}{url};
return;
}
- my @items = $feed->entries;
- @items = sort { $a->updated cmp $b->updated } @items; # ascending order
+ my @items = @{$$feed{commits}};
+ @items = sort { $$a{committed_date} cmp $$b{committed_date} } @items; # ascending order
my $newest = $items[-1];
- my $latest = $newest->updated;
+ my $latest = $$newest{committed_date};
# skip the first run, to prevent new installs from flooding the channel
foreach my $item (@items) {
- my $link = $item->link->href;
- my ($rev) = $link =~ m|/commit/([a-z0-9]{40})|;
+ my $link = $$item{url};
+ my ($rev) = $$item{id};
my ($proj) = $link =~ m|http://github.com/[^/]+/([^/]+)/|;
if(exists($$self{not_first_time})) {
return unless $proj eq $$self{project};
@@ -232,7 +231,8 @@ sub try_link {
foreach my $branchname (@$branches) {
my $branch = $$self{branches}{$branchname};
if(!defined($branch)) {
- my $url = "http://github.com/feeds/$author/commits/$project/$branchname";
+ # http://github.com/api/v2/yaml/commits/list/rakudo/rakudo/master
+ my $url = "http://github.com/api/v2/yaml/commits/list/$author/$project/$branchname";
$$self{branches}{$branchname} = $branch = {
url => $url,
targets => [],
@@ -280,45 +280,23 @@ repository.
sub output_item {
my ($self, $item, $branch, $link, $rev) = @_;
my $prefix = 'unknown';
- my $creator = $item->author;
- if(defined($creator)) {
- $creator = $creator->name;
- } else {
- $creator = 'unknown';
- }
- my $desc = $item->content;
- if(defined($desc)) {
- $desc = $desc->body;
- } else {
- $desc = '(no commit message)';
- }
+ my $creator = $$item{author}{login};
+ $creator = $$item{author}{name} unless defined $creator;
+ $creator = 'unknown' unless defined $creator;
+ my $desc = $$item{message};
+ $desc = '(no commit message)' unless defined $desc;
- my ($log, $files);
- $desc =~ s/^.*<pre>//;
- $desc =~ s/<\/pre>.*$//;
my @lines = split("\n", $desc);
- my @files;
- while($lines[0] =~ /^[+m-] (.+)/) {
- push(@files, $1);
- shift(@lines);
- }
- return main::lprint($$self{project}.": error parsing filenames from description")
- unless $lines[0] eq '';
- shift(@lines);
pop(@lines) if $lines[-1] =~ /^git-svn-id: http:/;
pop(@lines) while scalar(@lines) && $lines[-1] eq '';
- $log = join("\n", @lines);
- $prefix = longest_common_prefix(@files);
- $prefix //= '/';
- $prefix =~ s|^/||; # cut off the leading slash
- if(scalar @files > 1) {
- $prefix .= " (" . scalar(@files) . " files)";
- }
+# $prefix = "";
+# $prefix //= '/';
+# $prefix =~ s|^/||; # cut off the leading slash
+# if(scalar @files > 1) {
+# $prefix .= " (" . scalar(@files) . " files)";
+# }
- $log =~ s|<br */>||g;
- decode_entities($log);
- my @log_lines = split(/[\r\n]+/, $log);
$rev = substr($rev, 0, 7);
my $project = $$self{project};
@@ -330,9 +308,9 @@ sub output_item {
feed => $project,
rev => $rev,
user => $creator,
- log => \@log_lines,
+ log => \@lines,
link => $link,
- prefix => $prefix,
+# prefix => $prefix,
targets => $$self{branches}{$branch}{targets},
);
@@ -365,4 +343,25 @@ sub get_self {
return $objects_by_package{$pkg};
}
+=head2 fetch_yaml

+Given a URL, fetches its content over HTTP and tries to parse it as a YAML
+document.  Returns the parsed data structure on success.  Returns undef if
+the request fails or if the response body cannot be parsed as YAML.

+The same function is also available under the name C<get_yaml>, which is the
+name process_branch uses when it fetches a branch feed.

+=cut

+# Loaded here because this code is the direct user of LWP::UserAgent; a
+# repeated "use" is harmless if the module is already loaded elsewhere.
+use LWP::UserAgent;

+# One shared user agent for every fetch: 60 second timeout, with proxy
+# settings picked up from the environment.
+my $lwp = LWP::UserAgent->new();
+$lwp->timeout(60);
+$lwp->env_proxy();

+sub fetch_yaml {
+    my $url = shift;

+    my $response = $lwp->get($url);
+    return unless $response->is_success;

+    # Load() is run inside eval so that a malformed document results in the
+    # documented undef return instead of an exception reaching the caller.
+    my $rv = eval { Load($response->content) };
+    if($@) {
+        warn "could not parse YAML from $url: $@";
+        return;
+    }
+    return $rv;
+}

+# process_branch calls this helper as get_yaml(); provide that name as a thin
+# wrapper so the call resolves to the implementation above.
+sub get_yaml {
+    return fetch_yaml(@_);
+}
+
1;
View
@@ -17,33 +17,33 @@ BEGIN { $tests = 0; };
plan tests => $tests;
-my $xml_header = << '__XML__';
-<?xml version="1.0" encoding="UTF-8"?>
-<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
- <id>tag:github.com,2008:/feeds/rakudo/commits/rakudo/master</id>
- <link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commits/master/"/>
- <link type="application/atom+xml" rel="self" href="http://github.com/feeds/rakudo/commits/rakudo/master"/>
- <title>Recent Commits to rakudo:master</title>
-__XML__
+my $yaml_header = << '__YAML__';
+---
+commits:
+__YAML__
-my $xml_footer = << '__XML__';
- <entry>
- <id>tag:github.com,2008:Grit::Commit/c7d2d7784f80b2c9f05b68d4aa5a6e21a2f2a257</id>
- <link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/c7d2d7784f80b2c9f05b68d4aa5a6e21a2f2a257"/>
- <title>Merge branch 'master' of git@github.com:rakudo/rakudo</title>
- <updated>2009-05-01T09:32:55-07:00</updated>
- <content type="html">&lt;pre&gt;
-Merge branch 'master' of git@github.com:rakudo/rakudo&lt;/pre&gt;</content>
- <author>
- <name>pmichaud</name>
- </author>
- </entry>
-</feed>
-__XML__
+my $yaml_footer = << '__YAML__';
+- parents:
+ - id: 83b2cdfa64becdef052417962cc114e38f5920d8
+ author:
+ name: Carl Masak
+ login: masak
+ email: cmasak@gmail.com
+ url: http://github.com/rakudo/rakudo/commit/73a76745a0de01b7361a75ac8347c516532f92aa
+ id: 73a76745a0de01b7361a75ac8347c516532f92aa
+ committed_date: "2010-08-15T16:59:44-07:00"
+ authored_date: "2010-08-15T16:59:44-07:00"
+ message: "[Buf] added prefix/infix ~^, infix ~& and infix ~|"
+ tree: c797ca702afa584fa8a28d44376b3e942174c30e
+ committer:
+ name: Carl Masak
+ login: masak
+ email: cmasak@gmail.com
+__YAML__
# initial sync
-my $xml = $xml_header . '<updated>2009-05-01T09:32:55-07:00</updated>' . $xml_footer;
-my $feed = XML::Atom::Feed->new(\$xml);
+my $yaml = $yaml_header . $yaml_footer;
+my $feed = YAML::Syck::Load($yaml);
my $rl = modules::local::rakudolog->get_self();
ok(!exists($$rl{lastrev}), "no lastrev by default");
call_func('process_branch', 'master', $feed);
@@ -55,28 +55,26 @@ BEGIN { $tests += 3 };
# update
reset_output();
$$rl{not_first_time} = 1;
-$xml_footer = << '__XML__' . $xml_footer;
- <entry>
- <id>tag:github.com,2008:Grit::Commit/7f5af50c19baf360dacc5779b9c013fb14db34d3</id>
- <link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/7f5af50c19baf360dacc5779b9c013fb14db34d3"/>
- <title>Big refactor of Rakudo's enums, making them more compliant with S12, and building them with much less generated code. Track an enum related grammar change from STD.pm too. Also gets rid of various bits of cruft that only hung around because of the previous enums implementation needing them. Bool is no longer sort-of-enum-ish (before we had some curious interactions there). Also an infinite loop in infix:&lt;but&gt; is fixed.</title>
- <updated>2009-05-01T09:58:40-07:00</updated>
- <content type="html">&lt;pre&gt;m src/builtins/enums.pir
-m src/builtins/guts.pir
-m src/builtins/op.pir
-m src/classes/Abstraction.pir
-m src/classes/Bool.pir
-m src/parser/actions.pm
-m src/parser/grammar.pg
-
-Big refactor of Rakudo's enums, making them more compliant with S12, and building them with much less generated code. Track an enum related grammar change from STD.pm too. Also gets rid of various bits of cruft that only hung around because of the previous enums implementation needing them. Bool is no longer sort-of-enum-ish (before we had some curious interactions there). Also an infinite loop in infix:&amp;lt;but&amp;gt; is fixed.&lt;/pre&gt;</content>
- <author>
- <name>jnthn</name>
- </author>
- </entry>
-__XML__
-$xml = $xml_header . '<updated>2009-05-01T09:58:40-07:00</updated>' . $xml_footer;
-$feed = XML::Atom::Feed->new(\$xml);
+$yaml_footer = << '__YAML__' . $yaml_footer;
+- parents:
+ - id: 73a76745a0de01b7361a75ac8347c516532f92aa
+ author:
+ name: Carl Masak
+ login: masak
+ email: cmasak@gmail.com
+ url: http://github.com/rakudo/rakudo/commit/bef86ee3a3253a8840c077e5d1a089579949a58a
+ id: bef86ee3a3253a8840c077e5d1a089579949a58a
+ committed_date: "2010-08-15T17:04:08-07:00"
+ authored_date: "2010-08-15T17:04:08-07:00"
+ message: "[t/spectest.data] added S03-operators/buf.t"
+ tree: c809290826b06fa32adae1a76d1d987ed632b5cc
+ committer:
+ name: Carl Masak
+ login: masak
+ email: cmasak@gmail.com
+__YAML__
+$yaml = $yaml_header . $yaml_footer;
+$feed = YAML::Syck::Load($yaml);
call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 6, "6 lines of output");
@@ -87,78 +85,3 @@ is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
like($$output[1]{text}, qr|rakudo: |, "master branch");
BEGIN { $tests += 7 };
-
-# update with multiple commits having the same timestamp
-reset_output();
-$xml_footer = << '__XML__' . $xml_footer;
- <entry>
- <id>tag:github.com,2008:Grit::Commit/5bd02be9924c2f6013e4601e55d103b1e1a30a14</id>
- <link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/5bd02be9924c2f6013e4601e55d103b1e1a30a14"/>
- <title>Small optimizations to signature binding; costs us a PMC creation and a method call less every invocation of something that has a signature, which gives a 7% speed-up in a calling benchmark.</title>
- <updated>2009-05-15T06:45:18-07:00</updated>
- <content type="html">&lt;pre&gt;m src/classes/Signature.pir
-
-Small optimizations to signature binding; costs us a PMC creation and a method call less every invocation of something that has a signature, which gives a 7% speed-up in a calling benchmark.&lt;/pre&gt;</content>
- <author>
- <name>jnthn</name>
- </author>
- </entry>
- <entry>
- <id>tag:github.com,2008:Grit::Commit/b49cce1a84c1f229d1c542c2dc2556e2912aa960</id>
- <link type="text/html" rel="alternate" href="http://github.com/rakudo/rakudo/commit/b49cce1a84c1f229d1c542c2dc2556e2912aa960"/>
- <title>Add some micro-benchmakrs.</title>
- <updated>2009-05-15T06:45:18-07:00</updated>
- <content type="html">&lt;pre&gt;+ tools/benchmark.pl
-
-Add some micro-benchmakrs.&lt;/pre&gt;</content>
- <author>
- <name>jnthn</name>
- </author>
- </entry>
-__XML__
-$xml = $xml_header . '<updated>2009-05-15T06:45:18-07:00</updated>' . $xml_footer;
-$feed = XML::Atom::Feed->new(\$xml);
-call_func('process_branch', 'master', $feed);
-$output = [output()];
-is(scalar @$output, 12, "12 lines of output");
-is($$output[0]{net} , 'magnet' , "line to magnet/#parrot");
-is($$output[0]{chan}, '#parrot' , "line to magnet/#parrot");
-is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
-is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
-# The module sorts by <updated> time, but the time is the same for these two commits.
-# Do it this way so we don't depend on perl's internal sort algorithm details.
-my @message_list = ($$output[2]{text}, $$output[8]{text});
-is(scalar grep(/Small optimizations/ , @message_list), 1, "log message");
-is(scalar grep(/Add some micro-bench/, @message_list), 1, "log message");
-BEGIN { $tests += 7 };
-
-# update with their post-2010-02-26 feed format (added rel attribute to the <link> tag)
-reset_output();
-$xml_footer = << '__XML__' . $xml_footer;
- <entry>
- <id>tag:github.com,2008:Grit::Commit/b131f6052a181bdad8f7b9e5abe30c9c2c2e360e</id>
- <link type="text/html" href="http://github.com/rakudo/rakudo/commit/b131f6052a181bdad8f7b9e5abe30c9c2c2e360e" rel="alternate"/>
- <title>Split .defined method and defined vtable method for language interop, as per pmichaud++.</title>
- <updated>2010-02-27T16:46:53-08:00</updated>
- <content type="html">&lt;pre&gt;m src/builtins/Parcel.pir
-
-Split .defined method and defined vtable method for language interop, as per pmichaud++.&lt;/pre&gt;</content>
- <author>
- <name>Jonathan Worthington</name>
- </author>
- </entry>
-__XML__
-$xml = $xml_header . '<updated>2010-02-27T16:46:53-08:00</updated>' . $xml_footer;
-$feed = XML::Atom::Feed->new(\$xml);
-call_func('process_branch', 'master', $feed);
-$output = [output()];
-is(scalar @$output, 6, "6 lines of output");
-is($$output[0]{net} , 'magnet' , "line to magnet/#parrot");
-is($$output[0]{chan}, '#parrot' , "line to magnet/#parrot");
-is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
-is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
-# The module sorts by <updated> time, but the time is the same for these two commits.
-# Do it this way so we don't depend on perl's internal sort algorithm details.
-@message_list = ($$output[2]{text});
-is(scalar grep(/Split .defined method/ , @message_list), 1, "log message");
-BEGIN { $tests += 6 };

0 comments on commit db2d931

Please sign in to comment.