Skip to content

Commit

Permalink
Fixes for our awful html history parsing heuristics
Browse files Browse the repository at this point in the history
  • Loading branch information
obra committed Jun 5, 2009
1 parent 605ac65 commit 4484ea2
Showing 1 changed file with 62 additions and 63 deletions.
125 changes: 62 additions & 63 deletions lib/Net/Trac/TicketHistoryEntry.pm
Expand Up @@ -58,13 +58,13 @@ has connection => (

has prop_changes => ( isa => 'HashRef', is => 'rw' );

has is_create => ( isa => 'Bool', is => 'rw', default => 0 );
has author => ( isa => 'Str', is => 'rw' );
has date => ( isa => 'DateTime', is => 'rw' );
has category => ( isa => 'Str', is => 'rw' );
has content => ( isa => 'Str', is => 'rw' );
has is_create => ( isa => 'Bool', is => 'rw', default => 0 );
has author => ( isa => 'Str', is => 'rw' );
has date => ( isa => 'DateTime', is => 'rw' );
has category => ( isa => 'Str', is => 'rw' );
has content => ( isa => 'Str', is => 'rw' );
has attachment => ( isa => 'Net::Trac::TicketAttachment', is => 'rw' );
has ticket => ( isa => 'Net::Trac::Ticket', is => 'rw', weak_ref => 1);
has ticket => ( isa => 'Net::Trac::Ticket', is => 'rw', weak_ref => 1 );

=head1 METHODS
Expand All @@ -77,129 +77,128 @@ out the fields of this class.

sub parse_feed_entry {
my $self = shift;
my $e = shift;
my $e = shift;

# We use a reference to a copy of ticket state as it was after this feed
# We use a reference to a copy of ticket state as it was after this feed
# entry to interpret what "x added, y removed" meant for absolute values
# of keywords

my $ticket_state = shift;
my $ticket_state = shift;

if ($e =~ m|<dc:creator>(.*?)</dc:creator>|is) {
if ( $e =~ m|<dc:creator>(.*?)</dc:creator>|is ) {
my $author = $1;
$self->author( $author );
$self->author($author);
}

if ($e =~ m|<pubDate>(.*?)</pubDate>|is) {
if ( $e =~ m|<pubDate>(.*?)</pubDate>|is ) {
my $date = $1;
$self->date(DateTime->from_epoch(epoch => str2time($date)));
$self->date( DateTime->from_epoch( epoch => str2time($date) ) );
}

if ($e =~ m|<category>(.*?)</category>|is) {
if ( $e =~ m|<category>(.*?)</category>|is ) {
my $c = $1;
$self->category($c);
}


if ($e =~ m|<description>\s*(.*?)\s*</description>|is) {
if ( $e =~ m|<description>\s*(.*?)\s*</description>|is ) {
my $desc = $1;

if ( $desc =~ s|^\s*?&lt;ul&gt;(.*?)&lt;/ul&gt;||is ) {
my $props = $1;
$self->prop_changes( $self->_parse_props( $props, $ticket_state ) );
}

$desc =~ s/&gt;/>/gi;
$desc =~ s/&lt;/</gi;
$desc =~ s/&amp;/&/gi;

if ( $desc =~ s|^\s*?<ul>(.*)</ul>||is) {
my $props = $1;
$self->prop_changes( $self->_parse_props($props, $ticket_state) );
}

$self->content($desc);
$self->content($desc);
}
}

sub _parse_props {
my $self = shift;
my $raw = shift || '';
my $self = shift;
my $raw = shift || '';
my $ticket_state = shift;
$raw =~ s/&gt;/>/gi;
$raw =~ s/&lt;/</gi;
$raw =~ s/&amp;/&/gi;

# throw out the wrapping <li>
$raw =~ s|^\s*?<li>(.*)</li>\s*?$|$1|is;
$raw =~ s|^\s*?<li>(.*)</li>\s*?$|$1|is;
my @prop_lines = split( m#</li>\s*<li>#s, $raw );
my $props = {};
my $props = {};

foreach my $line (@prop_lines) {
my ($prop, $old, $new);
if ($line =~ m{<strong>attachment</strong>}) {
my ( $prop, $old, $new );
if ( $line =~ m{<strong>attachment</strong>} ) {
my ($name) = $line =~ m!<em>(.*?)</em>!;
my $content = $self->connection->_fetch(
"/attachment/ticket/" . $self->ticket->id . "/$name" )
or next;
my $content = $self->connection->_fetch( "/attachment/ticket/" . $self->ticket->id . "/$name" )
or next;

if ( $content =~
m{<div id="content" class="attachment">(.+?)</div>}is )
{
if ( $content =~ m{<div id="content" class="attachment">(.+?)</div>}is ) {
my $frag = $1;
my $att = Net::Trac::TicketAttachment->new(
connection => $self->connection,
ticket => $self->ticket->id,
filename => $name,
);
$att->_parse_html_chunk($frag);
$self->attachment( $att );
$self->attachment($att);
}

next;
}
if ($line =~ m{<strong>description</strong>}) {
# We can't parse trac's crazy "go read a diff on a webpage handling
if ( $line =~ m{<strong>description</strong>} ) {

# We can't parse trac's crazy "go read a diff on a webpage handling
# of descriptions
next;
next;
}
if ($line =~ m{<strong>(keywords|cc)</strong>(.*)$}is ) {
if ( $line =~ m{<strong>(keywords|cc)</strong>(.*)$}is ) {
my $value_changes = $2;
$prop = $1;
my (@added, @removed);
if ($value_changes =~ m{^\s*<em>(.*?)</em> added}is) {
my $added = $1;
@added = split(m{</em>\s*<em>}is, $added);
}
my ( @added, @removed );
if ( $value_changes =~ m{^\s*<em>(.*?)</em> added}is ) {
my $added = $1;
@added = split( m{</em>\s*<em>}is, $added );
}

if ($value_changes =~ m{(?:^|added;)\s*<em>(.*)</em> removed}is) {
my $removed = $1;
@removed = split(m{</em>\s*?<em>}is, $removed);
if ( $value_changes =~ m{(?:^|added;)\s*<em>(.*)</em> removed}is ) {
my $removed = $1;
@removed = split( m{</em>\s*?<em>}is, $removed );

}
my @before = ();
my @after = grep defined && length, split (/\s+/,$ticket_state->{keywords});
for my $value (@after) {
next if grep {$_ eq $value} @added;

my @before = ();
my @after = grep defined && length, split( /\s+/, $ticket_state->{keywords} );
for my $value (@after) {
next if grep { $_ eq $value } @added;
push @before, $value;
}

$old = join(' ', sort (@before, @removed));
$new = join(' ', sort (@after));
$old = join( ' ', sort ( @before, @removed ) );
$new = join( ' ', sort (@after) );
$ticket_state->{$prop} = $old;
}
elsif ( $line =~ m{<strong>(.*?)</strong>\s+changed\s+from\s+<em>(.*)</em>\s+to\s+<em>(.*)</em>}is ) {
} elsif ( $line =~ m{<strong>(.*?)</strong>\s+changed\s+from\s+<em>(.*?)</em>\s+to\s+<em>(.*?)</em>}is ) {
$prop = $1;
$old = $2;
$new = $3;
} elsif ( $line =~ m{<strong>(.*?)</strong>\s+set\s+to\s+<em>(.*)</em>}is ) {
} elsif ( $line =~ m{<strong>(.*?)</strong>\s+set\s+to\s+<em>(.*?)</em>}is ) {
$prop = $1;
$old = '';
$new = $2;
} elsif ( $line =~ m{<strong>(.*?)</strong>\s+<em>(.*?)</em>\s+deleted}is ) {
$prop = $1;
$old = $2;
$old = $2;
$new = '';
} elsif ( $line =~ m{<strong>(.*?)</strong>\s+deleted}is ) {
$prop = $1;
$new = '';
}
else {
warn "could not parse ". $line;
} else {
warn "could not parse " . $line;
}

if ( $prop ) {
if ($prop) {
my $pc = Net::Trac::TicketPropChange->new(
property => $prop,
new_value => $new,
Expand Down

0 comments on commit 4484ea2

Please sign in to comment.