Skip to content
Newer
Older
100644 194 lines (166 sloc) 5.83 KB
4242da4 @monken document classes which describe indexes and tests
monken authored
1 package MetaCPAN::Document::File;
2 use Moose;
6bc44eb @monken fixed new naming
monken authored
3 use ElasticSearchX::Model::Document;
4242da4 @monken document classes which describe indexes and tests
monken authored
4
5 use URI::Escape ();
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
6 use Pod::Tree;
4242da4 @monken document classes which describe indexes and tests
monken authored
7 use MetaCPAN::Pod::XHTML;
8 use Pod::Text;
9 use Plack::MIME;
10 use List::MoreUtils qw(uniq);
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
11 use MetaCPAN::Util;
9cbbc83 @monken improved types
monken authored
12 use MetaCPAN::Types qw(:all);
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
13 use MooseX::Types::Moose qw(ArrayRef);
4242da4 @monken document classes which describe indexes and tests
monken authored
14
db49446 @monken update to document classes and tests
monken authored
15 Plack::MIME->add_type( ".t" => "text/x-script.perl" );
16 Plack::MIME->add_type( ".pod" => "text/x-script.perl" );
17 Plack::MIME->add_type( ".xs" => "text/x-c" );
4242da4 @monken document classes which describe indexes and tests
monken authored
18
19 has id => ( id => [qw(author release path)] );
20
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
21 has [qw(path author name distribution)] => ();
ca79817 @monken remove module property for pod files
monken authored
22 has module => ( required => 0, is => 'rw', isa => Module, coerce => 1, clearer => 'clear_module' );
98fed3a @monken added standard analyzer
monken authored
23 has documentation => ( required => 1, is => 'rw', lazy_build => 1, index => 'analyzed', analyzer => [qw(standard camelcase)] );
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
24 has release => ( parent => 1 );
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
25 has date => ( isa => 'DateTime' );
9cbbc83 @monken improved types
monken authored
26 has stat => ( isa => Stat, required => 0 );
db49446 @monken update to document classes and tests
monken authored
27 has sloc => ( isa => 'Int', lazy_build => 1 );
08e6739 @monken dropped Modern::Perl as prereq
monken authored
28 has slop => ( isa => 'Int', is => 'rw', default => 0 );
db49446 @monken update to document classes and tests
monken authored
29 has pod_lines => ( isa => 'ArrayRef', type => 'integer', lazy_build => 1, index => 'no' );
1221c1b @monken es mapping
monken authored
30 has pod => ( isa => 'ScalarRef', lazy_build => 1, index => 'analyzed', not_analyzed => 0, store => 'no', term_vector => 'with_positions_offsets' );
255776e @monken fix indexing tag on modules
monken authored
31 has [qw(mime)] => ( lazy_build => 1 );
1221c1b @monken es mapping
monken authored
32 has abstract => ( lazy_build => 1, not_analyzed => 0, index => 'analyzed' );
da2c7f7 @monken added status field
monken authored
33 has status => ( default => 'cpan' );
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
34 has maturity => ( default => 'released' );
255776e @monken fix indexing tag on modules
monken authored
35 has directory => ( isa => 'Bool', default => 0 );
36 has level => ( isa => 'Int', lazy_build => 1 );
c5cd188 @monken reintroduced indexed attr
monken authored
37 has indexed => ( is => 'rw', isa => 'Bool', default => 1 );
08e6739 @monken dropped Modern::Perl as prereq
monken authored
38
1221c1b @monken es mapping
monken authored
39 has content => ( isa => 'ScalarRef', lazy_build => 1, property => 0, required => 0 );
4242da4 @monken document classes which describe indexes and tests
monken authored
40 has pom => ( lazy_build => 1, property => 0, required => 0 );
6520d00 @monken got rid of (slow) PPI
monken authored
41 has content_cb => ( property => 0, required => 0 );
4242da4 @monken document classes which describe indexes and tests
monken authored
42
43 sub is_perl_file {
18a8678 @monken fixes #78, implemented documentation property
monken authored
44 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
45 return 1 if($self->name =~ /\.(pl|pm|pod|t)$/i);
46 if($self->name !~ /\./) {
47 my $content = ${$self->content};
48 return 1 if($content =~ /^#!.*?perl/);
18a8678 @monken fixes #78, implemented documentation property
monken authored
49 }
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
50 return 0;
18a8678 @monken fixes #78, implemented documentation property
monken authored
51 }
52
ca79817 @monken remove module property for pod files
monken authored
53 sub is_pod_file {
54 shift->name =~ /\.pod$/i;
55 }
56
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
57 sub _build_documentation {
255776e @monken fix indexing tag on modules
monken authored
58 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
59 $self->_build_abstract;
ca79817 @monken remove module property for pod files
monken authored
60 my $documentation = $self->documentation if($self->has_documentation);
da5ea0e @monken no documentation for non-pod docs and camelcase
monken authored
61 return undef unless(${$self->pod});
ca79817 @monken remove module property for pod files
monken authored
62 my @indexed = grep { $_->indexed } @{$self->module || []};
1c3f4d7 @monken fixed processing of .pod files
monken authored
63 if($documentation && $self->is_pod_file) {
64 return $documentation;
65 } elsif($documentation && grep {$_->name eq $documentation} @indexed) {
ca79817 @monken remove module property for pod files
monken authored
66 return $documentation;
67 } elsif(@indexed) {
68 return $indexed[0]->name;
69 } else {
70 return undef;
71 }
255776e @monken fix indexing tag on modules
monken authored
72 }
73
74 sub _build_level {
75 my $self = shift;
76 my @level = split(/\//, $self->path);
77 return @level - 1;
78 }
79
edf5352 @monken optimized for low memory footprint
monken authored
80 sub _build_content {
81 my $self = shift;
08e6739 @monken dropped Modern::Perl as prereq
monken authored
82 my @content = split("\n", ${$self->content_cb->()} || '');
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
83 my $content = "";
84 while(@content) {
85 my $line = shift @content;
86 last if($line =~ /^\s*__DATA__$/);
87 $content .= $line . "\n";
88 }
89 return \$content;
edf5352 @monken optimized for low memory footprint
monken authored
90 }
91
4242da4 @monken document classes which describe indexes and tests
monken authored
92 sub _build_mime {
93 Plack::MIME->mime_type( shift->name ) || 'text/plain';
94 }
95
96 sub _build_pom {
97 my $self = shift;
1221c1b @monken es mapping
monken authored
98 return undef unless($self->is_perl_file);
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
99 my $pod = Pod::Tree->new;
100 $pod->load_string( ${ $self->content } );
101 return $pod;
4242da4 @monken document classes which describe indexes and tests
monken authored
102 }
db49446 @monken update to document classes and tests
monken authored
103
104 sub _build_abstract {
105 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
106 return undef unless ( $self->is_perl_file );
107 my $root = $self->pom->get_root;
108 my $in_name = 0;
109 my ( $abstract, $documentation );
110 foreach my $node ( @{ $root->get_children } ) {
111 if ($in_name) {
112 last
113 if ( $node->get_type eq 'command'
114 && $node->get_command eq 'head1' );
115
b748fe7 @monken abstract parser
monken authored
116 my $text = MetaCPAN::Util::strip_pod($node->get_text);
117 # warn $text;
118 if ( $in_name == 1 && $text =~ /^\h*(\S+?)(\h+-+\h+(.*))?$/s ) {
119 chomp($abstract = $3);
120 my $name = $1;
121 $documentation = $name if($name =~ /^[\w:']+$/);
122 } elsif ( $in_name == 1 && $text =~ /^\h*([\w\:']+?)\n/s ) {
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
123 chomp($documentation = $1);
b748fe7 @monken abstract parser
monken authored
124 } elsif( $in_name == 2 && !$abstract && $text) {
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
125 chomp($abstract = $text);
126 }
127
128 if ($abstract) {
129 $abstract =~ s{=head.*}{}xms;
130 $abstract =~ s{\n\n.*$}{}xms;
131 $abstract =~ s{\n}{ }gxms;
132 $abstract =~ s{\s+$}{}gxms;
133 $abstract =~ s{(\s)+}{$1}gxms;
134 }
135 $in_name++;
4242da4 @monken document classes which describe indexes and tests
monken authored
136 }
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
137
138 last if ( $abstract && $documentation );
139
140 $in_name++
141 if ( $node->get_type eq 'command'
142 && $node->get_command eq 'head1'
143 && $node->get_text =~ /^NAME\s*$/ );
144
4242da4 @monken document classes which describe indexes and tests
monken authored
145 }
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
146 $self->documentation($documentation) if ($documentation);
147 return $abstract;
148
4242da4 @monken document classes which describe indexes and tests
monken authored
149 }
150
151 sub _build_path {
152 my $self = shift;
153 return join( '/', $self->release->name, $self->name );
154 }
155
156 sub _build_pod_lines {
157 my $self = shift;
158 return [] unless ( $self->is_perl_file );
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
159 my ($lines, $slop) = MetaCPAN::Util::pod_lines(${$self->content});
da2c7f7 @monken added status field
monken authored
160 $self->slop($slop || 0);
08e6739 @monken dropped Modern::Perl as prereq
monken authored
161 return $lines;
162
4242da4 @monken document classes which describe indexes and tests
monken authored
163 }
164
165 # Copied from Perl::Metrics2::Plugin::Core
166 sub _build_sloc {
167 my $self = shift;
168 return 0 unless ( $self->is_perl_file );
6520d00 @monken got rid of (slow) PPI
monken authored
169 my @content = split("\n", ${$self->content});
170 my $pods = 0;
171 map { splice(@content, $_->[0], $_->[1], map { '' } 1 .. $_->[1]) } @{$self->pod_lines};
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
172 my $sloc = 0;
173 while(@content) {
174 my $line = shift @content;
08e6739 @monken dropped Modern::Perl as prereq
monken authored
175 last if($line =~ /^\s*__END__/s);
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
176 $sloc++ if( $line !~ /^\s*#/ && $line =~ /\S/ );
177 }
178 return $sloc;
4242da4 @monken document classes which describe indexes and tests
monken authored
179 }
180
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
181 sub _build_pod {
4242da4 @monken document classes which describe indexes and tests
monken authored
182 my $self = shift;
183 return \'' unless ( $self->is_perl_file );
184 my $parser = Pod::Text->new( sentence => 0, width => 78 );
185
186 my $text = "";
187 $parser->output_string( \$text );
188 $parser->parse_string_document( ${ $self->content } );
189
190 return \$text;
191 }
192
193 __PACKAGE__->meta->make_immutable;
Something went wrong with that request. Please try again.