Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 373 lines (282 sloc) 9.982 kb
4242da4 @monken document classes which describe indexes and tests
monken authored
1 package MetaCPAN::Document::File;
2 use Moose;
6bc44eb @monken fixed new naming
monken authored
3 use ElasticSearchX::Model::Document;
4242da4 @monken document classes which describe indexes and tests
monken authored
4
5 use URI::Escape ();
6 use MetaCPAN::Pod::XHTML;
7 use Pod::Text;
8 use Plack::MIME;
9 use List::MoreUtils qw(uniq);
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
10 use MetaCPAN::Util;
9cbbc83 @monken improved types
monken authored
11 use MetaCPAN::Types qw(:all);
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
12 use MooseX::Types::Moose qw(ArrayRef);
08356a4 @monken some fixes to abstract parser
monken authored
13 use Encode;
14 use utf8;
4242da4 @monken document classes which describe indexes and tests
monken authored
15
db49446 @monken update to document classes and tests
monken authored
16 Plack::MIME->add_type( ".t" => "text/x-script.perl" );
24d86bf @monken fixes #95
monken authored
17 Plack::MIME->add_type( ".pod" => "text/x-pod" );
db49446 @monken update to document classes and tests
monken authored
18 Plack::MIME->add_type( ".xs" => "text/x-c" );
4242da4 @monken document classes which describe indexes and tests
monken authored
19
1d41a41 @monken documentation
monken authored
20 =head1 PROPERTIES
21
22 =head2 abstract
23
24 Abstract of the documentation (if any). This is built by parsing the
25 C<NAME> section. It also sets L</documentation> if it succeeds.
26
27 =head2 id
28
29 Unique identifier of the release. Consists of the L</author>'s pauseid and
30 the release L</name>. See L</ElasticSearchX::Model::Util::digest>.
31
32 =head2 module
33
34 An ArrayRef of L<MetaCPAN::Document::Module> objects, that represent
35 modules defined in that class (i.e. package declarations).
36
37 =head2 date
38
39 B<Required>
40
41 Release date (i.e. C<mtime> of the tarball).
42
627ff2d @monken added description property to Document::File
monken authored
43 =head2 description
44
45 Contains the C<DESCRIPTION> section of the POD if any. Will be stripped from
46 whitespaces and POD commands.
47
1d41a41 @monken documentation
monken authored
48 =head2 distribution
49
50 =head2 distribution.analyzed
51
52 =head2 distribution.camelcase
53
54 Name of the distribution (e.g. C<Some-Module>).
55
56 =head2 author
57
58 PAUSE ID of the author.
59
60 =head2 status
61
62 Valid values are C<latest>, C<cpan>, and C<backpan>. The most recent upload
63 of a distribution is tagged as C<latest> as long as it's not a developer
64 release, unless there are only developer releases. Everything else is
65 tagged C<cpan>. Once a release is deleted from PAUSE it is tagged as
66 C<backpan>.
67
68 =head2 maturity
69
70 Maturity of the release. This can either be C<released> or C<developer>.
71 See L<CPAN::DistnameInfo>.
72
73 =head2 directory
74
75 Return true if this object represents a directory.
76
77 =head2 documentation
78
79 Holds the name for the documentation in this file.
80
81 If the file L</is_pod_file|is a pod file, the name is derived from the
82 C<NAME> section. If the file L</is_perl_file|is a perl file> and the
83 name from the C<NAME> section matches on of the modules in L</module>,
84 it returns the name. Otherwise it returns the name of the first module
85 in L</module>. If there are no modules in the file the documentation is
86 set to C<undef>.
87
88 =head2 indexed
89
90 B<Default 0>
91
92 Indicates whether the file should be included in the search index or
93 not. If the L</documentation> refers to an unindexed module in
94 L</module>, the file is considered unindexed.
95
96 =head2 level
97
98 Level of this file in the directory tree of the release (i.e. C<META.yml>
99 has a level of C<0>).
100
101 =head2 pod
102
24d86bf @monken fixes #95
monken authored
103 Pure text format of the pod (see L</Pod::Text>). Consecutive whitespaces
104 are removed to save space and for better snippet previews.
1d41a41 @monken documentation
monken authored
105
106 =head2 pod_lines
107
108 ArrayRef of ArrayRefs of offset and length of pod blocks. Example:
109
110 # Two blocks of pod, starting at line 1 and line 15 with length
111 # of 10 lines each
112 [[1,10], [15,10]]
113
114 =head2 sloc
115
116 Source Lines of Code. Strips empty lines, pod and C<END> section from
117 L</content> and returns the number of lines.
118
119 =head2 slop
120
121 Source Lines of Pod. Returns the number of pod lines using L</pod_lines>.
122
123 =head2 stat
124
125 L<File::stat> info of the tarball. Contains C<mode>, C<uid>, C<gid>, C<size>
126 and C<mtime>.
127
24d86bf @monken fixes #95
monken authored
128 =head2 version
129
130 Contains the raw version string.
131
132 =head2 version_numified
133
134 B<Required>, B<Lazy Build>
135
136 Numified version of L</version>. Contains 0 if there is no version or the
137 version could not be parsed.
138
1d41a41 @monken documentation
monken authored
139 =cut
140
4242da4 @monken document classes which describe indexes and tests
monken authored
141 has id => ( id => [qw(author release path)] );
142
1d41a41 @monken documentation
monken authored
143 has [qw(path author name)];
144 has distribution => ( analyzer => [qw(standard camelcase)] );
ca79817 @monken remove module property for pod files
monken authored
145 has module => ( required => 0, is => 'rw', isa => Module, coerce => 1, clearer => 'clear_module' );
1d41a41 @monken documentation
monken authored
146 has documentation => ( is => 'rw', lazy_build => 1, index => 'analyzed', analyzer => [qw(standard camelcase)] );
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
147 has release => ( parent => 1 );
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
148 has date => ( isa => 'DateTime' );
24d86bf @monken fixes #95
monken authored
149 has stat => ( isa => Stat, required => 0, dynamic => 1 );
db49446 @monken update to document classes and tests
monken authored
150 has sloc => ( isa => 'Int', lazy_build => 1 );
ae6fe02 @monken build slop
monken authored
151 has slop => ( isa => 'Int', is => 'rw', lazy_build => 1 );
db49446 @monken update to document classes and tests
monken authored
152 has pod_lines => ( isa => 'ArrayRef', type => 'integer', lazy_build => 1, index => 'no' );
24d86bf @monken fixes #95
monken authored
153
154 has pod => (
155 isa => 'ScalarRef',
156 lazy_build => 1,
157 index => 'analyzed',
158 not_analyzed => 0,
159 store => 'no',
964356b @monken include pod in _all
monken authored
160 term_vector => 'with_positions_offsets' );
24d86bf @monken fixes #95
monken authored
161
1d41a41 @monken documentation
monken authored
162 has mime => ( lazy_build => 1 );
24d86bf @monken fixes #95
monken authored
163 has abstract => ( lazy_build => 1, index => 'analyzed' );
627ff2d @monken added description property to Document::File
monken authored
164 has description => ( lazy_build => 1, index => 'analyzed' );
da2c7f7 @monken added status field
monken authored
165 has status => ( default => 'cpan' );
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
166 has maturity => ( default => 'released' );
255776e @monken fix indexing tag on modules
monken authored
167 has directory => ( isa => 'Bool', default => 0 );
168 has level => ( isa => 'Int', lazy_build => 1 );
c5cd188 @monken reintroduced indexed attr
monken authored
169 has indexed => ( is => 'rw', isa => 'Bool', default => 1 );
24d86bf @monken fixes #95
monken authored
170 has version => ( required => 0 );
171 has version_numified => ( isa => 'Num', lazy_build => 1, required => 1 );
172
173 sub _build_version_numified {
174 my $self = shift;
175 return 0 unless($self->version);
176 return MetaCPAN::Util::numify_version( $self->version );
177 }
08e6739 @monken dropped Modern::Perl as prereq
monken authored
178
1d41a41 @monken documentation
monken authored
179 =head1 ATTRIBUTES
180
181 These attributes are not stored.
182
183 =head2 content
184
185 The content of the file. It is built by calling L</content_cb> and
186 stripping the C<DATA> section for performance reasons.
187
188 =head2 content_cb
189
190 Callback, that returns the content of the as ScalarRef.
191
192 =cut
193
1221c1b @monken es mapping
monken authored
194 has content => ( isa => 'ScalarRef', lazy_build => 1, property => 0, required => 0 );
6520d00 @monken got rid of (slow) PPI
monken authored
195 has content_cb => ( property => 0, required => 0 );
4242da4 @monken document classes which describe indexes and tests
monken authored
196
1d41a41 @monken documentation
monken authored
197 =head1 METHODS
198
199 =head2 is_perl_file
200
201 Return true if the file extension is one of C<pl>, C<pm>, C<pod>, C<t>
202 or if the file has no extension and the shebang line contains the
203 term C<perl>.
204
205 =head2 is_pod_file
206
207 Retruns true if the file extension is C<pod>.
208
209 =cut
210
4242da4 @monken document classes which describe indexes and tests
monken authored
211 sub is_perl_file {
18a8678 @monken fixes #78, implemented documentation property
monken authored
212 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
213 return 1 if($self->name =~ /\.(pl|pm|pod|t)$/i);
0190716 @monken fix mime type for scripts without file extension, requires reindex
monken authored
214 return 1 if($self->mime eq "text/x-script.perl");
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
215 return 0;
18a8678 @monken fixes #78, implemented documentation property
monken authored
216 }
217
ca79817 @monken remove module property for pod files
monken authored
218 sub is_pod_file {
219 shift->name =~ /\.pod$/i;
220 }
221
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
222 sub _build_documentation {
255776e @monken fix indexing tag on modules
monken authored
223 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
224 $self->_build_abstract;
ca79817 @monken remove module property for pod files
monken authored
225 my $documentation = $self->documentation if($self->has_documentation);
da5ea0e @monken no documentation for non-pod docs and camelcase
monken authored
226 return undef unless(${$self->pod});
ca79817 @monken remove module property for pod files
monken authored
227 my @indexed = grep { $_->indexed } @{$self->module || []};
1c3f4d7 @monken fixed processing of .pod files
monken authored
228 if($documentation && $self->is_pod_file) {
229 return $documentation;
230 } elsif($documentation && grep {$_->name eq $documentation} @indexed) {
ca79817 @monken remove module property for pod files
monken authored
231 return $documentation;
232 } elsif(@indexed) {
233 return $indexed[0]->name;
234 } else {
235 return undef;
236 }
255776e @monken fix indexing tag on modules
monken authored
237 }
238
239 sub _build_level {
240 my $self = shift;
241 my @level = split(/\//, $self->path);
242 return @level - 1;
243 }
244
edf5352 @monken optimized for low memory footprint
monken authored
245 sub _build_content {
246 my $self = shift;
08e6739 @monken dropped Modern::Perl as prereq
monken authored
247 my @content = split("\n", ${$self->content_cb->()} || '');
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
248 my $content = "";
cdab85f @monken fix POD in END section of a DATA section (see JSON.pm)
monken authored
249 my $in_data = 0; # skip DATA section
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
250 while(@content) {
251 my $line = shift @content;
cdab85f @monken fix POD in END section of a DATA section (see JSON.pm)
monken authored
252 if($line =~ /^\s*__END__\s*$/) {
253 $in_data = 0;
254 } elsif($line =~ /^\s*__DATA__\s*$/) {
255 $in_data++;
2dad925 @monken fixes POD in __DATA__ section
monken authored
256 } elsif($in_data && $line =~ /^=head1/) {
257 $in_data = 0;
258 }
cdab85f @monken fix POD in END section of a DATA section (see JSON.pm)
monken authored
259 next if($in_data);
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
260 $content .= $line . "\n";
261 }
262 return \$content;
edf5352 @monken optimized for low memory footprint
monken authored
263 }
264
4242da4 @monken document classes which describe indexes and tests
monken authored
265 sub _build_mime {
0190716 @monken fix mime type for scripts without file extension, requires reindex
monken authored
266 my $self = shift;
267 if($self->name !~ /\./) {
268 my $content = ${$self->content};
269 return "text/x-script.perl" if($content =~ /^#!.*?perl/);
270 } else {
271 return Plack::MIME->mime_type( $self->name ) || 'text/plain';
272 }
4242da4 @monken document classes which describe indexes and tests
monken authored
273 }
274
627ff2d @monken added description property to Document::File
monken authored
275 sub _build_description {
276 my $self = shift;
277 return undef unless ( $self->is_perl_file );
278 my $section =
279 MetaCPAN::Util::extract_section( ${ $self->content }, 'DESCRIPTION' );
280 return undef unless ($section);
281 my $parser = Pod::Text->new;
282 my $text = "";
283 $parser->output_string( \$text );
284 $parser->parse_string_document("=pod\n\n$section");
285 $text =~ s/\s+/ /g;
286 $text =~ s/^\s+//;
287 $text =~ s/\s+$//;
288 return $text;
289 }
290
291
db49446 @monken update to document classes and tests
monken authored
292 sub _build_abstract {
e41cd5b @monken fixes #90
monken authored
293 my $self = shift;
294 return undef unless ( $self->is_perl_file );
295 my $text = ${$self->content};
296 my ( $documentation, $abstract );
627ff2d @monken added description property to Document::File
monken authored
297 my $section = MetaCPAN::Util::extract_section($text, 'NAME');
298 $section =~ s/^=\w+.*$//mg;
299 $section =~ s/X<.*?>//mg;
1b94877 @monken factored out extract_section
monken authored
300 return undef unless($section);
1ce07b2 @monken fixes https://github.com/CPAN-API/metacpan-web/issues/24
monken authored
301 if ( $section =~ /^\s*(\S+)((\h+-+\h+(.+))|(\r?\n\h*\r?\n\h*(.+)))?/ms ) {
302 chomp( $abstract = $4 || $6 ) if($4 || $6);
303 my $name = $1;
e41cd5b @monken fixes #90
monken authored
304 $documentation = $name if ( $name =~ /^[\w\.:-_']+$/ );
305 }
306
307 if ($abstract) {
be27d08 @monken more abstract parsing fixes
monken authored
308 $abstract =~ s/^=\w+.*$//xms;
91887e2 @monken fixed abstract parser for win line endings
monken authored
309 $abstract =~ s{\r?\n\h*\r?\n\h*.*$}{}xms;
e41cd5b @monken fixes #90
monken authored
310 $abstract =~ s{\n}{ }gxms;
311 $abstract =~ s{\s+$}{}gxms;
312 $abstract =~ s{(\s)+}{$1}gxms;
313 $abstract = MetaCPAN::Util::strip_pod($abstract);
314 }
315
316 if ($documentation) {
317 $self->documentation(MetaCPAN::Util::strip_pod($documentation));
318 }
319 return $abstract;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
320
4242da4 @monken document classes which describe indexes and tests
monken authored
321 }
322
e41cd5b @monken fixes #90
monken authored
323
4242da4 @monken document classes which describe indexes and tests
monken authored
324 sub _build_path {
325 my $self = shift;
326 return join( '/', $self->release->name, $self->name );
327 }
328
329 sub _build_pod_lines {
330 my $self = shift;
331 return [] unless ( $self->is_perl_file );
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
332 my ($lines, $slop) = MetaCPAN::Util::pod_lines(${$self->content});
da2c7f7 @monken added status field
monken authored
333 $self->slop($slop || 0);
08e6739 @monken dropped Modern::Perl as prereq
monken authored
334 return $lines;
ae6fe02 @monken build slop
monken authored
335 }
336
337 sub _build_slop {
338 my $self = shift;
339 return 0 unless ( $self->is_perl_file );
340 $self->_build_pod_lines;
341 return $self->slop;
4242da4 @monken document classes which describe indexes and tests
monken authored
342 }
343
344 # Copied from Perl::Metrics2::Plugin::Core
345 sub _build_sloc {
346 my $self = shift;
347 return 0 unless ( $self->is_perl_file );
6520d00 @monken got rid of (slow) PPI
monken authored
348 my @content = split("\n", ${$self->content});
349 my $pods = 0;
350 map { splice(@content, $_->[0], $_->[1], map { '' } 1 .. $_->[1]) } @{$self->pod_lines};
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
351 my $sloc = 0;
352 while(@content) {
353 my $line = shift @content;
08e6739 @monken dropped Modern::Perl as prereq
monken authored
354 last if($line =~ /^\s*__END__/s);
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
355 $sloc++ if( $line !~ /^\s*#/ && $line =~ /\S/ );
356 }
357 return $sloc;
4242da4 @monken document classes which describe indexes and tests
monken authored
358 }
359
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
360 sub _build_pod {
4242da4 @monken document classes which describe indexes and tests
monken authored
361 my $self = shift;
362 return \'' unless ( $self->is_perl_file );
363 my $parser = Pod::Text->new( sentence => 0, width => 78 );
364
365 my $text = "";
366 $parser->output_string( \$text );
367 $parser->parse_string_document( ${ $self->content } );
24d86bf @monken fixes #95
monken authored
368 $text =~ s/\s+/ /g;
4242da4 @monken document classes which describe indexes and tests
monken authored
369 return \$text;
370 }
371
372 __PACKAGE__->meta->make_immutable;
Something went wrong with that request. Please try again.