Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 466 lines (374 sloc) 12.451 kB
4242da4 @monken document classes which describe indexes and tests
monken authored
1 package MetaCPAN::Document::File;
2 use Moose;
6bc44eb @monken fixed new naming
monken authored
3 use ElasticSearchX::Model::Document;
4242da4 @monken document classes which describe indexes and tests
monken authored
4
5 use URI::Escape ();
6 use MetaCPAN::Pod::XHTML;
7 use Pod::Text;
8 use Plack::MIME;
9 use List::MoreUtils qw(uniq);
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
10 use MetaCPAN::Util;
9cbbc83 @monken improved types
monken authored
11 use MetaCPAN::Types qw(:all);
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
12 use MooseX::Types::Moose qw(ArrayRef);
08356a4 @monken some fixes to abstract parser
monken authored
13 use Encode;
14 use utf8;
4242da4 @monken document classes which describe indexes and tests
monken authored
15
db49446 @monken update to document classes and tests
monken authored
# Register (or override) the MIME types Plack::MIME reports for test scripts,
# pod files and XS sources. One add_type call is made per extension, in the
# order listed.
for my $mapping (
    [ ".t"   => "text/x-script.perl" ],
    [ ".pod" => "text/x-pod" ],
    [ ".xs"  => "text/x-c" ],
    )
{
    Plack::MIME->add_type( @{$mapping} );
}
4242da4 @monken document classes which describe indexes and tests
monken authored
19
1d41a41 @monken documentation
monken authored
20 =head1 PROPERTIES
21
22 =head2 abstract
23
24 Abstract of the documentation (if any). This is built by parsing the
25 C<NAME> section. It also sets L</documentation> if it succeeds.
26
27 =head2 id
28
29 Unique identifier of the file. Consists of the L</author>'s pauseid, the
30 release name and the file path. See L<ElasticSearchX::Model::Util/digest>.
31
32 =head2 module
33
34 An ArrayRef of L<MetaCPAN::Document::Module> objects, that represent
35 modules defined in that class (i.e. package declarations).
36
37 =head2 date
38
39 B<Required>
40
41 Release date (i.e. C<mtime> of the tarball).
42
627ff2d @monken added description property to Document::File
monken authored
43 =head2 description
44
45 Contains the C<DESCRIPTION> section of the POD, if any. POD commands are
46 stripped and consecutive whitespace is collapsed.
47
1d41a41 @monken documentation
monken authored
48 =head2 distribution
49
50 =head2 distribution.analyzed
51
52 =head2 distribution.camelcase
53
54 Name of the distribution (e.g. C<Some-Module>).
55
56 =head2 author
57
58 PAUSE ID of the author.
59
60 =head2 status
61
62 Valid values are C<latest>, C<cpan>, and C<backpan>. The most recent upload
63 of a distribution is tagged as C<latest> as long as it's not a developer
64 release, unless there are only developer releases. Everything else is
65 tagged C<cpan>. Once a release is deleted from PAUSE it is tagged as
66 C<backpan>.
67
68 =head2 maturity
69
70 Maturity of the release. This can either be C<released> or C<developer>.
71 See L<CPAN::DistnameInfo>.
72
73 =head2 directory
74
75 Return true if this object represents a directory.
76
77 =head2 documentation
78
79 Holds the name for the documentation in this file.
80
81 If the file L<is a pod file|/is_pod_file>, the name is derived from the
82 C<NAME> section. If the file L<is a perl file|/is_perl_file> and the
83 name from the C<NAME> section matches one of the modules in L</module>,
84 it returns the name. Otherwise it returns the name of the first module
85 in L</module>. If there are no modules in the file the documentation is
86 set to C<undef>.
87
88 =head2 indexed
89
90 B<Default 1>
91
92 Indicates whether the file should be included in the search index or
93 not. If the L</documentation> refers to an unindexed module in
94 L</module>, the file is considered unindexed.
95
96 =head2 level
97
98 Level of this file in the directory tree of the release (i.e. C<META.yml>
99 has a level of C<0>).
100
101 =head2 pod
102
24d86bf @monken fixes #95
monken authored
103 Pure text format of the pod (see L<Pod::Text>). Consecutive whitespace
104 is collapsed to save space and for better snippet previews.
1d41a41 @monken documentation
monken authored
105
106 =head2 pod_lines
107
108 ArrayRef of ArrayRefs of offset and length of pod blocks. Example:
109
110 # Two blocks of pod, starting at line 1 and line 15 with length
111 # of 10 lines each
112 [[1,10], [15,10]]
113
114 =head2 sloc
115
116 Source Lines of Code. Strips empty lines, pod and C<END> section from
117 L</content> and returns the number of lines.
118
119 =head2 slop
120
121 Source Lines of Pod. Returns the number of pod lines using L</pod_lines>.
122
123 =head2 stat
124
125 L<File::stat> info of the tarball. Contains C<mode>, C<uid>, C<gid>, C<size>
126 and C<mtime>.
127
24d86bf @monken fixes #95
monken authored
128 =head2 version
129
130 Contains the raw version string.
131
132 =head2 version_numified
133
134 B<Required>, B<Lazy Build>
135
136 Numified version of L</version>. Contains 0 if there is no version or the
137 version could not be parsed.
138
1d41a41 @monken documentation
monken authored
139 =cut
140
11607a9 @monken take advantage of new shiny ESX::Model API
monken authored
# Stored properties of a file document. See the PROPERTIES section of the
# POD above for the meaning of each one.

# The document id is derived from the author, release and path attributes.
has id => ( is => 'ro', id => [qw(author release path)] );

has [qw(path author name)] => ( is => 'ro', required => 1 );

has [qw(release distribution)] => (
    is       => 'ro',
    required => 1,
    analyzer => [qw(standard camelcase lowercase)],
);

has module => (
    required        => 0,
    is              => 'rw',
    isa             => Module,
    type            => 'nested',
    include_in_root => 1,
    coerce          => 1,
    clearer         => 'clear_module'
);

# Writable because _build_abstract stores the name it parses from the NAME
# section here as a side effect.
has documentation => (
    required   => 1,
    is         => 'rw',
    lazy_build => 1,
    index      => 'analyzed',
    predicate  => 'has_documentation',
    analyzer   => [qw(standard camelcase lowercase)]
);

has date => ( is => 'ro', required => 1, isa => 'DateTime' );
has stat => ( is => 'ro', isa => Stat, required => 0, dynamic => 1 );
has sloc => ( is => 'ro', required => 1, isa => 'Int', lazy_build => 1 );

# Writable because _build_pod_lines stores the pod line count here as a side
# effect. The declaration used to pass "is" twice ('ro', then 'rw'); only the
# last value took effect, so the misleading 'ro' has been dropped.
has slop =>
    ( is => 'rw', required => 1, isa => 'Int', lazy_build => 1 );

has pod_lines => (
    is         => 'ro',
    required   => 1,
    isa        => 'ArrayRef',
    type       => 'integer',
    lazy_build => 1,
    index      => 'no'
);

has pod => (
    is           => 'ro',
    required     => 1,
    isa          => 'ScalarRef',
    lazy_build   => 1,
    index        => 'analyzed',
    not_analyzed => 0,
    store        => 'no',
    term_vector  => 'with_positions_offsets'
);

has mime => ( is => 'ro', required => 1, lazy_build => 1 );
has abstract =>
    ( is => 'ro', required => 1, lazy_build => 1, index => 'analyzed' );
has description =>
    ( is => 'ro', required => 1, lazy_build => 1, index => 'analyzed' );
has status => ( is => 'ro', required => 1, default => 'cpan' );
has authorized => ( required => 1, is => 'ro', isa => 'Bool', default => 1 );
has maturity => ( is => 'ro', required => 1, default => 'released' );
has directory => ( is => 'ro', required => 1, isa => 'Bool', default => 0 );
has level => ( is => 'ro', required => 1, isa => 'Int', lazy_build => 1 );
has indexed => ( required => 1, is => 'rw', isa => 'Bool', default => 1 );
has version => ( is => 'ro', required => 0 );
has version_numified =>
    ( is => 'ro', isa => 'Num', lazy_build => 1, required => 1 );
24d86bf @monken fixes #95
monken authored
205
# Builder for version_numified: 0 when the file has no (true) version,
# otherwise the result of MetaCPAN::Util::numify_version on the raw string.
sub _build_version_numified {
    my ($self) = @_;
    my $raw_version = $self->version;
    return $raw_version
        ? MetaCPAN::Util::numify_version($raw_version)
        : 0;
}
08e6739 @monken dropped Modern::Perl as prereq
monken authored
211
1d41a41 @monken documentation
monken authored
212 =head1 ATTRIBUTES
213
214 These attributes are not stored.
215
216 =head2 content
217
218 The content of the file. It is built by calling L</content_cb> and
219 stripping the C<DATA> section for performance reasons.
220
221 =head2 content_cb
222
223 Callback that returns the content of the file as a ScalarRef.
224
225 =cut
226
11607a9 @monken take advantage of new shiny ESX::Model API
monken authored
# Attributes that are not stored in the index (property => 0).

# Lazily built file content as a ScalarRef; see _build_content.
has content => (
    is         => 'ro',
    isa        => 'ScalarRef',
    lazy_build => 1,
    property   => 0,
    required   => 0,
);

# Callback that returns the raw content as a ScalarRef. The default callback
# yields a reference to the empty string.
has content_cb => (
    is       => 'ro',
    property => 0,
    required => 0,
    default  => sub {
        return sub { return \'' };
    },
);
4242da4 @monken document classes which describe indexes and tests
monken authored
242
1d41a41 @monken documentation
monken authored
243 =head1 METHODS
244
245 =head2 is_perl_file
246
247 Return true if the file extension is one of C<pl>, C<pm>, C<pod>, C<t>
248 or if the file has no extension and the shebang line contains the
249 term C<perl>.
250
251 =head2 is_pod_file
252
253 Returns true if the file extension is C<pod>.
254
255 =cut
256
4242da4 @monken document classes which describe indexes and tests
monken authored
# Returns 1 if this file is considered perl code or documentation, else 0.
sub is_perl_file {
    my ($self) = @_;

    # Directories are never perl files.
    return 0 if $self->directory;

    # Either a well-known perl extension, or a perl script MIME type (the
    # mime builder reports it for extension-less files whose content starts
    # with a perl shebang). The mime is only consulted when the extension
    # check fails.
    my $looks_like_perl = $self->name =~ /\.(pl|pm|pod|t)$/i
        || $self->mime eq "text/x-script.perl";
    return $looks_like_perl ? 1 : 0;
}
264
ca79817 @monken remove module property for pod files
monken authored
# True if the file name ends in ".pod" (case-insensitive).
sub is_pod_file {
    my ($self) = @_;
    return $self->name =~ /\.pod$/i;
}
268
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
# Builder for documentation.
#
# Returns the documentation name for this file, or undef:
#   * undef if the file has no pod text;
#   * the name parsed from the NAME section if the file is a pod file, or if
#     that name matches one of the indexed modules;
#   * otherwise the name of the first indexed module;
#   * the parsed name (possibly undef) if the file declares no modules;
#   * undef if it only declares unindexed modules.
sub _build_documentation {
    my $self = shift;

    # Parsing the abstract sets documentation as a side effect when the NAME
    # section yields a usable name.
    $self->_build_abstract;

    # Deliberately not "my ... if (...)": a my() with a statement-modifier
    # conditional has undefined behaviour in Perl.
    my $documentation
        = $self->has_documentation ? $self->documentation : undef;

    return undef unless ( ${ $self->pod } );

    my @modules = @{ $self->module || [] };
    my @indexed = grep { $_->indexed } @modules;

    if ( $documentation && $self->is_pod_file ) {
        return $documentation;
    }
    elsif ( $documentation && grep { $_->name eq $documentation } @indexed ) {
        return $documentation;
    }
    elsif (@indexed) {
        return $indexed[0]->name;
    }
    elsif ( !@modules ) {
        return $documentation;
    }
    else {
        return undef;
    }
}
291
# Builder for level: the number of "/"-separated path segments minus one, so
# a path without any slash is at level 0.
sub _build_level {
    my ($self) = @_;
    my @segments = split( /\//, $self->path );
    return scalar(@segments) - 1;
}
297
edf5352 @monken optimized for low memory footprint
monken authored
# Builder for content: fetches the raw text through content_cb and returns a
# ScalarRef to a copy with the __DATA__ section left out.
#
# Skipping starts on a __DATA__ line (which is itself dropped) and stops at
# an __END__ line or at the first "=head1" line inside the data section;
# both of those lines are kept. Every kept line is terminated with "\n".
sub _build_content {
    my ($self) = @_;
    my $raw = ${ $self->content_cb->() } || '';

    my @kept;
    my $skipping = 0;
    for my $line ( split( "\n", $raw ) ) {
        if ( $line =~ /^\s*__END__\s*$/ ) {
            $skipping = 0;
        }
        elsif ( $line =~ /^\s*__DATA__\s*$/ ) {
            $skipping = 1;
        }
        elsif ( $skipping && $line =~ /^=head1/ ) {

            # pod embedded in the data section is still wanted
            $skipping = 0;
        }
        push @kept, $line unless $skipping;
    }

    my $content = join( '', map { $_ . "\n" } @kept );
    return \$content;
}
319
4242da4 @monken document classes which describe indexes and tests
monken authored
# Builder for mime.
#
# Files without an extension (no "." in the name) that are not directories
# are reported as "text/x-script.perl" when their content starts with a perl
# shebang. Everything else is looked up through Plack::MIME, defaulting to
# "text/plain".
#
# Previously an extension-less file without a perl shebang fell off the end
# of the "if" branch without an explicit return, which left the result
# unspecified (in practice an empty value). Such files now fall through to
# the generic lookup and its "text/plain" default.
sub _build_mime {
    my $self = shift;
    if ( !$self->directory && $self->name !~ /\./ ) {
        my $content = ${ $self->content };
        return "text/x-script.perl" if ( $content =~ /^#!.*?perl/ );
    }
    return Plack::MIME->mime_type( $self->name ) || 'text/plain';
}
330
627ff2d @monken added description property to Document::File
monken authored
# Builder for description: renders the DESCRIPTION pod section as plain text
# with whitespace collapsed and trimmed. Returns undef for non-perl files and
# when there is no DESCRIPTION section.
sub _build_description {
    my ($self) = @_;
    return undef unless ( $self->is_perl_file );

    my $section = MetaCPAN::Util::extract_section( ${ $self->content },
        'DESCRIPTION' );
    return undef unless ($section);

    my $rendered  = "";
    my $formatter = Pod::Text->new;
    $formatter->output_string( \$rendered );
    $formatter->parse_string_document("=pod\n\n$section");

    # Collapse whitespace runs, then trim both ends.
    for ($rendered) {
        s/\s+/ /g;
        s/^\s+//;
        s/\s+$//;
    }
    return $rendered;
}
346
db49446 @monken update to document classes and tests
monken authored
# Builder for abstract: parses the NAME pod section.
#
# Returns the abstract (the text after "Name - ", or the paragraph following
# the name), or undef for non-perl files, when there is no NAME section, or
# when no abstract can be found. As a side effect, documentation is set when
# the first word of the section looks like a module or script name.
sub _build_abstract {
    my ($self) = @_;
    return undef unless ( $self->is_perl_file );

    my $section
        = MetaCPAN::Util::extract_section( ${ $self->content }, 'NAME' );
    return undef unless ($section);

    # Drop pod command lines and X<> index entries.
    $section =~ s/^=\w+.*$//mg;
    $section =~ s/X<.*?>//mg;

    my ( $documentation, $abstract );
    if ( $section =~ /^\s*(\S+)((\h+-+\h+(.+))|(\r?\n\h*\r?\n\h*(.+)))?/ms ) {

        # Copy the captures before anything else can clobber them.
        my ( $raw_name, $after_dash, $next_paragraph ) = ( $1, $4, $6 );
        if ( my $found = $after_dash || $next_paragraph ) {
            chomp( $abstract = $found );
        }
        my $name = MetaCPAN::Util::strip_pod($raw_name);
        $documentation = $name if ( $name =~ /^[\w\.:\-_']+$/ );
    }

    if ($abstract) {

        # Keep only the first paragraph and put it on a single line.
        $abstract =~ s/^=\w+.*$//xms;
        $abstract =~ s{\r?\n\h*\r?\n\h*.*$}{}xms;
        $abstract =~ s{\n}{ }gxms;
        $abstract =~ s{\s+$}{}gxms;
        $abstract =~ s{(\s)+}{$1}gxms;
        $abstract = MetaCPAN::Util::strip_pod($abstract);
    }

    $self->documentation( MetaCPAN::Util::strip_pod($documentation) )
        if ($documentation);

    return $abstract;
}
e41cd5b @monken fixes #90
monken authored
376
4242da4 @monken document classes which describe indexes and tests
monken authored
# Joins the release's name and this file's name with "/".
# NOTE(review): path is declared required without a builder in this file, and
# release is declared without a type, so this looks unreachable here. Confirm
# before relying on it.
sub _build_path {
    my ($self) = @_;
    my @parts = ( $self->release->name, $self->name );
    return join( '/', @parts );
}
381
# Builder for pod_lines: returns whatever MetaCPAN::Util::pod_lines reports
# as the first element of its result for the file content, or an empty
# ArrayRef for non-perl files. As a side effect the second element (or 0) is
# stored in slop.
sub _build_pod_lines {
    my ($self) = @_;
    $self->is_perl_file or return [];

    my ( $blocks, $pod_line_count )
        = MetaCPAN::Util::pod_lines( ${ $self->content } );
    $self->slop( $pod_line_count || 0 );
    return $blocks;
}
389
# Builder for slop: 0 for non-perl files; otherwise runs the pod_lines
# builder, which stores the value in slop, and reads it back.
sub _build_slop {
    my ($self) = @_;
    if ( !$self->is_perl_file ) {
        return 0;
    }

    # Called for its side effect only; the returned block list is ignored.
    $self->_build_pod_lines;
    return $self->slop;
}
396
# Copied from Perl::Metrics2::Plugin::Core
#
# Builder for sloc: counts the lines of content that contain something other
# than whitespace and do not start with "#", after blanking every range
# listed in pod_lines. Counting stops at the first line starting with
# __END__. Returns 0 for non-perl files.
sub _build_sloc {
    my $self = shift;
    return 0 unless ( $self->is_perl_file );
    my @content = split( "\n", ${ $self->content } );

    # Blank out the pod ranges (offset, length) so they are not counted.
    # A plain loop replaces the former map in void context.
    for my $block ( @{ $self->pod_lines } ) {
        my ( $offset, $length ) = @{$block};
        splice( @content, $offset, $length, ('') x $length );
    }

    my $sloc = 0;
    for my $line (@content) {
        last if ( $line =~ /^\s*__END__/s );
        $sloc++ if ( $line !~ /^\s*#/ && $line =~ /\S/ );
    }
    return $sloc;
}
414
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
# Builder for pod: renders the content through Pod::Text and returns a
# ScalarRef to the result with every whitespace run collapsed to a single
# space. Non-perl files yield a reference to the empty string.
sub _build_pod {
    my ($self) = @_;
    return \'' unless ( $self->is_perl_file );

    my $rendered  = "";
    my $formatter = Pod::Text->new( sentence => 0, width => 78 );
    $formatter->output_string( \$rendered );
    $formatter->parse_string_document( ${ $self->content } );

    $rendered =~ s/\s+/ /g;
    return \$rendered;
}
426
427 __PACKAGE__->meta->make_immutable;
a76bf49 @monken release_id is parent, set method find
monken authored
428
429 package MetaCPAN::Document::File::Set;
430 use Moose;
431 extends 'ElasticSearchX::Model::Document::Set';
432
# Returns the first file whose documentation equals $module, restricted to
# indexed files with status "latest" that are not flagged unauthorized.
# Results are ordered by date (descending), then mime, then stat.mtime
# (descending).
sub find {
    my ( $self, $module ) = @_;

    my @conditions = (
        { term => { 'documentation' => $module } },
        { term => { 'file.indexed'  => \1, } },
        { term => { status          => 'latest', } },
        { not => { filter => { term => { 'file.authorized' => \0 } } } },
    );
    my @ordering = (
        { 'date' => { order => "desc" } },
        'mime',
        { 'stat.mtime' => { order => 'desc' } },
    );

    my $matches = $self->filter( { and => \@conditions } );
    return $matches->sort( \@ordering )->first;
}
452
cd8816f @rwstauner Find names of modules provided by a dist
rwstauner authored
# Returns the file.module.name field of all files of the latest release of
# distribution $name that contain authorized, indexed modules.
sub find_provided_by {
    my ( $self, $name ) = @_;

    my @conditions = (
        { term => { 'file.distribution'      => $name } },
        { term => { 'file.status'            => 'latest' } },
        { term => { 'file.module.authorized' => 1 } },
        { term => { 'file.module.indexed'    => 1 } },
    );

    my $matches = $self->filter( { and => \@conditions } );
    return $matches->fields( [qw( file.module.name )] )->all;
}
464
11607a9 @monken take advantage of new shiny ESX::Model API
monken authored
465 __PACKAGE__->meta->make_immutable;
Something went wrong with that request. Please try again.