Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 300 lines (220 sloc) 8.08 kb
4242da4 @monken document classes which describe indexes and tests
monken authored
1 package MetaCPAN::Document::File;
2 use Moose;
6bc44eb @monken fixed new naming
monken authored
3 use ElasticSearchX::Model::Document;
4242da4 @monken document classes which describe indexes and tests
monken authored
4
5 use URI::Escape ();
6 use MetaCPAN::Pod::XHTML;
7 use Pod::Text;
8 use Plack::MIME;
9 use List::MoreUtils qw(uniq);
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
10 use MetaCPAN::Util;
9cbbc83 @monken improved types
monken authored
11 use MetaCPAN::Types qw(:all);
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
12 use MooseX::Types::Moose qw(ArrayRef);
4242da4 @monken document classes which describe indexes and tests
monken authored
13
db49446 @monken update to document classes and tests
monken authored
14 Plack::MIME->add_type( ".t" => "text/x-script.perl" );
15 Plack::MIME->add_type( ".pod" => "text/x-script.perl" );
16 Plack::MIME->add_type( ".xs" => "text/x-c" );
4242da4 @monken document classes which describe indexes and tests
monken authored
17
1d41a41 @monken documentation
monken authored
18 =head1 PROPERTIES
19
20 =head2 abstract
21
22 Abstract of the documentation (if any). This is built by parsing the
23 C<NAME> section. It also sets L</documentation> if it succeeds.
24
25 =head2 id
26
27 Unique identifier of the release. Consists of the L</author>'s pauseid and
28 the release L</name>. See L</ElasticSearchX::Model::Util::digest>.
29
30 =head2 module
31
32 An ArrayRef of L<MetaCPAN::Document::Module> objects, that represent
33 modules defined in that class (i.e. package declarations).
34
35 =head2 date
36
37 B<Required>
38
39 Release date (i.e. C<mtime> of the tarball).
40
41 =head2 distribution
42
43 =head2 distribution.analyzed
44
45 =head2 distribution.camelcase
46
47 Name of the distribution (e.g. C<Some-Module>).
48
49 =head2 author
50
51 PAUSE ID of the author.
52
53 =head2 status
54
55 Valid values are C<latest>, C<cpan>, and C<backpan>. The most recent upload
56 of a distribution is tagged as C<latest> as long as it's not a developer
57 release, unless there are only developer releases. Everything else is
58 tagged C<cpan>. Once a release is deleted from PAUSE it is tagged as
59 C<backpan>.
60
61 =head2 maturity
62
63 Maturity of the release. This can either be C<released> or C<developer>.
64 See L<CPAN::DistnameInfo>.
65
66 =head2 directory
67
68 Return true if this object represents a directory.
69
70 =head2 documentation
71
72 Holds the name for the documentation in this file.
73
74 If the file L</is_pod_file|is a pod file, the name is derived from the
75 C<NAME> section. If the file L</is_perl_file|is a perl file> and the
76 name from the C<NAME> section matches on of the modules in L</module>,
77 it returns the name. Otherwise it returns the name of the first module
78 in L</module>. If there are no modules in the file the documentation is
79 set to C<undef>.
80
81 =head2 indexed
82
83 B<Default 0>
84
85 Indicates whether the file should be included in the search index or
86 not. If the L</documentation> refers to an unindexed module in
87 L</module>, the file is considered unindexed.
88
89 =head2 level
90
91 Level of this file in the directory tree of the release (i.e. C<META.yml>
92 has a level of C<0>).
93
94 =head2 pod
95
96 Pure text format of the pod (see L</Pod::Text>.
97
98 =head2 pod_lines
99
100 ArrayRef of ArrayRefs of offset and length of pod blocks. Example:
101
102 # Two blocks of pod, starting at line 1 and line 15 with length
103 # of 10 lines each
104 [[1,10], [15,10]]
105
106 =head2 sloc
107
108 Source Lines of Code. Strips empty lines, pod and C<END> section from
109 L</content> and returns the number of lines.
110
111 =head2 slop
112
113 Source Lines of Pod. Returns the number of pod lines using L</pod_lines>.
114
115 =head2 stat
116
117 L<File::stat> info of the tarball. Contains C<mode>, C<uid>, C<gid>, C<size>
118 and C<mtime>.
119
120 =cut
121
4242da4 @monken document classes which describe indexes and tests
monken authored
122 has id => ( id => [qw(author release path)] );
123
1d41a41 @monken documentation
monken authored
124 has [qw(path author name)];
125 has distribution => ( analyzer => [qw(standard camelcase)] );
ca79817 @monken remove module property for pod files
monken authored
126 has module => ( required => 0, is => 'rw', isa => Module, coerce => 1, clearer => 'clear_module' );
1d41a41 @monken documentation
monken authored
127 has documentation => ( is => 'rw', lazy_build => 1, index => 'analyzed', analyzer => [qw(standard camelcase)] );
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
128 has release => ( parent => 1 );
19fa756 @monken D::File includes now a list of modules, D::Module not indexed anymore
monken authored
129 has date => ( isa => 'DateTime' );
9cbbc83 @monken improved types
monken authored
130 has stat => ( isa => Stat, required => 0 );
db49446 @monken update to document classes and tests
monken authored
131 has sloc => ( isa => 'Int', lazy_build => 1 );
08e6739 @monken dropped Modern::Perl as prereq
monken authored
132 has slop => ( isa => 'Int', is => 'rw', default => 0 );
db49446 @monken update to document classes and tests
monken authored
133 has pod_lines => ( isa => 'ArrayRef', type => 'integer', lazy_build => 1, index => 'no' );
1221c1b @monken es mapping
monken authored
134 has pod => ( isa => 'ScalarRef', lazy_build => 1, index => 'analyzed', not_analyzed => 0, store => 'no', term_vector => 'with_positions_offsets' );
1d41a41 @monken documentation
monken authored
135 has mime => ( lazy_build => 1 );
1221c1b @monken es mapping
monken authored
136 has abstract => ( lazy_build => 1, not_analyzed => 0, index => 'analyzed' );
da2c7f7 @monken added status field
monken authored
137 has status => ( default => 'cpan' );
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
138 has maturity => ( default => 'released' );
255776e @monken fix indexing tag on modules
monken authored
139 has directory => ( isa => 'Bool', default => 0 );
140 has level => ( isa => 'Int', lazy_build => 1 );
c5cd188 @monken reintroduced indexed attr
monken authored
141 has indexed => ( is => 'rw', isa => 'Bool', default => 1 );
08e6739 @monken dropped Modern::Perl as prereq
monken authored
142
1d41a41 @monken documentation
monken authored
143 =head1 ATTRIBUTES
144
145 These attributes are not stored.
146
147 =head2 content
148
149 The content of the file. It is built by calling L</content_cb> and
150 stripping the C<DATA> section for performance reasons.
151
152 =head2 content_cb
153
154 Callback, that returns the content of the as ScalarRef.
155
156 =cut
157
1221c1b @monken es mapping
monken authored
158 has content => ( isa => 'ScalarRef', lazy_build => 1, property => 0, required => 0 );
6520d00 @monken got rid of (slow) PPI
monken authored
159 has content_cb => ( property => 0, required => 0 );
4242da4 @monken document classes which describe indexes and tests
monken authored
160
1d41a41 @monken documentation
monken authored
161 =head1 METHODS
162
163 =head2 is_perl_file
164
165 Return true if the file extension is one of C<pl>, C<pm>, C<pod>, C<t>
166 or if the file has no extension and the shebang line contains the
167 term C<perl>.
168
169 =head2 is_pod_file
170
171 Retruns true if the file extension is C<pod>.
172
173 =cut
174
4242da4 @monken document classes which describe indexes and tests
monken authored
175 sub is_perl_file {
18a8678 @monken fixes #78, implemented documentation property
monken authored
176 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
177 return 1 if($self->name =~ /\.(pl|pm|pod|t)$/i);
178 if($self->name !~ /\./) {
179 my $content = ${$self->content};
180 return 1 if($content =~ /^#!.*?perl/);
18a8678 @monken fixes #78, implemented documentation property
monken authored
181 }
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
182 return 0;
18a8678 @monken fixes #78, implemented documentation property
monken authored
183 }
184
ca79817 @monken remove module property for pod files
monken authored
185 sub is_pod_file {
186 shift->name =~ /\.pod$/i;
187 }
188
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
189 sub _build_documentation {
255776e @monken fix indexing tag on modules
monken authored
190 my $self = shift;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
191 $self->_build_abstract;
ca79817 @monken remove module property for pod files
monken authored
192 my $documentation = $self->documentation if($self->has_documentation);
da5ea0e @monken no documentation for non-pod docs and camelcase
monken authored
193 return undef unless(${$self->pod});
ca79817 @monken remove module property for pod files
monken authored
194 my @indexed = grep { $_->indexed } @{$self->module || []};
1c3f4d7 @monken fixed processing of .pod files
monken authored
195 if($documentation && $self->is_pod_file) {
196 return $documentation;
197 } elsif($documentation && grep {$_->name eq $documentation} @indexed) {
ca79817 @monken remove module property for pod files
monken authored
198 return $documentation;
199 } elsif(@indexed) {
200 return $indexed[0]->name;
201 } else {
202 return undef;
203 }
255776e @monken fix indexing tag on modules
monken authored
204 }
205
206 sub _build_level {
207 my $self = shift;
208 my @level = split(/\//, $self->path);
209 return @level - 1;
210 }
211
edf5352 @monken optimized for low memory footprint
monken authored
212 sub _build_content {
213 my $self = shift;
08e6739 @monken dropped Modern::Perl as prereq
monken authored
214 my @content = split("\n", ${$self->content_cb->()} || '');
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
215 my $content = "";
216 while(@content) {
217 my $line = shift @content;
218 last if($line =~ /^\s*__DATA__$/);
219 $content .= $line . "\n";
220 }
221 return \$content;
edf5352 @monken optimized for low memory footprint
monken authored
222 }
223
4242da4 @monken document classes which describe indexes and tests
monken authored
224 sub _build_mime {
225 Plack::MIME->mime_type( shift->name ) || 'text/plain';
226 }
227
db49446 @monken update to document classes and tests
monken authored
228 sub _build_abstract {
e41cd5b @monken fixes #90
monken authored
229 my $self = shift;
230 return undef unless ( $self->is_perl_file );
231 my $text = ${$self->content};
232 my ( $documentation, $abstract );
233 if ( $text =~ /^=head1 NAME\s+(\S+)((\h+-+\h+(.+))|(\n\n(.+)))?/ms ) {
234 chomp( $abstract = $4 || $6 ) if($4 || $6);
235 my $name = $1;
236 $documentation = $name if ( $name =~ /^[\w\.:-_']+$/ );
237 } elsif ( $text =~ /^=head1 NAME\s+([\w\.:-_']+?)\n/ms ) {
238 chomp( $documentation = $1 );
239 }
240
241 if ($abstract) {
242 $abstract =~ s{\n\h*\n\h*.*$}{}xms;
243 $abstract =~ s{\n}{ }gxms;
244 $abstract =~ s{\s+$}{}gxms;
245 $abstract =~ s{(\s)+}{$1}gxms;
246 $abstract = MetaCPAN::Util::strip_pod($abstract);
247 }
248
249 if ($documentation) {
250 $self->documentation(MetaCPAN::Util::strip_pod($documentation));
251 }
252 return $abstract;
a277d5c @monken removed Pod::POM dep and rewrote abstract and module extraction
monken authored
253
4242da4 @monken document classes which describe indexes and tests
monken authored
254 }
255
e41cd5b @monken fixes #90
monken authored
256
4242da4 @monken document classes which describe indexes and tests
monken authored
257 sub _build_path {
258 my $self = shift;
259 return join( '/', $self->release->name, $self->name );
260 }
261
262 sub _build_pod_lines {
263 my $self = shift;
264 return [] unless ( $self->is_perl_file );
c292592 @monken removed Lines.pm class, moved it in Util.pm
monken authored
265 my ($lines, $slop) = MetaCPAN::Util::pod_lines(${$self->content});
da2c7f7 @monken added status field
monken authored
266 $self->slop($slop || 0);
08e6739 @monken dropped Modern::Perl as prereq
monken authored
267 return $lines;
268
4242da4 @monken document classes which describe indexes and tests
monken authored
269 }
270
271 # Copied from Perl::Metrics2::Plugin::Core
272 sub _build_sloc {
273 my $self = shift;
274 return 0 unless ( $self->is_perl_file );
6520d00 @monken got rid of (slow) PPI
monken authored
275 my @content = split("\n", ${$self->content});
276 my $pods = 0;
277 map { splice(@content, $_->[0], $_->[1], map { '' } 1 .. $_->[1]) } @{$self->pod_lines};
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
278 my $sloc = 0;
279 while(@content) {
280 my $line = shift @content;
08e6739 @monken dropped Modern::Perl as prereq
monken authored
281 last if($line =~ /^\s*__END__/s);
7f44e2b @monken ignore data sections in files, end in sloccount
monken authored
282 $sloc++ if( $line !~ /^\s*#/ && $line =~ /\S/ );
283 }
284 return $sloc;
4242da4 @monken document classes which describe indexes and tests
monken authored
285 }
286
9f9d48c @monken got rid of pod_html and toc, renamed pod_txt to pod
monken authored
287 sub _build_pod {
4242da4 @monken document classes which describe indexes and tests
monken authored
288 my $self = shift;
289 return \'' unless ( $self->is_perl_file );
290 my $parser = Pod::Text->new( sentence => 0, width => 78 );
291
292 my $text = "";
293 $parser->output_string( \$text );
294 $parser->parse_string_document( ${ $self->content } );
295
296 return \$text;
297 }
298
299 __PACKAGE__->meta->make_immutable;
Something went wrong with that request. Please try again.