Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Import into git from http://perlmonks.org/?node_id=598718
- Loading branch information
Max Maischein
committed
Feb 9, 2011
0 parents
commit 699cd6f
Showing
6 changed files
with
258 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
Makefile | ||
Makefile.old | ||
*.tar.gz | ||
*.bak | ||
pm_to_blib | ||
blib/ | ||
App-part-* | ||
.releaserc | ||
cover_db |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
TO DO: | ||
|
||
0.06 to be released | ||
. Import into git, post on github | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
.cvsignore$ | ||
^.git/ | ||
^.lwpcookies | ||
^.releaserc | ||
^blib/ | ||
^App-part-*.* | ||
CVS/ | ||
^pm_to_blib | ||
.tar.gz$ | ||
.old$ | ||
^Makefile$ | ||
^cvstest$ | ||
^blibdirs$ | ||
.bak$ | ||
^cover_db/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# -*- mode: perl; c-basic-offset: 4; indent-tabs-mode: nil; -*- | ||
|
||
use 5.006; #weaken | ||
use ExtUtils::MakeMaker; | ||
# See lib/ExtUtils/MakeMaker.pm for details of how to influence | ||
# the contents of the Makefile that is written. | ||
|
||
WriteMakefile1( | ||
MIN_PERL_VERSION => '5.006', | ||
META_MERGE => { | ||
resources => { | ||
repository => 'http://github.com/Corion/app-part', | ||
}, | ||
}, | ||
'NAME' => 'App::part', | ||
'LICENSE' => 'perl', | ||
'VERSION_FROM' => 'bin/part.pl', # finds $VERSION | ||
'PREREQ_PM' => { | ||
}, # e.g., Module::Name => 1.1 | ||
ABSTRACT_FROM => 'bin/part.pl', # retrieve abstract from module | ||
AUTHOR => 'Max Maischein <corion@cpan.org>', | ||
); | ||
|
||
1; | ||
|
||
sub WriteMakefile1 { #Written by Alexandr Ciornii, version 0.21. Added by eumm-upgrade. | ||
my %params=@_; | ||
my $eumm_version=$ExtUtils::MakeMaker::VERSION; | ||
$eumm_version=eval $eumm_version; | ||
die "EXTRA_META is deprecated" if exists $params{EXTRA_META}; | ||
die "License not specified" if not exists $params{LICENSE}; | ||
if ($params{BUILD_REQUIRES} and $eumm_version < 6.5503) { | ||
#EUMM 6.5502 has problems with BUILD_REQUIRES | ||
$params{PREREQ_PM}={ %{$params{PREREQ_PM} || {}} , %{$params{BUILD_REQUIRES}} }; | ||
delete $params{BUILD_REQUIRES}; | ||
} | ||
delete $params{CONFIGURE_REQUIRES} if $eumm_version < 6.52; | ||
delete $params{MIN_PERL_VERSION} if $eumm_version < 6.48; | ||
delete $params{META_MERGE} if $eumm_version < 6.46; | ||
delete $params{META_ADD} if $eumm_version < 6.46; | ||
delete $params{LICENSE} if $eumm_version < 6.31; | ||
delete $params{AUTHOR} if $] < 5.005; | ||
delete $params{ABSTRACT_FROM} if $] < 5.005; | ||
delete $params{BINARY_LOCATION} if $] < 5.005; | ||
|
||
WriteMakefile(%params); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
<c> | ||
#!/usr/bin/perl -w | ||
use strict; | ||
use Getopt::Long; | ||
|
||
use vars qw($VERSION); | ||
$VERSION = '0.06'; | ||
|
||
# Try to load Pod::Usage and install a fallback if it doesn't exist | ||
eval { | ||
require Pod::Usage; | ||
Pod::Usage->import(); | ||
1; | ||
} or do { | ||
*pod2usage = sub { | ||
die "Error in command line.\n"; | ||
}; | ||
}; | ||
|
||
=head1 NAME | ||
part - split up a single input file into multiple files according to a column value | ||
=head1 SYNOPSIS | ||
part FILES | ||
=head1 OPTIONS | ||
=over 4 | ||
=item B<--out> - set the output template | ||
If the output template is not given it is guessed from | ||
the name of the first input file or set to C<part-%s.txt>. | ||
The C<%s> will be replaced by the column value. | ||
=item B<--column> - set the column to part on | ||
This is the zero-based number of the column. | ||
Multiple columns may be given. | ||
=item B<--separator> - set the column separator | ||
This is the separator for the columns. It defaults | ||
to a tab character ("\t"). | ||
=item B<--header-line> - output the first line into every file | ||
This defines the line as header line which is output | ||
into every file. If it is given an argument that string | ||
is output as header, otherwise the first line read | ||
will be repeated as the header. | ||
If the value is a number, that many lines will be read from | ||
the file and used as the header. This makes it impossible | ||
to use just a number as the header. | ||
=item B<--verbose> - output the generated filenames | ||
In normal operation, the program will be silent. If you | ||
need to know the generated filenames, the C<--verbose> | ||
option will output them. | ||
=item B<--filename-sep> - set the separator for the filenames | ||
If you prefer a different separator for the filenames | ||
than a newline, this option allows you to set it. If | ||
the separator looks like an octal number (three digits) | ||
it is interpreted as such. Otherwise it will | ||
be taken literally. A common | ||
use is to set the separator to C<000> to separate the | ||
files by the zero character if you suspect that your | ||
filenames might contain newlines. | ||
It defaults to C<012>, a newline. | ||
=item B<--version> - output version information | ||
=back | ||
=head1 CAVEAT | ||
The program loads the whole input into RAM | ||
before writing the output. A future enhancement | ||
might be a C<uniq>-like option that tells the | ||
program to assume that the input will be grouped | ||
according to the parted column so it does not | ||
need to allocate memory. | ||
If your memory is not large enough, the following | ||
C<awk> one-liner might help you: | ||
# Example of parting on column 3 | ||
awk -F '{ print $0 > $3 }' FILE | ||
=head1 AUTHOR | ||
Copyright (c) 2007-2011 Max Maischein (C<< corion@cpan.org >>) | ||
=cut | ||
|
||
GetOptions( | ||
'out=s' => \my $tmpl, | ||
'column=i' => \my @col, | ||
'separator=s' => \my $sep, | ||
'verbose' => \my $verbose, | ||
'filename-sep=s' => \my $filename_sep, | ||
'header-line:s' => \my $header, | ||
'help' => \my $help, | ||
'version' => \my $version, | ||
) or pod2usage(2); | ||
pod2usage(1) if $help; | ||
if (defined $version) { | ||
print "$VERSION\n"; | ||
exit 0; | ||
}; | ||
pod2usage("$0: No files given.") if ((@ARGV == 0) && (-t STDIN)); | ||
|
||
if (! defined $tmpl) { | ||
# Let's hope we can guess from the first filename | ||
my $placeholder = '-%s' x @col; | ||
($tmpl = $ARGV[0] || 'part.txt') =~ s/\.(\w+)$/$placeholder.$1/; | ||
}; | ||
|
||
if (! defined $sep) { | ||
$sep = "\t"; | ||
}; | ||
|
||
$filename_sep ||= "012"; | ||
if ($filename_sep =~ /^\d{3}$/) { | ||
$filename_sep = chr oct $filename_sep | ||
}; | ||
|
||
my %lines; | ||
if (defined $header) { | ||
$header ||= 1; | ||
if ($header =~ /^\d+$/) { | ||
my $count = $header; | ||
$header = ""; | ||
$header .= <> | ||
while $count--; | ||
}; | ||
}; | ||
|
||
while (<>) { | ||
s/\r?\n$//; | ||
my @c = split /$sep/o; | ||
my $key = join $sep, @c[ @col ]; | ||
if (not defined $lines{ $key }) { | ||
$lines{ $key } ||= []; | ||
}; | ||
push @{ $lines{$key}}, $_ | ||
} | ||
|
||
for my $key (sort keys %lines) { | ||
my @vals = split /$sep/o, $key; | ||
my $name = sprintf $tmpl, @vals; | ||
open my $fh, ">", $name | ||
or die "Couldn't create '$name': $!"; | ||
if ($header) { | ||
print {$fh} $header; | ||
} | ||
print "$name$filename_sep" | ||
if $verbose; | ||
print {$fh} "$_\n" | ||
for (@{ $lines{ $key }}); | ||
}; | ||
</c> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package App::part; | ||
|
||
=head1 NAME | ||
App::part - split up files according to column value | ||
=head1 SYNOPSIS | ||
This module is just a placeholder for the main program, L<part> | ||
=cut | ||
|
||
1; |