Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Max Maischein committed Feb 9, 2011
0 parents commit 699cd6f
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .gitignore
@@ -0,0 +1,9 @@
Makefile
Makefile.old
*.tar.gz
*.bak
pm_to_blib
blib/
App-part-*
.releaserc
cover_db
5 changes: 5 additions & 0 deletions Changes
@@ -0,0 +1,5 @@
TO DO:

0.06 to be released
. Import into git, post on github

15 changes: 15 additions & 0 deletions MANIFEST.skip
@@ -0,0 +1,15 @@
.cvsignore$
^.git/
^.lwpcookies
^.releaserc
^blib/
^App-part-*.*
CVS/
^pm_to_blib
.tar.gz$
.old$
^Makefile$
^cvstest$
^blibdirs$
.bak$
^cover_db/
47 changes: 47 additions & 0 deletions Makefile.PL
@@ -0,0 +1,47 @@
# -*- mode: perl; c-basic-offset: 4; indent-tabs-mode: nil; -*-

use 5.006; #weaken
use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.

WriteMakefile1(
MIN_PERL_VERSION => '5.006',
META_MERGE => {
resources => {
repository => 'http://github.com/Corion/app-part',
},
},
'NAME' => 'App::part',
'LICENSE' => 'perl',
'VERSION_FROM' => 'bin/part.pl', # finds $VERSION
'PREREQ_PM' => {
}, # e.g., Module::Name => 1.1
ABSTRACT_FROM => 'bin/part.pl', # retrieve abstract from module
AUTHOR => 'Max Maischein <corion@cpan.org>',
);

1;

sub WriteMakefile1 { #Written by Alexandr Ciornii, version 0.21. Added by eumm-upgrade.
my %params=@_;
my $eumm_version=$ExtUtils::MakeMaker::VERSION;
$eumm_version=eval $eumm_version;
die "EXTRA_META is deprecated" if exists $params{EXTRA_META};
die "License not specified" if not exists $params{LICENSE};
if ($params{BUILD_REQUIRES} and $eumm_version < 6.5503) {
#EUMM 6.5502 has problems with BUILD_REQUIRES
$params{PREREQ_PM}={ %{$params{PREREQ_PM} || {}} , %{$params{BUILD_REQUIRES}} };
delete $params{BUILD_REQUIRES};
}
delete $params{CONFIGURE_REQUIRES} if $eumm_version < 6.52;
delete $params{MIN_PERL_VERSION} if $eumm_version < 6.48;
delete $params{META_MERGE} if $eumm_version < 6.46;
delete $params{META_ADD} if $eumm_version < 6.46;
delete $params{LICENSE} if $eumm_version < 6.31;
delete $params{AUTHOR} if $] < 5.005;
delete $params{ABSTRACT_FROM} if $] < 5.005;
delete $params{BINARY_LOCATION} if $] < 5.005;

WriteMakefile(%params);
}
169 changes: 169 additions & 0 deletions bin/part.pl
@@ -0,0 +1,169 @@
<c>
#!/usr/bin/perl -w
use strict;
use Getopt::Long;

use vars qw($VERSION);
$VERSION = '0.06';

# Try to load Pod::Usage and install a fallback if it doesn't exist
eval {
require Pod::Usage;
Pod::Usage->import();
1;
} or do {
*pod2usage = sub {
die "Error in command line.\n";
};
};

=head1 NAME
part - split up a single input file into multiple files according to a column value
=head1 SYNOPSIS
part FILES
=head1 OPTIONS
=over 4
=item B<--out> - set the output template
If the output template is not given it is guessed from
the name of the first input file or set to C<part-%s.txt>.
The C<%s> will be replaced by the column value.
=item B<--column> - set the column to part on
This is the zero-based number of the column.
Multiple columns may be given.
=item B<--separator> - set the column separator
This is the separator for the columns. It defaults
to a tab character ("\t").
=item B<--header-line> - output the first line into every file
This defines the line as header line which is output
into every file. If it is given an argument that string
is output as header, otherwise the first line read
will be repeated as the header.
If the value is a number, that many lines will be read from
the file and used as the header. This makes it impossible
to use just a number as the header.
=item B<--verbose> - output the generated filenames
In normal operation, the program will be silent. If you
need to know the generated filenames, the C<--verbose>
option will output them.
=item B<--filename-sep> - set the separator for the filenames
If you prefer a different separator for the filenames
than a newline, this option allows you to set it. If
the separator looks like an octal number (three digits)
it is interpreted as such. Otherwise it will
be taken literally. A common
use is to set the separator to C<000> to separate the
files by the zero character if you suspect that your
filenames might contain newlines.
It defaults to C<012>, a newline.
=item B<--version> - output version information
=back
=head1 CAVEAT
The program loads the whole input into RAM
before writing the output. A future enhancement
might be a C<uniq>-like option that tells the
program to assume that the input will be grouped
according to the parted column so it does not
need to allocate memory.
If your memory is not large enough, the following
C<awk> one-liner might help you:
# Example of parting on column 3
awk -F '{ print $0 > $3 }' FILE
=head1 AUTHOR
Copyright (c) 2007-2011 Max Maischein (C<< corion@cpan.org >>)
=cut

GetOptions(
'out=s' => \my $tmpl,
'column=i' => \my @col,
'separator=s' => \my $sep,
'verbose' => \my $verbose,
'filename-sep=s' => \my $filename_sep,
'header-line:s' => \my $header,
'help' => \my $help,
'version' => \my $version,
) or pod2usage(2);
pod2usage(1) if $help;
if (defined $version) {
print "$VERSION\n";
exit 0;
};
pod2usage("$0: No files given.") if ((@ARGV == 0) && (-t STDIN));

if (! defined $tmpl) {
# Let's hope we can guess from the first filename
my $placeholder = '-%s' x @col;
($tmpl = $ARGV[0] || 'part.txt') =~ s/\.(\w+)$/$placeholder.$1/;
};

if (! defined $sep) {
$sep = "\t";
};

$filename_sep ||= "012";
if ($filename_sep =~ /^\d{3}$/) {
$filename_sep = chr oct $filename_sep
};

my %lines;
if (defined $header) {
$header ||= 1;
if ($header =~ /^\d+$/) {
my $count = $header;
$header = "";
$header .= <>
while $count--;
};
};

while (<>) {
s/\r?\n$//;
my @c = split /$sep/o;
my $key = join $sep, @c[ @col ];
if (not defined $lines{ $key }) {
$lines{ $key } ||= [];
};
push @{ $lines{$key}}, $_
}

for my $key (sort keys %lines) {
my @vals = split /$sep/o, $key;
my $name = sprintf $tmpl, @vals;
open my $fh, ">", $name
or die "Couldn't create '$name': $!";
if ($header) {
print {$fh} $header;
}
print "$name$filename_sep"
if $verbose;
print {$fh} "$_\n"
for (@{ $lines{ $key }});
};
</c>
13 changes: 13 additions & 0 deletions lib/App/part.pm
@@ -0,0 +1,13 @@
package App::part;

=head1 NAME
App::part - split up files according to column value
=head1 SYNOPSIS
This module is just a placeholder for the main program, L<part>
=cut

1;

0 comments on commit 699cd6f

Please sign in to comment.