Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Add a script for looking for bad MARC records

So far it only looks for records without a title but hey, it's a
start.
  • Loading branch information...
commit c83a7892d475a373424418e727b4d0e9d0a5d7e3 1 parent 7bfd224
Magnus Enger authored November 11, 2011

Showing 1 changed file with 175 additions and 0 deletions. Show diff stats Hide diff stats

  1. 175  checkmarc.pl
175  checkmarc.pl
... ...
@@ -0,0 +1,175 @@
  1
+#!/usr/bin/perl -w
  2
+
  3
+# checkmarc.pl
  4
+# Copyright 2011 Magnus Enger
  5
+
  6
+# This is free software; you can redistribute it and/or modify
  7
+# it under the terms of the GNU General Public License as published by
  8
+# the Free Software Foundation; either version 2 of the License, or
  9
+# (at your option) any later version.
  10
+#
  11
+# This file is distributed in the hope that it will be useful,
  12
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14
+# GNU General Public License for more details.
  15
+#
  16
+# You should have received a copy of the GNU General Public License
  17
+# along with this file; if not, write to the Free Software
  18
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19
+
  20
+# For deleting a biblio:
  21
+# /cgi-bin/koha/cataloguing/addbiblio.pl?op=delete&biblionumber=xxxxx
  22
+
  23
+use Getopt::Long;
  24
+use Pod::Usage;
  25
+use MARC::File::USMARC;
  26
+use MARC::File::XML;
  27
+use String::Strip;
  28
+use Encode;
  29
+# binmode STDOUT, ":utf8";
  30
+
  31
+use strict;
  32
+
  33
+## Redirect STDERR to STDOUT
  34
+open STDERR, ">&STDOUT" or die "cannot dup STDERR to STDOUT: $!\n";
  35
+
  36
+## get command line options
  37
+my ($input_file, $limit, $verbose, $debug, $sql) = get_options();
  38
+print "\nStarting checkmarc.pl\n"        if $debug;
  39
+print "Input File: $input_file\n"        if $debug;
  40
+print "Stopping after $limit records\n"  if $debug && $limit;
  41
+
  42
+if (!-e $input_file) {
  43
+	die "Couldn't find input file $input_file\n";
  44
+}
  45
+
  46
+my $batch = MARC::File::USMARC->in($input_file);
  47
+my $count = 0;
  48
+my $problematic = 0;
  49
+
  50
+print "Starting records iteration\n" if $debug;
  51
+## iterate through the records in the MARC file and check each one
  52
+while (my $record = $batch->next()) {
  53
+
  54
+  my $found_error = 0;
  55
+
  56
+	# Get the biblionumber from 999c
  57
+	my $biblionumber;
  58
+	if ($record->field('999') && $record->field('999')->subfield('c')) {
  59
+	  $biblionumber = $record->field('999')->subfield('c');
  60
+	} else {
  61
+	  print "NO BIBLIONUMBER for iteration $count\n";
  62
+	}
  63
+  
  64
+	# print the record before it is transformed
  65
+	print "\n######## bib $biblionumber iter $count ################\n" if $debug;
  66
+	print $record->title(), "\n"        if $debug;
  67
+	print $record->as_formatted(), "\n" if $debug;
  68
+	
  69
+	## Do the checks
  70
+	
  71
+	# Check that we have a title
  72
+	unless ($record->field('245')) {
  73
+	  unless ($sql) {
  74
+	    print "$biblionumber No title\n";
  75
+	  }
  76
+	  $found_error = 1;
  77
+	}
  78
+	
  79
+	if ($found_error == 1 && $verbose) {
  80
+	  print $record->as_formatted(), "\n";
  81
+	}
  82
+	
  83
+	# Output SQL statements for deleting records with errors
  84
+	if ($found_error == 1 && $sql) {
  85
+	  print "delete from biblioitems where biblionumber = $biblionumber;\n";
  86
+	  print "delete from items where biblionumber = $biblionumber;\n";
  87
+	}
  88
+	
  89
+	$count++;
  90
+	$problematic += $found_error;
  91
+
  92
+	# Check if --limit is set and we need to stop processing
  93
+	if ($limit > 0 && $count == $limit) {
  94
+	  last;
  95
+	}
  96
+	
  97
+}
  98
+print "\nEnd of records\n" if $debug;
  99
+
  100
+# make sure there weren't any problems
  101
+if ( my @warnings = $batch->warnings() ) {
  102
+  print "\nWarnings were detected!\n", @warnings if $debug;
  103
+}
  104
+
  105
+unless ($sql) {
  106
+  print "Looked at $count records, found $problematic bad records.\n";
  107
+}
  108
+
  109
+### SUBROUTINES ###
  110
+
  111
+# Get commandline options
  112
+sub get_options {
  113
+  my $input_file = '';
  114
+  my $sql = '';
  115
+  my $verbose = '';
  116
+  my $debug = '';
  117
+  my $limit = 0;
  118
+  my $help = '';
  119
+
  120
+  GetOptions("i|infile=s" => \$input_file,
  121
+             "s|sql!"     => \$sql, 
  122
+             "d|debug!"   => \$debug,
  123
+             "v|verbose!" => \$verbose,
  124
+             "l|limit=s"  => \$limit, 
  125
+             'h|?|help'   => \$help
  126
+             );
  127
+  
  128
+  pod2usage(-exitval => 0) if $help;
  129
+  pod2usage( -msg => "\nMissing Argument: -i, --infile required\n", -exitval => 1) if !$input_file;
  130
+
  131
+  return ($input_file, $limit, $verbose, $debug, $sql);
  132
+}       
  133
+
  134
+__END__
  135
+
  136
+=head1 NAME
  137
+    
  138
+checkmarc.pl - Check MARC records for common problems.
  139
+        
  140
+=head1 SYNOPSIS
  141
+            
  142
+checkmarc.pl -i inputfile [-s] [-l n] [-d] [-v] [-h]
  143
+               
  144
+=head1 OPTIONS
  145
+              
  146
+=over 8
  147
+                                                   
  148
+=item B<-i, --infile>
  149
+
  150
+Name of the MARC file to be read.
  151
+
  152
+=item B<-s, --sql>
  153
+
  154
+Output SQL statements for deleting the records you have found from biblioitems
  155
+and items. The normal report output is suppressed.
  156
+
  157
+=item B<-l, --limit>
  158
+
  159
+Stop processing after n records.
  160
+
  161
+=item B<-d, --debug>
  162
+
  163
+Print progress messages and every record, in mnemonic form, as it is read.
  164
+
  165
+=item B<-v, --verbose>
  166
+
  167
+Print the full record, in mnemonic form, for every bad record found.
  168
+
  169
+=item B<-h, -?, --help>
  170
+                                               
  171
+Prints this help message and exits.
  172
+
  173
+=back
  174
+                                                               
  175
+=cut

0 notes on commit c83a789

Please sign in to comment.
Something went wrong with that request. Please try again.