Skip to content

Commit

Permalink
USA operators resolved down to 1000 number blocks, at the expense of …
Browse files Browse the repository at this point in the history
…losing all the weird little islands
  • Loading branch information
DrHyde committed Dec 13, 2018
1 parent 4da4c6e commit 5c60d6f
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 70 deletions.
8 changes: 2 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,5 @@ S9.xls
t/example-phone-numbers.t
COCodeStatus_ALL.csv
COCodeStatus_ALL.zip
EstCodes.zip
EstCodes.xlsx
WstCodes.zip
WstCodes.xlsx
CenCodes.zip
CenCodes.xlsx
AllBlocksAugmentedReport.txt
AllBlocksAugmentedReport.zip
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ matrix:
- cpanm --quiet --notest Digest::MD5
- cpanm --quiet --notest File::Find::Rule
- cpanm --quiet --notest Spreadsheet::ParseExcel
- cpanm --quiet --notest Spreadsheet::XLSX
- cpanm --quiet --notest Text::CSV_XS
- ./build-data.sh
# repetition ends here
Expand All @@ -32,7 +31,6 @@ before_install:
- cpanm --quiet --notest Digest::MD5
- cpanm --quiet --notest File::Find::Rule
- cpanm --quiet --notest Spreadsheet::ParseExcel
- cpanm --quiet --notest Spreadsheet::XLSX
- cpanm --quiet --notest Text::CSV_XS
- ./build-data.sh
sudo: false
2 changes: 1 addition & 1 deletion Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ WriteMakefile(
PREOP => "./build-data.sh"
},
realclean => {
FILES => "*.xls sabc.txt COCodeStatus_ALL.* CenCodes.* EstCodes.* WstCodes.* cover_db"
FILES => "*.xls sabc.txt COCodeStatus_ALL.* AllBlocksAugmentedReport.* cover_db"
},
);

Expand Down
103 changes: 73 additions & 30 deletions build-data.nanp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
use strict;
use warnings;
use XML::XPath;
use Spreadsheet::XLSX;
use Text::CSV_XS;
use Data::Dumper;local $Data::Dumper::Indent = 1;

Expand Down Expand Up @@ -104,14 +103,6 @@ TERRITORY: foreach my $territory (@territories) {
print MODULE "\$mobile_regexes{$ISO_country_code} = '$mobile';\n";
}

warn("Extracting Canadian operators\n");
open(my $operators, '<', 'COCodeStatus_ALL.csv') ||
die("Couldn't open COCodeStatus_ALL.csv: $!\n");
$csv->getline($operators); # header line
$csv->getline($operators); # date stamp?
mkdir('share');
unlink('share/Number-Phone-NANP-Data.db');

# file format:
# first a list of 32-bit pointers, one for each 10,000 number
# block NPA-NXX-xxxx, starting at 200-000. So the address of
Expand All @@ -123,40 +114,57 @@ unlink('share/Number-Phone-NANP-Data.db');
# 0: length byte then string
# 1: block of 10 pointers, one for each thousand number block in
# an NPA-NXX-X
my %seen_operators = ();
my $next_data_offset = 5 * 799999;

mkdir('share');
unlink('share/Number-Phone-NANP-Data.db');
open(my $randomfh, "> :raw :bytes", 'share/Number-Phone-NANP-Data.db') || die($!);
while(my $row = $csv->getline($operators)) {

warn("Extracting Canadian operators\n");
open(my $CA_operators, '<', 'COCodeStatus_ALL.csv') ||
die("Couldn't open COCodeStatus_ALL.csv: $!\n");
$csv->getline($CA_operators); # header line
$csv->getline($CA_operators); # date stamp?
while(my $row = $csv->getline($CA_operators)) {
last unless($row->[0]);
next unless($row->[2]);
my($co, $op) = (join('', $row->[0], $row->[1]), $row->[2]);
_write_operator_data($co, $op);
}

warn("Extracting US operators\n");
foreach my $file (map { "${_}Codes.xlsx" } qw(Est Cen Wst)) {
warn("$file\n");
my $excel = Spreadsheet::XLSX->new($file);
SHEET: foreach my $sheet (@{$excel->{Worksheet}}) {
next SHEET unless($sheet->{Name} =~ /^..$/);
warn(" ".$sheet->{Name}."\n");
ROW: foreach my $row ($sheet->{MinRow} + 3 .. $sheet->{MaxRow}) {
my($co, $use, $op) = map {
$sheet->{Cells}->[$row]->[$_]->{Val}
} ($sheet->{MinCol}, $sheet->{MinCol} + 1, $sheet->{MinCol} + 3);
next ROW if(!$op || $use ne 'AS'); # Assigned
$co =~ s/\D//g;
if(!$op) { warn("CO $co has no operator\n"); next ROW }
_write_operator_data($co, $op);
}
# Read the US thousand-block report and group every assigned block by its
# six-digit NPA-NXX. Columns 2, 3, 4, 5 and 12 of each row are taken as the
# NPA, the NXX, the thousands digit, the assignment status and the operator
# name respectively. Only rows whose status is 'AS' (assigned) are kept.
open(my $US_operators, '<', 'AllBlocksAugmentedReport.txt') ||
    die("Couldn't open AllBlocksAugmentedReport.txt: $!\n");
$csv->getline($US_operators); # header line
my %US = ();
while(my $row = $csv->getline($US_operators)) {
    my($npa, $nxx, $thousands, $status, $op) = @{$row}[2, 3, 4, 5, 12];
    # short or blank rows have no status column; skip them without warnings
    next unless(defined($status) && $status eq 'AS');
    # autovivification creates the per-NPA-NXX list on first use
    push @{$US{"$npa$nxx"}}, { thousands => $thousands, op => $op };
}
# getline() also returns false on a parse error, which would otherwise
# silently drop the rest of the report
warn("Stopped reading AllBlocksAugmentedReport.txt early: " . $csv->error_diag() . "\n")
    unless($csv->eof());
close($US_operators);

# Walk the NPA-NXXs in sorted order so that the offsets written to the data
# file are the same on every build (hash order is randomised per process).
foreach my $npanxx (sort keys %US) {
    my $data = $US{$npanxx};
    my %operators = map { $_->{op} => 1 } @{$data};
    if(
        scalar(@{$data}) == 10 &&        # got 10 assigned blocks
        scalar(keys(%operators)) == 1    # but only one operator
    ) {
        # the whole ten-thousand block belongs to a single operator
        _write_operator_data($npanxx, $data->[0]->{op});
    } else {
        # if we get here the block is sub-divided
        _write_multiple_operators($npanxx, $data);
    }
}

my %seen_operators;
my $next_data_offset;
sub _write_operator_data {
my($co, $op) = @_;
$co -= 200000; # area codes 000 to 199 are invalid
$next_data_offset ||= 5 * 799999;
seek($randomfh, 4 * $co, 0);
# area codes 000 to 199 are invalid
seek($randomfh, 4 * ($co - 200000), 0);
if(exists($seen_operators{$op})) {
print $randomfh pack('N', $seen_operators{$op});
} else {
Expand All @@ -170,4 +178,39 @@ sub _write_operator_data {
}
}

# Write the data for an NPA-NXX whose thousand-number blocks do not all
# belong to one operator.
#
# Arguments:
#   $co   - the six-digit NPA-NXX; 200000 is subtracted to find its slot in
#           the table of 32-bit pointers at the start of the file
#   $data - arrayref of hashrefs, each with a 'thousands' digit and an 'op'
#           (operator name) for one assigned thousand-number block
#
# Layout written at $next_data_offset: a block-type byte of 1, then ten
# 32-bit pointers (one per thousands digit), each either zero or the offset
# of a type-0 block (type byte, length byte, operator string).
#
# Side effects: writes to $randomfh, advances $next_data_offset and records
# newly written operator strings in %seen_operators. Dies if an operator
# name is longer than 255 bytes.
sub _write_multiple_operators {
    my($co, $data) = @_;

    # point the NPA-NXX's slot in the main table at the new block
    seek($randomfh, 4 * ($co - 200000), 0);
    print $randomfh pack('N', $next_data_offset);

    seek($randomfh, $next_data_offset, 0);
    print $randomfh pack('C', 1); # block-type marker
    $next_data_offset++;

    # the array might have fewer than 10 entries, eg
    #   [
    #     { thousands => 3, op => "blah" },
    #     { thousands => 7, op => "otherblah" },
    #     { thousands => 9, op => "blah" },
    #   ]
    # but the array of pointers in the file is always ten entries.
    # Write all ten as zero up front so that unassigned slots really exist
    # in the file, instead of relying on later writes to extend the file
    # past them.
    my $start_of_pointers = $next_data_offset;
    print $randomfh pack('N10', (0) x 10);
    $next_data_offset += 40;

    foreach my $block (@{$data}) {
        my($thousands_digit, $op) = @{$block}{qw(thousands op)};

        seek($randomfh, $start_of_pointers + 4 * $thousands_digit, 0);
        if(exists($seen_operators{$op})) {
            # operator string is already in the file; re-use it
            print $randomfh pack('N', $seen_operators{$op});
        } else {
            print $randomfh pack('N', $next_data_offset);
            seek($randomfh, $next_data_offset, 0);
            die("Operator '$op' is longer than 255 bytes\n")
                if(length($op) > 255);
            # type-0 block: type byte, length byte, then the string itself
            print $randomfh pack('CCA*', 0, length($op), $op);
            $seen_operators{$op} = $next_data_offset;
            $next_data_offset += length($op) + 2;
        }
    }
}

close(MODULE);
48 changes: 20 additions & 28 deletions build-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,22 @@ EXITSTATUS=0
# https://www.nationalnanpa.com/reports/reports_cocodes.html
# http://cnac.ca/co_codes/co_code_status.htm
for i in \
http://static.ofcom.org.uk/static/numbering/sabc.txt \
http://static.ofcom.org.uk/static/numbering/sabcde11_12.xls \
http://static.ofcom.org.uk/static/numbering/sabcde13.xls \
http://static.ofcom.org.uk/static/numbering/sabcde14.xls \
http://static.ofcom.org.uk/static/numbering/sabcde15.xls \
http://static.ofcom.org.uk/static/numbering/sabcde16.xls \
http://static.ofcom.org.uk/static/numbering/sabcde17.xls \
http://static.ofcom.org.uk/static/numbering/sabcde18.xls \
http://static.ofcom.org.uk/static/numbering/sabcde19.xls \
http://static.ofcom.org.uk/static/numbering/sabcde2.xls \
http://static.ofcom.org.uk/static/numbering/S3.xls \
http://static.ofcom.org.uk/static/numbering/S5.xls \
http://static.ofcom.org.uk/static/numbering/S7.xls \
http://static.ofcom.org.uk/static/numbering/S8.xls \
http://static.ofcom.org.uk/static/numbering/S9.xls \
https://www.nationalnanpa.com/nanp1/CenCodes.zip \
https://www.nationalnanpa.com/nanp1/EstCodes.zip \
https://www.nationalnanpa.com/nanp1/WstCodes.zip \
http://static.ofcom.org.uk/static/numbering/sabc.txt \
http://static.ofcom.org.uk/static/numbering/sabcde11_12.xls \
http://static.ofcom.org.uk/static/numbering/sabcde13.xls \
http://static.ofcom.org.uk/static/numbering/sabcde14.xls \
http://static.ofcom.org.uk/static/numbering/sabcde15.xls \
http://static.ofcom.org.uk/static/numbering/sabcde16.xls \
http://static.ofcom.org.uk/static/numbering/sabcde17.xls \
http://static.ofcom.org.uk/static/numbering/sabcde18.xls \
http://static.ofcom.org.uk/static/numbering/sabcde19.xls \
http://static.ofcom.org.uk/static/numbering/sabcde2.xls \
http://static.ofcom.org.uk/static/numbering/S3.xls \
http://static.ofcom.org.uk/static/numbering/S5.xls \
http://static.ofcom.org.uk/static/numbering/S7.xls \
http://static.ofcom.org.uk/static/numbering/S8.xls \
http://static.ofcom.org.uk/static/numbering/S9.xls \
https://www.nationalpooling.com/reports/region/AllBlocksAugmentedReport.zip \
http://www.cnac.ca/data/COCodeStatus_ALL.zip;
do
# make sure that there's a file that curl -z can look at
Expand All @@ -47,11 +45,9 @@ do
fi
curl -z `basename $i` -R -O -s $i;
done
rm COCodeStatus_ALL.csv ???Codes.xlsx
# Remove previously extracted files so unzip can recreate them. -f keeps rm
# quiet and successful when a file is absent, e.g. on a fresh checkout, or
# for the ???Codes.xlsx files now that those archives are no longer fetched.
rm -f COCodeStatus_ALL.csv ???Codes.xlsx AllBlocksAugmentedReport.txt
unzip -q COCodeStatus_ALL.zip
unzip -q CenCodes.zip
unzip -q EstCodes.zip
unzip -q WstCodes.zip
unzip -q AllBlocksAugmentedReport.zip

# if share/Number-Phone-UK-Data.db doesn't exist, or OFCOM's stuff is newer ...
if test ! -e share/Number-Phone-UK-Data.db -o \
Expand Down Expand Up @@ -104,13 +100,9 @@ if test ! -e lib/Number/Phone/NANP/Data.pm -o \
libphonenumber/resources/geocoding/en/1.txt -nt lib/Number/Phone/NANP/Data.pm -o \
libphonenumber/resources/PhoneNumberMetadata.xml -nt lib/Number/Phone/NANP/Data.pm -o \
! -e share/Number-Phone-NANP-Data.db -o \
EstCodes.zip -nt share/Number-Phone-NANP-Data.db -o \
CenCodes.zip -nt share/Number-Phone-NANP-Data.db -o \
WstCodes.zip -nt share/Number-Phone-NANP-Data.db -o \
AllBlocksAugmentedReport.zip -nt share/Number-Phone-NANP-Data.db -o \
COCodeStatus_ALL.zip -nt share/Number-Phone-NANP-Data.db -o \
EstCodes.xlsx -nt share/Number-Phone-NANP-Data.db -o \
CenCodes.xlsx -nt share/Number-Phone-NANP-Data.db -o \
WstCodes.xlsx -nt share/Number-Phone-NANP-Data.db -o \
AllBlocksAugmentedReport.txt -nt share/Number-Phone-NANP-Data.db -o \
COCodeStatus_ALL.csv -nt share/Number-Phone-NANP-Data.db;
then
if [ "$TRAVIS" != "true" ]; then
Expand Down
8 changes: 7 additions & 1 deletion lib/Number/Phone/NANP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,15 @@ sub _get_data_starting_from_pointer_at_offset {
$pointer += 1;

if($block_type == 0) {
# $pointer points at a string
return $self->_get_string_at_offset($pointer);
} elsif($block_type == 1) {
# $pointer points at a block of pointers
(my $number = ${$self}) =~ s/\D//g;
my $thousands = substr($number, 7, 1); # the seventh digit
return $self->_get_data_starting_from_pointer_at_offset($pointer + 4 * $thousands);
} else {
die("Don't know how to parse a block of type $block_type\n")
die("Don't know how to handle a block of type $block_type at ".($pointer - 1)."\n");
}
}

Expand Down
24 changes: 22 additions & 2 deletions t/inc/common-nanp_and_libphonenumber_tests.pl
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,29 @@

note("operator");
# Operator lookups. The whole group is handed to skip_if_libphonenumber()
# with the reason "Stubs don't support operator".
# NOTE(review): the count passed here (4) looks stale now that the
# thousand-block loop below adds ten more assertions -- confirm it matches
# the number of tests run inside this sub.
skip_if_libphonenumber("Stubs don't support operator", 4, sub {
is($CLASS->new('+14163922489')->operator(), 'Bell Canada', "Canada");
# Canadian numbers are all allocated from ten-thousand blocks
is($CLASS->new('+1 416 392 2489')->operator(), 'Bell Canada', "Canada");
is($CLASS->new('+1 202 200 0000')->operator(), 'SPRINT SPECTRUM L.P.', 'USA, ten-thousand block allocation');
# One number from each thousand-number block of 512-373. Each tuple is
# [ number, expected operator ]; an expected operator of undef means the
# block is unallocated.
foreach my $tuple (
[ '+1 512 373 0000', 'SPRINT SPECTRUM L.P.' ],
[ '+1 512 373 1000', undef ],
[ '+1 512 373 2000', 'SPRINT SPECTRUM L.P.', ],
[ '+1 512 373 3000', 'TIME WARNER CBLE INFO SVC (TX) DBA TIME WARNER CBL', ],
[ '+1 512 373 4000', undef ],
[ '+1 512 373 5000', 'SPRINT SPECTRUM L.P.' ],
[ '+1 512 373 6000', 'SPRINT SPECTRUM L.P.' ],
[ '+1 512 373 7000', undef ],
[ '+1 512 373 8000', 'TIME WARNER CBLE INFO SVC (TX) DBA TIME WARNER CBL' ],
[ '+1 512 373 9000', 'SPRINT SPECTRUM L.P.' ]
) {
is(
$CLASS->new($tuple->[0])->operator(),
$tuple->[1],
# test name records the number and whether an operator is expected
'USA, thousand block, '.$tuple->[0].', '.
(defined($tuple->[1]) ? 'allocated' : 'unallocated')
);
}
# other NANP territories
is($CLASS->new('+13407745666')->operator(), 'VIRGIN ISLANDS TEL. CORP. DBA INNOVATIVE TELEPHONE', "US Virgin Islands");
is($CLASS->new('+12024566213')->operator(), 'VERIZON WASHINGTON, DC INC.', "USA");
is($CLASS->new('+16714727679')->operator(),'TELEGUAM HOLDINGS, LLC', "Guam");
});

Expand Down

0 comments on commit 5c60d6f

Please sign in to comment.