@@ -809,6 +809,8 @@ def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
809
809
If p is a scalar, the largest value of x less than or equal
810
810
to the p percentage point in the sequence is returned.
811
811
"""
812
+
813
+
812
814
x = npy .ravel (x )
813
815
x .sort ()
814
816
Nx = len (x )
@@ -1282,7 +1284,10 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=5, delimiter=',',
1282
1284
converterd, if not None, is a dictionary mapping column number or
1283
1285
munged column name to a converter function
1284
1286
1285
- See examples/loadrec.py
1287
+ names, if not None, is a list of header names. In this case, no
1288
+ header will be read from the file
1289
+
1290
+ if no rows are found, None is returned. See examples/loadrec.py
1286
1291
"""
1287
1292
1288
1293
if converterd is None :
@@ -1291,9 +1296,42 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=5, delimiter=',',
1291
1296
import dateutil .parser
1292
1297
parsedate = dateutil .parser .parse
1293
1298
1299
+
1294
1300
fh = cbook .to_filehandle (fname )
1295
- reader = csv .reader (fh , delimiter = delimiter )
1296
1301
1302
+
1303
class FH:
    """
    Wrapper around a line-oriented file handle used when the delimiter
    is a single space.

    Comma and tab delimited files should yield an empty field for
    every repeated delimiter, but for space delimited files a run of
    whitespace is meant to act as one separator.  Each line handed
    out by this wrapper is therefore split on whitespace and re-joined
    with single spaces before the csv reader sees it.
    """
    def __init__(self, fh):
        # the wrapped handle; every other method delegates to it
        self.fh = fh

    def close(self):
        'close the wrapped handle'
        self.fh.close()

    def seek(self, arg):
        'reposition the wrapped handle; nothing is returned'
        self.fh.seek(arg)

    def fix(self, s):
        'collapse whitespace runs in s to one space and trim both ends'
        tokens = s.split()
        return ' '.join(tokens)

    def next(self):
        'return the next line of the wrapped handle, normalized by fix'
        raw = self.fh.next()
        return self.fix(raw)

    def __iter__(self):
        'yield every remaining line of the wrapped handle, normalized by fix'
        normalize = self.fix
        for raw in self.fh:
            yield normalize(raw)
1330
+
1331
+ if delimiter == ' ' :
1332
+ fh = FH (fh )
1333
+
1334
+ reader = csv .reader (fh , delimiter = delimiter )
1297
1335
def process_skiprows (reader ):
1298
1336
if skiprows :
1299
1337
for i , row in enumerate (reader ):
@@ -1388,9 +1426,131 @@ def get_converters(reader):
1388
1426
rows .append ([func (val ) for func , val in zip (converters , row )])
1389
1427
fh .close ()
1390
1428
1429
+ if not len (rows ):
1430
+ return None
1391
1431
r = npy .rec .fromrecords (rows , names = names )
1392
1432
return r
1393
1433
1434
+
1435
def rec2csv(r, fname, delimiter=','):
    """
    Save the data from numpy record array r into a comma/space/tab
    delimited file.  The record array dtype names will be used for
    column headers, and every value is written as str(value).

    r - the record array to write

    fname - can be a filename or a file handle.  It is handed to
      cbook.to_filehandle, which is expected to take care of gzipped
      files when the filename ends in .gz

    delimiter - the single character separating the fields

    The handle returned by cbook.to_filehandle is always closed before
    returning, even if writing a row raises.
    """
    fh = cbook.to_filehandle(fname, 'w')
    try:
        writer = csv.writer(fh, delimiter=delimiter)
        writer.writerow(r.dtype.names)
        for row in r:
            # build a real list so writerow gets a sequence on every
            # Python version
            writer.writerow([str(val) for val in row])
    finally:
        # do not leak the handle when a write fails part way through
        fh.close()
1452
+
1453
+ # some record array helpers
1454
def rec_append_field(rec, name, arr, dtype=None):
    """
    Return a new record array holding every field of rec plus one
    extra field called name, filled with the data in arr.

    arr is converted with npy.asarray.  If dtype is None the dtype of
    that converted array is used for the new field.  rec itself is
    not modified.
    """
    values = npy.asarray(arr)

    field_type = dtype
    if field_type is None:
        field_type = values.dtype

    # the existing description with the new field tacked on the end
    extended = npy.dtype(rec.dtype.descr + [(name, field_type)])
    out = npy.empty(rec.shape, dtype=extended)

    for existing in rec.dtype.fields:
        out[existing] = rec[existing]
    out[name] = values

    return out.view(npy.recarray)
1465
+
1466
+
1467
def rec_drop_fields(rec, names):
    """
    Return a new numpy record array that is a copy of rec without the
    fields listed in names.

    Entries of names that are not fields of rec are ignored.  The
    remaining fields keep their original order and dtype.
    """
    dropped = set(names)
    kept = [name for name in rec.dtype.names if name not in dropped]

    slim_dtype = npy.dtype([(name, rec.dtype[name]) for name in kept])
    out = npy.empty(len(rec), dtype=slim_dtype)

    for name in slim_dtype.names:
        out[name] = rec[name]

    return out.view(npy.recarray)
1481
+
1482
+
1483
def rec_join(key, r1, r2):
    """
    Join record arrays r1 and r2 on key; key is a tuple of field
    names.  If r1 and r2 have equal values on all the keys in the key
    tuple, then their fields will be merged into a new record array
    containing the union of the fields of r1 and r2.

    Only rows whose key is present in both arrays are returned (an
    inner join).  If a key occurs more than once in r1 or in r2, only
    the last row carrying that key is used.  The rows of the result
    follow the order of the matching rows in r1.

    The fields of the result are: the key fields, then the remaining
    fields of r1, then the fields of r2 that r1 does not have.  When a
    non-key field name exists in both arrays, the r1 data is kept.
    For a byte-string key field the wider of the r1 and r2 dtypes is
    used.

    Raises ValueError if a key field is missing from r1 or r2.
    """

    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %s' % name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %s' % name)

    def makekey(row):
        return tuple([row[name] for name in key])

    # key tuple -> row index; a later duplicate overwrites an earlier one
    r1d = dict([(makekey(row), i) for i, row in enumerate(r1)])
    r2d = dict([(makekey(row), i) for i, row in enumerate(r2)])

    # keys present in both arrays, ordered by their row in r1 so the
    # result does not depend on set/dict iteration order
    common = [k for k in r1d if k in r2d]
    common.sort(key=r1d.get)

    # explicit integer index arrays also work when nothing matches
    r1ind = npy.array([r1d[k] for k in common], dtype=int)
    r2ind = npy.array([r2d[k] for k in common], dtype=int)

    r1 = r1[r1ind]
    r2 = r2[r2ind]

    r1names = r1.dtype.names
    # fields only r2 can contribute; on a name clash r1 wins
    r2extra = [name for name in r2.dtype.names if name not in r1names]

    def key_desc(name):
        'if name is a string key, use the larger size of r1 or r2 before merging'
        dt1 = r1.dtype[name]
        dt2 = r2.dtype[name]
        if dt1.kind == 'S' and dt2.kind == 'S' and dt2.itemsize > dt1.itemsize:
            return (name, dt2)
        return (name, dt1)

    newdtype = npy.dtype(
        [key_desc(name) for name in key] +
        [(name, r1.dtype[name]) for name in r1names if name not in key] +
        [(name, r2.dtype[name]) for name in r2extra])

    newrec = npy.empty(len(r1), dtype=newdtype)
    for field in r1names:
        newrec[field] = r1[field]

    for field in r2extra:
        newrec[field] = r2[field]

    return newrec.view(npy.recarray)
1553
+
1394
1554
def slopes (x ,y ):
1395
1555
"""
1396
1556
SLOPES calculate the slope y'(x) Given data vectors X and Y SLOPES
0 commit comments