v1.47 read/write csv

PydPiper · Sep 21, 2020 · 0834be4 · 0834be4
1 parent fced972
commit 0834be4
Show file tree

Hide file tree

Showing 9 changed files with 213 additions and 22 deletions.
diff --git a/README.md b/README.md
@@ -87,17 +87,18 @@ and/or download restrictions, see [docs - installation](https://pylightxl.readth
 
 ---
 
-#### **Future Version**
+#### **pypi version 1.47**
+
 - added new function: ``db.nr('table1')`` returns the contents of named range "table1"
 - added new function: ``db.ws('Sheet1').range('A1:C3')`` that returns the contents of a range
   it also has the ability to return the formulas of the range
 - updated ``db.ws('Sheet1').row()`` and ``db.ws('Sheet1').col()`` to take in a new argument ``formula``
   that returns the formulas of a row or col
-
-#### **pypi version 1.46**
-
-- bug fix: added ability to input an empty string into the cell update functions 
-  (previously entering val='') threw and error
+- bugfix: write to existing without named ranges was throwing a "repair" error. Fixed typo on xml for it
+  and added unit tests to capture it
+- added new function: ``xl.readcsv(fn, delimiter, ws)`` to read csv files and create a pylightxl db out
+  of it (type converted)
+- added new function: ``xl.writecsv(db, fn, ws, delimiter)`` to write out a pylightxl worksheet as a csv
 
 See full history log of revisions: [Here](https://pylightxl.readthedocs.io/en/latest/revlog.html)
 

diff --git a/doc/source/revlog.rst b/doc/source/revlog.rst
@@ -1,13 +1,22 @@
 Revision Log
 ============
 
-pypi version 1.47 (in-work)
+pypi version 1.48 (in-work)
 ---------------------------
+- csv file handle
+
+pypi version 1.47
+-----------------
 - added new function: ``db.nr('table1')`` returns the contents of named range "table1"
 - added new function: ``db.ws('Sheet1').range('A1:C3')`` that returns the contents of a range
   it also has the ability to return the formulas of the range
 - updated ``db.ws('Sheet1').row()`` and ``db.ws('Sheet1').col()`` to take in a new argument ``formula``
   that returns the formulas of a row or col
+- bugfix: write to existing without named ranges was throwing a "repair" error. Fixed typo on xml for it
+  and added unit tests to capture it
+- added new function: ``xl.readcsv(fn, delimiter, ws)`` to read csv files and create a pylightxl db out
+  of it (type converted)
+- added new function: ``xl.writecsv(db, fn, ws, delimiter)`` to write out a pylightxl worksheet as a csv
 
 
 pypi version 1.46

diff --git a/pylightxl/__init__.py b/pylightxl/__init__.py
@@ -1 +1 @@
-from .pylightxl import readxl, writexl, Database
+from .pylightxl import readxl, readcsv, writexl, writecsv, Database
diff --git a/pylightxl/pylightxl.py b/pylightxl/pylightxl.py
@@ -2,10 +2,10 @@
 # SEC-00: PREFACE
 ########################################################################################################
 """
-
 Title: pylightxl
-
-Version: 1.46
+Developed by: pydpiper
+Version: 1.47
+License: MIT
 
 Source: https://github.com/PydPiper/pylightxl
 
@@ -31,17 +31,7 @@
     - SEC-04: DATABASE FUNCTIONS
     - SEC-05: UTILITY FUNCTIONS
 
-TODO MASTER LIST:
 """
-#TODO: integrate namedrange into ssd function
-#TODO: add/remove row/col
-#TODO: read csv into db
-#TODO: write csv from db sheet or ssd
-#TODO: multi-dim indexing (col A: 1,1,1,2,1,1,2 | col B: A,A,B,C,A,A,A | col D: 10,20,30,40,50,60,70)
-#       give me results where col A = 2 and col B = 'C' in col D -> [40,]  also optional arg to return indexes
-#TODO: function that output data in pandas like data format (in-case someone needed to convert to pandas)
-#TODO: matrix function to output 2D data lists
-
 
 ########################################################################################################
 # SEC-01: IMPORTS
@@ -360,6 +350,61 @@ def readxl_scrape(f, sharedString):
     return data
 
 
+def readcsv(fn, delimiter=',', ws='Sheet1'):
+    """
+    Reads a delimited text (csv) file and returns a pylightxl database with its contents in one worksheet
+
+    Each field is type converted: text containing a '.' is tried as a float, any other text is tried
+    as an int, and (ignoring case and surrounding whitespace) the words "true"/"false" become bool.
+    Anything that fails those conversions is stored as the original string. Lines are split on every
+    occurrence of the delimiter; quoted fields get no special treatment.
+
+    :param str fn: csv file name (pathlib paths are converted to str)
+    :param str delimiter=',': csv file delimiter
+    :param str ws='Sheet1': worksheet name that the csv data will be stored in
+    :return: pylightxl.Database class
+    """
+
+    # declare a db
+    db = Database()
+
+    # accept pathlib objects by converting them to a plain string path
+    if 'pathlib' in str(type(fn)):
+        fn = str(fn)
+
+    # data = {'A1': data1, 'A2': data2...}
+    data = {}
+
+    with open(fn, 'r') as f:
+        # rows and columns are numbered from 1 so they map directly onto cell addresses
+        for i_row, line in enumerate(f, 1):
+            line = line.replace('\n', '').replace('\r', '')
+
+            for i_col, item in enumerate(line.split(delimiter), 1):
+                address = utility_num2columnletters(i_col) + str(i_row)
+
+                # data conditioning
+                try:
+                    if '.' in item:
+                        item = float(item)
+                    else:
+                        item = int(item)
+                except ValueError:
+                    # compare the whole token: a substring test would turn text such as
+                    # "untrue" into True
+                    token = item.strip().lower()
+                    if token == 'true':
+                        item = True
+                    elif token == 'false':
+                        item = False
+
+                data[address] = {'v': item, 'f': None, 's': None}
+
+    db.add_ws(ws, data)
+
+    return db
+
 ########################################################################################################
 # SEC-04: WRITEXL FUNCTIONS
 ########################################################################################################
@@ -1071,6 +1116,49 @@ def writexl_new_content_types_text(db):
     return rv
 
 
+def writecsv(db, fn, ws=(), delimiter=','):
+    """
+    Writes csv file(s) from a pylightxl database. One file is written per worksheet, with the
+    sheetname tagged on the end of the file name (ex: "fn_sh2.csv")
+
+    :param pylightxl.Database db: database holding the worksheet(s) to write
+    :param str fn: output file name (without extension; ie. no '.csv')
+    :param str or tuple ws=(): sheetname(s) to write out, if not specified - all sheets are written
+    :param delimiter=',': csv delimiter
+    :return: None
+    """
+    # local import so this function does not rely on a module-level import of os
+    import os
+
+    if ws == ():
+        # write all worksheets
+        worksheets = db.ws_names
+    else:
+        # write only specified worksheets
+        worksheets = (ws,) if isinstance(ws, str) else ws
+
+    for sheet in worksheets:
+        new_fn = fn + '_' + sheet + '.csv'
+
+        try:
+            f = open(new_fn, 'w')
+        except PermissionError:
+            # file is open, adjust name and print warning
+            # prefix only the file name part so a directory in fn stays intact
+            folder, basename = os.path.split(new_fn)
+            fallback_fn = os.path.join(folder, 'new_' + basename)
+            print('pylightxl - Cannot write to existing file <{}> that is open in excel.'.format(new_fn))
+            print('     New temporary file was written to <{}>'.format(fallback_fn))
+            new_fn = fallback_fn
+            f = open(new_fn, 'w')
+
+        # any other open() failure propagates from above; writing only starts once a file
+        # handle exists, and the handle is closed even if a write fails part way through
+        with f:
+            worksheet = db.ws(sheet)
+            max_row, max_col = worksheet.size
+            for r in range(1, max_row + 1):
+                row = [str(worksheet.index(r, c)) for c in range(1, max_col + 1)]
+                f.write(delimiter.join(row))
+                f.write('\n')
+
+
 ########################################################################################################
 # SEC-05: DATABASE FUNCTIONS
 ########################################################################################################

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="pylightxl", # Replace with your own username
-    version="1.46",
+    version="1.47",
     author="Viktor Kis",
     author_email="",
     description="A light weight excel read/writer for python27 and python3 with no dependencies",

diff --git a/test/input.csv b/test/input.csv
@@ -0,0 +1,5 @@
+11	12.0	.13	'14'	 	16
+
+
+31		 false 		 true 	
+	42		 		
diff --git a/test/temp_wb.xlsx b/test/temp_wb.xlsx
diff --git a/test/test_readxl.py b/test/test_readxl.py
@@ -44,6 +44,37 @@ def test_bad_readxl_sheetnames(self):
             self.assertRaises(e, 'Error - Sheetname ({}) is not in the workbook.'.format('not-a-sheet'))
 
 
+class TestReadCSV(TestCase):
+
+    def test_readcsv(self):
+        """Read the tab-delimited fixture into sheet 'sh2' and check each converted cell value."""
+
+        db = xl.readcsv(fn='input.csv', delimiter='\t', ws='sh2')
+        sheet = db.ws('sh2')
+
+        # ((row, col), expected value after type conversion)
+        expected_cells = [
+            ((1, 1), 11),
+            ((1, 2), 12.0),
+            ((1, 3), 0.13),
+            ((1, 4), "'14'"),
+            ((1, 5), " "),
+            ((1, 6), 16),
+            ((2, 1), ''),
+            ((2, 2), ''),
+            ((2, 3), ''),
+            ((2, 4), ''),
+            ((2, 5), ''),
+            ((2, 6), ''),
+            ((4, 1), 31),
+            ((4, 2), ''),
+            ((4, 3), False),
+            ((4, 4), ''),
+            ((4, 5), True),
+            ((4, 6), ''),
+            ((5, 2), 42),
+            ((5, 4), ' '),
+        ]
+
+        for (row, col), expected in expected_cells:
+            self.assertEqual(expected, sheet.index(row, col))
+
+        self.assertEqual([5, 6], sheet.size)
+
+
 class TestIntegration(TestCase):
 
     def test_pathlib_readxl(self):
@@ -309,6 +340,7 @@ def test_rename_ws(self):
         self.assertEqual(['two'], db.ws_names)
         self.assertEqual(10, db.ws('two').address('A1'))
 
+
 class TestWorksheet(TestCase):
 
     def test_ws_init(self):

diff --git a/test/test_writexl.py b/test/test_writexl.py
@@ -459,3 +459,59 @@ def test_integration_alt_writer(self):
         self.assertEqual('one', db_alt.ws('sh3').address('A1'))
 
 
+class TestWriteCSV(TestCase):
+
+    @staticmethod
+    def _remove_csv_outputs():
+        """Delete the csv files this test writes, if they exist in the working directory."""
+        for leftover in ('outcsv_sh1.csv', 'outcsv_sh2.csv'):
+            if leftover in os.listdir('.'):
+                os.remove(leftover)
+
+    def test_writecsv(self):
+        """Write one named sheet, check its file contents, then write all sheets and check the files exist."""
+
+        db = xl.Database()
+        db.add_ws('sh1')
+        db.add_ws('sh2')
+
+        # (row, col, value) entries placed on sheet 'sh1'
+        cells = [
+            (1, 1, 10),
+            (1, 2, 10.0),
+            (1, 3, '10.0'),
+            (1, 4, True),
+            (2, 1, 20),
+            (2, 2, 20.0),
+            (2, 3, '20.0'),
+            (2, 4, False),
+            (3, 5, ' '),
+        ]
+        for row, col, val in cells:
+            db.ws('sh1').update_index(row, col, val)
+
+        self._remove_csv_outputs()
+
+        # single sheet requested by name
+        xl.writecsv(db=db, fn='outcsv', delimiter='\t', ws='sh1')
+
+        with open('outcsv_sh1.csv', 'r') as f:
+            lines = [raw.replace('\n', '').replace('\r', '').split('\t') for raw in f]
+
+        self.assertEqual(['10', '10.0', '10.0', 'True', ''], lines[0])
+        self.assertEqual(['20', '20.0', '20.0', 'False', ''], lines[1])
+        self.assertEqual(['', '', '', '', ' '], lines[2])
+
+        self._remove_csv_outputs()
+
+        # no sheet specified: every worksheet gets its own file
+        xl.writecsv(db=db, fn='outcsv')
+
+        written = os.listdir('.')
+        self.assertTrue('outcsv_sh1.csv' in written)
+        self.assertTrue('outcsv_sh2.csv' in written)
+
+        self._remove_csv_outputs()
+