# pylightxl

https://pylightxl.readthedocs.io/en/latest/

In [None]:
import pylightxl as xl

In [None]:
# dir(xl)

In [None]:
import sys
sys.version

### iris_short_2 dataset

### Read Excel File

In [None]:
fn = "../data/iris_short_2.xlsx"

In [None]:
# readxl returns a pylightxl database that holds all worksheets and its data
db1= xl.readxl(fn)

In [None]:
# return all sheetnames
tuple(db1.ws_names)

In [None]:
# read only selective sheetnames
# (sheetnames is passed as a tuple; note the trailing comma in the one-element tuple ('Sheet1',))

db2 = xl.readxl(fn=fn, sheetnames=('Sheet1',))
db3 = xl.readxl(fn, sheetnames=('Sheet1','Sheet2'))  # bug fixed in 1.41
db4 = xl.readxl(fn, sheetnames=tuple(db1.ws_names))  # every sheet name found in db1

In [None]:
# return all sheetnames
db2.ws_names

In [None]:
# dir(db1)

### Access Worksheet and Cell Data

* access by worksheet name (tab name) and cell address

In [None]:
db1.ws('Sheet1').address('A1')

* access by worksheet name (tab name) and cell index

In [None]:
db1.ws('Sheet1').index(row=2, col=2)

* access an entire row/col (note: empty cells are returned as '')

In [None]:
db1.ws('Sheet1').row(2)  # rows are 1-indexed; row 1 holds the headers, so data values start at row 2

In [None]:
db1.ws('Sheet1').col(1) # col starts at 1

* get an entire row/col based on key-value (note: key is type sensitive)

In [None]:
# return the column whose cell in row 1 (the default keyindex) equals 'sepal_length'
db1.ws('Sheet1').keycol(key='sepal_length')

In [None]:
# specify a custom keyindex (not just row1)
db1.ws('Sheet1').keycol(key=5.1, keyindex=2)

In [None]:
# return the row whose cell in column 1 (the default keyindex) equals 5.1
db1.ws('Sheet1').keyrow(key=5.1)

* get the size of worksheet

In [None]:
db1.ws('Sheet1').size

### Iterate through rows/cols

In [None]:
for row in db1.ws('Sheet1').rows:
    print(row)

In [None]:
for col in db1.ws('Sheet1').cols:
    print(col)

### Reading Semi Structured data

In [None]:
fn2 = "../data/Book1.xlsx"

In [None]:
db = xl.readxl(fn2, sheetnames=('Sheet1',))

In [None]:
db.ws_names

In [None]:
db.ws('Sheet1').rows

In [None]:
for r_idx, row in enumerate(db.ws('Sheet1').rows, 1):
    print(r_idx, row)

### Solution

* row indices

In [None]:
KEY = 'val1'

# collect the 1-based row numbers of every row that contains KEY,
# i.e. the rows where a data group starts
keyrows = []
for row_number, cells in enumerate(db.ws('Sheet1').rows, start=1):
    if KEY in cells:
        keyrows.append(row_number)

In [None]:
# row indices
keyrows

In [None]:
keyrows[0]

In [None]:
keyrows[1]

In [None]:
db.ws('Sheet1')

In [None]:
# group1
db.ws('Sheet1').row(keyrows[0])

In [None]:
# col index for group1
db.ws('Sheet1').row(keyrows[0]).index(KEY) + 1

In [None]:
# group2
db.ws('Sheet1').row(keyrows[1])

In [None]:
# col index for group2
db.ws('Sheet1').row(keyrows[1]).index(KEY) + 1

* column indices

In [None]:
# find the 1-based column number of KEY in each group's starting row
# (index() counts from 0 while worksheet columns count from 1, hence the +1)
keycols = [db.ws('Sheet1').row(start_row).index(KEY) + 1 for start_row in keyrows]

print(keycols)

In [None]:
# checking table 1
db.ws('Sheet1').row(keyrows[0])[0]

In [None]:
# checking table 2
db.ws('Sheet1').row(keyrows[1])[1]

* datagroups (dict)

In [None]:
# define a dict to hold your data groups:
#   key   -> group ID, numbered from 1 in the order the groups appear in keyrows
#   value -> list of data rows for that group (the cells to the right of the key column)
datagroups = {}

# populate datagroups
for t_idx, keyrow in enumerate(keyrows, 1):
    # key column of this group; it does not change while walking down the group,
    # so look it up once instead of on every row
    keycol = keycols[t_idx - 1]
    datagroups[t_idx] = []
    i = 0
    while True:
        # pull out the current row of the group and drop the leading cells up to and
        # including the key column (keycol is 1-based, so used as a 0-based slice
        # start it points at the first cell after the key column)
        datarow = db.ws('Sheet1').row(keyrow + i)[keycol:]
        # the group ends when its first data cell is empty; also stop when there are
        # no cells at all right of the key column, where datarow[0] would raise IndexError
        if not datarow or datarow[0] == '':
            break
        datagroups[t_idx].append(datarow)
        i += 1

In [None]:
datagroups

In [None]:
type(datagroups)

In [None]:
print(datagroups.keys())

In [None]:
print(datagroups.values())

In [None]:
print(datagroups[1])

In [None]:
print(datagroups[2])

In [None]:
print(datagroups.items())

* Code snippet

In [None]:
# fg

import pylightxl as xl

db = xl.readxl("../data/Book1.xlsx")

# 1-based numbers of the rows that contain "val1" (where each data group starts)
keyrows = []
for rowID, row in enumerate(db.ws("Sheet1").rows, 1):
    if "val1" in row:
        keyrows.append(rowID)

# 1-based column number of "val1" in each of those rows
# (index() counts from 0, hence the +1)
keycols = [
    db.ws("Sheet1").row(keyrow).index("val1") + 1
    for keyrow in keyrows
]

In [None]:
# fg

# group ID (numbered from 1, in keyrows order) -> list of data rows of that group
datagroups = {}

for tableIndex, keyrow in enumerate(keyrows, 1):
    # key column of this group; constant for the whole group, so look it up once
    keycol = keycols[tableIndex - 1]
    datagroups[tableIndex] = []
    i = 0
    while True:
        # keep only the cells to the right of the key column (keycol is 1-based,
        # so as a 0-based slice start it skips the key column itself)
        datarow = db.ws("Sheet1").row(keyrow + i)[keycol:]
        # the group ends at the first row whose first data cell is empty; also stop
        # when nothing lies right of the key column, where datarow[0] would raise IndexError
        if not datarow or datarow[0] == "":
            break
        datagroups[tableIndex].append(datarow)
        i += 1

In [None]:
print(datagroups[1])
print(datagroups[2])