Skip to content

Commit

Permalink
Handle worksheets via relationship ID rather than implicit names (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
dglsparsons committed Aug 21, 2020
1 parent 1e749a1 commit c5a5506
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 5 deletions.
8 changes: 8 additions & 0 deletions file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,11 @@ func TestNewReaderFromXlsxBytes(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []string{"datarefinery_groundtruth_400000"}, actual.Sheets)
}

// TestDeletedSheet opens the test-deleted-sheet.xlsx fixture and verifies that
// both opening and closing it complete without error.
func TestDeletedSheet(t *testing.T) {
	reader, openErr := OpenFile("./test/test-deleted-sheet.xlsx")
	require.NoError(t, openErr)

	require.NoError(t, reader.Close())
}
47 changes: 42 additions & 5 deletions sheets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package xlsxreader
import (
"archive/zip"
"encoding/xml"
"strconv"
"fmt"
)

// workbook is a struct representing the data we care about from the workbook.xml file.
Expand All @@ -13,8 +13,27 @@ type workbook struct {

// sheet is a struct representing the sheet xml element.
//
// NOTE(review): this span comes from a rendered diff, so it appears to show
// both the pre-change fields (Name, SheetID) and the post-change fields
// (Name, RelationshipID) side by side — confirm against the committed file.
type sheet struct {
// Name is populated from the element's "name" attribute.
Name string `xml:"name,attr,omitempty"`
// SheetID is populated from the element's "sheetId" attribute.
SheetID int `xml:"sheetId,attr,omitempty"`
// Name is populated from the element's "name" attribute.
Name string `xml:"name,attr,omitempty"`
// RelationshipID is populated from the "id" attribute in the officeDocument
// relationships namespace; getFileNameFromRelationships compares it against
// relationship.ID to locate the worksheet file.
RelationshipID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr,omitempty"`
}

// Types mirroring the data we care about from the _rels/workbook.xml.rels file.
type (
	// relationships holds every Relationship element decoded from the file.
	relationships struct {
		Relationships []relationship `xml:"Relationship"`
	}

	// relationship is a single Relationship element, carrying the values of
	// its Id and Target attributes.
	relationship struct {
		ID     string `xml:"Id,attr,omitempty"`
		Target string `xml:"Target,attr,omitempty"`
	}
)

// getFileNameFromRelationships returns the path, within the archive, of the file
// that the given sheet refers to via its relationship ID.
//
// rels is the list of relationships to search and s is the sheet whose
// RelationshipID is looked up. The first relationship whose ID matches is used.
// An error is returned when no relationship matches.
func getFileNameFromRelationships(rels []relationship, s sheet) (string, error) {
	for _, rel := range rels {
		if rel.ID != s.RelationshipID {
			continue
		}
		// A target beginning with "/" is already rooted at the top of the
		// package, so only the leading slash is dropped; prefixing it with
		// "xl/" would produce a path such as "xl//xl/worksheets/sheet1.xml".
		if len(rel.Target) > 0 && rel.Target[0] == '/' {
			return rel.Target[1:], nil
		}
		// Otherwise the target is relative to the workbook, which lives in xl/.
		return "xl/" + rel.Target, nil
	}
	return "", fmt.Errorf("Unable to find file with relationship %s", s.RelationshipID)
}

// getWorksheets loads the workbook.xml file and extracts a list of worksheets, along
Expand All @@ -31,17 +50,35 @@ func getWorksheets(files []*zip.File) ([]string, *map[string]*zip.File, error) {
return nil, nil, err
}

var wb workbook
wb := workbook{}
err = xml.Unmarshal(data, &wb)
if err != nil {
return nil, nil, err
}

relsFile, err := getFileForName(files, "xl/_rels/workbook.xml.rels")
if err != nil {
return nil, nil, err
}
relsData, err := readFile(relsFile)
if err != nil {
return nil, nil, err
}

rels := relationships{}
err = xml.Unmarshal(relsData, &rels)
if err != nil {
return nil, nil, err
}

wsFileMap := make(map[string]*zip.File)
sheetNames := make([]string, len(wb.Sheets))

for i, sheet := range wb.Sheets {
sheetFilename := "xl/worksheets/sheet" + strconv.Itoa(sheet.SheetID) + ".xml"
sheetFilename, err := getFileNameFromRelationships(rels.Relationships, sheet)
if err != nil {
return nil, nil, err
}
sheetFile, err := getFileForName(files, sheetFilename)
if err != nil {
return nil, nil, err
Expand Down
Binary file added test/test-deleted-sheet.xlsx
Binary file not shown.

0 comments on commit c5a5506

Please sign in to comment.