# gncxml Usage Examples

## Setup

In [1]:
import sys
import gncxml

# Open the GnuCash XML data file used by every cell below. If constructing the
# Book raises OSError, stop here and report that error as the exit message
# instead of continuing without a `book` object.
try:
    book = gncxml.Book("mybook.gnucash")
except OSError as open_error:
    sys.exit(open_error)

## API Reference

In [2]:
# Print the built-in help text for gncxml.Book (class docstring plus the documented methods).
help(gncxml.Book)

Help on class Book in module gncxml.book:

class Book(builtins.object)
 |  Book(gncfile) -> None
 |  
 |  Parse GnuCash XML data file and provide interface to read journal entries and master data tables.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, gncfile) -> None
 |      Parameters
 |      ----------
 |      gncfile : file name or file object (io.BufferedReader)
 |          GnuCash data file (XML format)
 |  
 |  accounts(self) -> 'pandas.DataFrame'
 |      Return account entries as pandas.DataFrame.
 |  
 |  commodities(self) -> 'pandas.DataFrame'
 |      Return commodity (aka currency) entries as pandas.DataFrame.
 |  
 |  list_accounts(self) -> 'pandas.DataFrame'
 |      Return account entries as flat pandas.DataFrame after joining relevant tables.
 |  
 |  list_commodities(self) -> 'pandas.DataFrame'
 |      Return commodity (aka currency) entries as pandas.DataFrame (synonym for commodities()).
 |  
 |  list_prices(self) -> 'pandas.DataFrame'
 |      Return commodity p

## Book.list_*()

The Book.list_*() methods are the primary one-stop methods: each returns its entries as a flat pandas.DataFrame object after joining the relevant internal tables.

In [3]:
# Summarize the flat account listing: index, column names, non-null counts and dtypes.
book.list_accounts().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 405 entries, ('guid', '64806997695ef7c53fc2e6eef90f6d2b') to ('guid', '611d01ac0eae447490efb74e30848f18')
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   path              404 non-null    object 
 1   toplevel          404 non-null    object 
 2   parent_path       394 non-null    object 
 3   name              405 non-null    object 
 4   type              405 non-null    object 
 5   code              38 non-null     object 
 6   description       388 non-null    object 
 7   cmd_space         405 non-null    object 
 8   cmd_id            404 non-null    object 
 9   cmd_name          404 non-null    object 
 10  cmd_xcode         80 non-null     object 
 11  cmd_fraction      404 non-null    object 
 12  cmd_exponent      404 non-null    float64
 13  cmd_quote_source  302 non-null    object 
dtypes: float64(1), object(13)
memory usage: 65.0+ KB


In [4]:
# Summarize the commodity (currency) listing: index, column names, non-null counts and dtypes.
book.list_commodities().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 67 entries, ('CURRENCY', 'CNY') to ('template', 'template')
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          67 non-null     object 
 1   xcode         57 non-null     object 
 2   fraction      67 non-null     object 
 3   exponent      67 non-null     float64
 4   quote_source  16 non-null     object 
dtypes: float64(1), object(4)
memory usage: 5.6+ KB


In [5]:
# Summarize the flat price listing: index, column names, non-null counts and dtypes.
book.list_prices().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6324 entries, 0 to 6323
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype              
---  ------              --------------  -----              
 0   time                6324 non-null   datetime64[ns, UTC]
 1   cmd_space           6324 non-null   object             
 2   cmd_id              6324 non-null   object             
 3   crncy_space         6324 non-null   object             
 4   crncy_id            6324 non-null   object             
 5   source              6324 non-null   object             
 6   type                6324 non-null   object             
 7   value               6324 non-null   object             
 8   value_frac          6324 non-null   object             
 9   cmd_name            6324 non-null   object             
 10  cmd_xcode           5606 non-null   object             
 11  cmd_fraction        6324 non-null   object             
 12  cmd_exponent        6324 non-null 

In [6]:
# Summarize the flat split listing: index, column names, non-null counts and dtypes.
book.list_splits().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 79525 entries, ('guid', 'faf151dc359482b7a8c9a3b877da80ab') to ('guid', '3cbdbf9d3e5e4329a4eb58b2db5a0863')
Data columns (total 35 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   action                  1782 non-null   object        
 1   memo                    9848 non-null   object        
 2   reconciled              79525 non-null  object        
 3   value                   79525 non-null  object        
 4   value_frac              79525 non-null  object        
 5   quantity                79525 non-null  object        
 6   quantity_frac           79525 non-null  object        
 7   act_idtype              79525 non-null  object        
 8   act_id                  79525 non-null  object        
 9   trn_idtype              79525 non-null  object        
 10  trn_id                  79525 non-null  object        
 11  act_path                

In [7]:
# Summarize the flat transaction listing: index, column names, non-null counts and dtypes.
book.list_transactions().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 36900 entries, ('guid', '9e4b4e15282c605759a1fa8ac971cfbc') to ('guid', '6d7d50d8ef8742b1b7e0bbce6ee22f85')
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date                36900 non-null  datetime64[ns]
 1   num                 5129 non-null   object        
 2   description         36900 non-null  object        
 3   crncy_space         36900 non-null  object        
 4   crncy_id            36900 non-null  object        
 5   crncy_name          36900 non-null  object        
 6   crncy_xcode         0 non-null      object        
 7   crncy_fraction      36900 non-null  object        
 8   crncy_exponent      36900 non-null  float64       
 9   crncy_quote_source  34416 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(8)
memory usage: 4.3+ MB


## Other methods

The other methods return the raw tables as pandas.DataFrame objects. Most of the tables are indexed to make join operations convenient.

In [8]:
# Summarize the raw account table: index, column names, non-null counts and dtypes.
book.accounts().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 405 entries, ('guid', '64806997695ef7c53fc2e6eef90f6d2b') to ('guid', '611d01ac0eae447490efb74e30848f18')
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   path         404 non-null    object
 1   toplevel     404 non-null    object
 2   parent_path  394 non-null    object
 3   name         405 non-null    object
 4   type         405 non-null    object
 5   code         38 non-null     object
 6   description  388 non-null    object
 7   cmd_space    405 non-null    object
 8   cmd_id       404 non-null    object
dtypes: object(9)
memory usage: 49.2+ KB


In [9]:
# Summarize the raw commodity (currency) table: index, column names, non-null counts and dtypes.
book.commodities().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 67 entries, ('CURRENCY', 'CNY') to ('template', 'template')
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          67 non-null     object 
 1   xcode         57 non-null     object 
 2   fraction      67 non-null     object 
 3   exponent      67 non-null     float64
 4   quote_source  16 non-null     object 
dtypes: float64(1), object(4)
memory usage: 5.6+ KB


In [10]:
# Summarize the raw price table: index, column names, non-null counts and dtypes.
book.prices().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6324 entries, 0 to 6323
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   time         6324 non-null   datetime64[ns, UTC]
 1   cmd_space    6324 non-null   object             
 2   cmd_id       6324 non-null   object             
 3   crncy_space  6324 non-null   object             
 4   crncy_id     6324 non-null   object             
 5   source       6324 non-null   object             
 6   type         6324 non-null   object             
 7   value        6324 non-null   object             
 8   value_frac   6324 non-null   object             
dtypes: datetime64[ns, UTC](1), object(8)
memory usage: 444.8+ KB


In [11]:
# Summarize the raw split table: index, column names, non-null counts and dtypes.
book.splits().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 79525 entries, ('guid', 'faf151dc359482b7a8c9a3b877da80ab') to ('guid', '3cbdbf9d3e5e4329a4eb58b2db5a0863')
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   action         1782 non-null   object
 1   memo           9848 non-null   object
 2   reconciled     79525 non-null  object
 3   value          79525 non-null  object
 4   value_frac     79525 non-null  object
 5   quantity       79525 non-null  object
 6   quantity_frac  79525 non-null  object
 7   act_idtype     79525 non-null  object
 8   act_id         79525 non-null  object
 9   trn_idtype     79525 non-null  object
 10  trn_id         79525 non-null  object
dtypes: object(11)
memory usage: 9.7+ MB


In [12]:
# Summarize the raw transaction table: index, column names, non-null counts and dtypes.
book.transactions().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 36900 entries, ('guid', '9e4b4e15282c605759a1fa8ac971cfbc') to ('guid', '6d7d50d8ef8742b1b7e0bbce6ee22f85')
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         36900 non-null  datetime64[ns]
 1   num          5129 non-null   object        
 2   description  36900 non-null  object        
 3   crncy_space  36900 non-null  object        
 4   crncy_id     36900 non-null  object        
dtypes: datetime64[ns](1), object(4)
memory usage: 2.9+ MB
