In [1]:
import numpy as np
import pandas as pd
from openpyxl import load_workbook
from openpyxl.pivot.fields import Missing

In [4]:
# Relative path to the Excel workbook that holds the pivot table.
file_path = "../data/vendas-combustiveis-m3.xlsx"

In [5]:
# Open the workbook from disk and pick the sheet named 'Plan1'.
workbook = load_workbook(filename=file_path)
worksheet = workbook["Plan1"]

In [6]:
# Name of the pivot table to look up in the worksheet.
pivot_name = "Tabela dinâmica1"

In [7]:
# Extract the pivot table object whose name matches `pivot_name`.
# NOTE: `_pivots` is an underscore-prefixed (private) attribute of the
# worksheet, so it is not a documented, stable API.
pivot_table = next(
    (pivot for pivot in worksheet._pivots if pivot.name == pivot_name),
    None,
)
if pivot_table is None:
    # Fail with the names that do exist instead of a bare
    # "list index out of range"; IndexError is kept so the exception type
    # is the same as before.
    raise IndexError(
        "Pivot table {!r} not found in the worksheet; available pivot tables: {}".format(
            pivot_name, [pivot.name for pivot in worksheet._pivots]
        )
    )


In [8]:
# Map each cache field name to the list of its shared item values.
# Only fields that actually have shared items get an entry.
fields_map = {}
for field in pivot_table.cache.cacheFields:
    if not (field.sharedItems.count > 0):
        continue
    # Items that expose no `v` attribute (empty cells, per the original note)
    # are represented by an empty string.
    shared_values = [getattr(item, "v", "") for item in field.sharedItems._fields]
    fields_map[field.name] = shared_values


In [9]:
# Collect the name of every cache field, in cache order; these names are
# used as the keys of each parsed record.
cache_fields = pivot_table.cache.cacheFields
column_names = [cache_field.name for cache_field in cache_fields]


In [10]:
# Parse every cache record into a dict keyed by column name.
rows = []
for record in pivot_table.cache.records.r:
    row_dict = {}
    for column_name, entry in zip(column_names, record._fields):
        if isinstance(entry, Missing):
            # A missing entry has no value to read, so it becomes NaN. This is
            # decided before any shared-item lookup, so NaN is never used as a
            # list index (which would raise TypeError).
            row_dict[column_name] = np.nan
        elif column_name in fields_map:
            # Fields with shared items store an index into the shared values
            # list, so the index is replaced by the value it points to.
            row_dict[column_name] = fields_map[column_name][entry.v]
        else:
            # Any other field carries its value directly.
            row_dict[column_name] = entry.v

    rows.append(row_dict)


In [11]:
# Assemble the parsed record dicts into a DataFrame (one row per record)
# and display it as the cell output.
df = pd.DataFrame(rows)
df

Unnamed: 0,COMBUSTÍVEL,ANO,REGIÃO,ESTADO,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,TOTAL
0,GASOLINA C (m3),2000.0,REGIÃO NORTE,RONDÔNIA,136073.253,9563.263,11341.229,9369.746,10719.983,11165.968,12312.451,11220.97,12482.281,13591.122,11940.57,11547.576,10818.094
1,GASOLINA C (m3),2000.0,REGIÃO NORTE,ACRE,3358.346,40001.853,3065.758,3495.29,2946.93,3023.92,3206.93,3612.58,3264.46,3835.74,3676.571,3225.61,3289.718
2,GASOLINA C (m3),2000.0,REGIÃO NORTE,AMAZONAS,20766.918,21180.919,242742.352,17615.604,20258.2,18741.344,19604.023,20221.674,20792.616,19912.898,21869.338,21145.643,20633.175
3,GASOLINA C (m3),2000.0,REGIÃO NORTE,RORAIMA,3716.032,3200.4,3339.332,43338.929,3259.3,3636.216,3631.569,3348.416,3394.016,4078.616,3346.616,4029.9,4358.516
4,GASOLINA C (m3),2000.0,REGIÃO NORTE,PARÁ,29755.907,28661.951,28145.784,29294.796,359575.398,28830.479,32297.047,27310.979,29396.384,26511.009,36553.25,31807.84,31009.972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4531,GLP (m3),2020.0,REGIÃO SUL,RIO GRANDE DO SUL,73247.92029,81469.222826,75607.009058,72818.82971,,,,618788.938406,58679.211957,57649.414855,68522.800725,64045.161232,66749.367754
4532,GLP (m3),2020.0,REGIÃO CENTRO-OESTE,MATO GROSSO DO SUL,15190.643116,15877.811594,16876.476449,16036.112319,14687.788043,,,,137899.302536,13774.106884,14137.695652,16281.139493,15037.528986
4533,GLP (m3),2020.0,REGIÃO CENTRO-OESTE,MATO GROSSO,19118.619565,18773.081522,19899.867754,20678.38587,18972.943841,18051.436594,,,,173256.967391,18763.161232,18321.987319,20677.483696
4534,GLP (m3),2020.0,REGIÃO CENTRO-OESTE,GOIÁS,53302.086957,48894.621377,47184.788043,49687.793478,52665.449275,50069.005435,48032.530797,,,,443268.73913,46850.585145,46581.878623
