In [None]:

import os
import pandas as pd


In [51]:
import pandas as pd
import numpy as np

class MatrixLoader:
    def __init__(self, year, agg):
        self.base_url = 'https://github.com/GuilhermeZiegler/input_output/raw/master/input_output/data/iom'
        self.year = year
        self.agg = agg
        self.file_path = f'{self.base_url}/{self.agg}/{self.year}/MIP-BR-CN10-68S-{self.year}.xlsx'
        self.row_indexes = None
        self.column_indexes = None
        self.data_matrix = None
        self.load_data()
        self.aligned_matrix = self.align_matrix()

    def load_data(self):
        xls = pd.ExcelFile(self.file_path)
        for sheet_name in xls.sheet_names:
            if sheet_name == 'Referência':
                continue  # Ignore the 'Referência' sheet
            self.extract_indices_and_matrix(sheet_name, xls)
            break  # Assuming only one relevant sheet needs to be loaded

    def extract_indices_and_matrix(self, sheet_name, xls):
        df = pd.read_excel(xls, sheet_name=sheet_name, header=None)

        # Extract row indexes starting from A5
        row_start_index = 4
        self.row_indexes = df.iloc[row_start_index:, :3].reset_index(drop=True)

        # Extract column indexes starting from D2
        col_start_row, col_end_row = 1, 3
        col_start_col = 3
        self.column_indexes = df.iloc[col_start_row:col_end_row+1, col_start_col:].reset_index(drop=True)

        # Extract data matrix starting from D5
        self.data_matrix = df.iloc[row_start_index:, col_start_col:].reset_index(drop=True)
        self.data_matrix = self.data_matrix.dropna(axis=1, how='all')

    def align_matrix(self):
        # Ensure the data matrix and column indexes have matching shapes
        col_len_data = self.data_matrix.shape[1]
        col_len_index = self.column_indexes.shape[1]

        if col_len_data < col_len_index:
            # Add NaN columns to data matrix
            for _ in range(col_len_index - col_len_data):
                self.data_matrix[f'NaN_{_}'] = np.nan
        elif col_len_data > col_len_index:
            # Truncate the data matrix columns
            self.data_matrix = self.data_matrix.iloc[:, :col_len_index]

        # Create MultiIndex for rows
        row_index = pd.MultiIndex.from_frame(self.row_indexes)

        # Create MultiIndex for columns
        col_index = pd.MultiIndex.from_frame(self.column_indexes.T)

        # Align the data matrix with NaN filling for missing values
        aligned_df = pd.DataFrame(self.data_matrix.values, index=row_index, columns=col_index)
        aligned_df = aligned_df.reindex(index=row_index, columns=col_index)

        return aligned_df

# Exemplo de uso
year = '2010'
agg = '68S'
loader = MatrixLoader(year, agg)

# Acessar os índices de linha
print("Índices de Linha:")
display(loader.row_indexes)

# Acessar os índices de coluna
print("Índices de Coluna:")
display(loader.column_indexes)

# Acessar a matriz de dados
print("Matriz de Dados:")
display(loader.data_matrix)

# Acessar a matriz alinhada com MultiIndex
print("Matriz Alinhada:")
display(loader.aligned_matrix)



Índices de Linha:


Unnamed: 0,0,1,2
0,0191,"Agricultura, inclusive o apoio à agricultura e...",1.0
1,0192,"Pecuária, inclusive o apoio à pecuária",2.0
2,0280,Produção florestal; pesca e aquicultura,3.0
3,0580,Extração de carvão mineral e de minerais não-m...,4.0
4,0680,"Extração de petróleo e gás, inclusive as ativi...",5.0
...,...,...,...
66,9480,Organizações associativas e outros serviços pe...,67.0
67,9700,Serviços domésticos,68.0
68,,Total\ndo produto,69.0
69,,,


Índices de Coluna:


Unnamed: 0,3,4,5,6,7,8,9,10,11,12,...,124,125,126,127,128,129,130,131,132,133
0,01911,01912,01913,01914,01915,01916,01917,01918,01919,01921,...,86911,86921,90801,94801,94802,94803,97001,,,
1,"Arroz, trigo e outros cereais",Milho em grão,"Algodão herbáceo, outras fibras da lav. tempor...",Cana-de-açúcar,Soja em grão,Outros produtos e serviços da lavoura temporária,Laranja,Café em grão,Outros produtos da lavoura permanente,"Bovinos e outros animais vivos, prods. animal,...",...,Saúde pública,Saúde privada,"Serviços de artes, cultura, esporte e recreação","Organizações patronais, sindicais e outros ser...","Manutenção de computadores, telefones e objeto...",Serviços pessoais,Serviços domésticos,TOTAL,,Conferência
2,1,2,3,4,5,6,7,8,9,10,...,122,123,124,125,126,127,128,129,,


Matriz de Dados:


Unnamed: 0,3,4,5,6,7,8,9,10,11,12,...,124,125,126,127,128,129,130,131,133,NaN_0
0,7557,13554,5155,32731,38875,30796,5975,11007,13181,3030,...,0,0,0,0,0,0,0,168861,0,
1,280,1000,6,160,1225,1545,52,409,293,39069,...,0,0,0,0,0,0,0,83448,0,
2,17,30,0,3,24,136,2,18,47,166,...,0,0,0,0,0,0,0,20332,0,
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,14838,0,
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,117330,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,0,0,0,0,0,0,0,0,0,0,...,0,0,0,66640,13070,23936,0,104279,0,
67,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,40334,40334,0,
68,8042,14717,5161,32894,40124,32643,6029,11606,13558,42279,...,99864,116247,23204,66640,18483,24019,40334,6599149,0,
69,,,,,,,,,,,...,,,,,,,,,,


Matriz Alinhada:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,01911,01912,01913,01914,01915,01916,01917,01918,01919,01921,...,86911,86921,90801,94801,94802,94803,97001,NaN,NaN,NaN
Unnamed: 0_level_1,Unnamed: 1_level_1,1,"Arroz, trigo e outros cereais",Milho em grão,"Algodão herbáceo, outras fibras da lav. temporária",Cana-de-açúcar,Soja em grão,Outros produtos e serviços da lavoura temporária,Laranja,Café em grão,Outros produtos da lavoura permanente,"Bovinos e outros animais vivos, prods. animal, caça e serv.",...,Saúde pública,Saúde privada,"Serviços de artes, cultura, esporte e recreação","Organizações patronais, sindicais e outros serviços associativos","Manutenção de computadores, telefones e objetos domésticos",Serviços pessoais,Serviços domésticos,TOTAL,NaN,Conferência
Unnamed: 0_level_2,Unnamed: 1_level_2,2,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,122.0,123.0,124.0,125.0,126.0,127.0,128.0,129.0,NaN,NaN
0,1,2,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3
0191,"Agricultura, inclusive o apoio à agricultura e a pós-colheita",1.0,7557,13554,5155,32731,38875,30796,5975,11007,13181,3030,...,0,0,0,0,0,0,0,168861,0,
0192,"Pecuária, inclusive o apoio à pecuária",2.0,280,1000,6,160,1225,1545,52,409,293,39069,...,0,0,0,0,0,0,0,83448,0,
0280,Produção florestal; pesca e aquicultura,3.0,17,30,0,3,24,136,2,18,47,166,...,0,0,0,0,0,0,0,20332,0,
0580,Extração de carvão mineral e de minerais não-metálicos,4.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,14838,0,
0680,"Extração de petróleo e gás, inclusive as atividades de apoio",5.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,117330,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9480,Organizações associativas e outros serviços pessoais,67.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,66640,13070,23936,0,104279,0,
9700,Serviços domésticos,68.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,40334,40334,0,
,Total\ndo produto,69.0,8042,14717,5161,32894,40124,32643,6029,11606,13558,42279,...,99864,116247,23204,66640,18483,24019,40334,6599149,0,
,,,,,,,,,,,,,...,,,,,,,,,,


In [42]:
loader.column_indexes.shape

(3, 131)

Unnamed: 0,0,1,2
66,9480.0,Organizações associativas e outros serviços pe...,67.0
67,9700.0,Serviços domésticos,68.0
68,,Total\ndo produto,69.0
69,,,
70,,Conferência,


In [46]:
loader.data_matrix

Unnamed: 0,3,4,5,6,7,8,9,10,11,12,...,123,124,125,126,127,128,129,130,131,133
0,7557,13554,5155,32731,38875,30796,5975,11007,13181,3030,...,0,0,0,0,0,0,0,0,168861,0
1,280,1000,6,160,1225,1545,52,409,293,39069,...,0,0,0,0,0,0,0,0,83448,0
2,17,30,0,3,24,136,2,18,47,166,...,0,0,0,0,0,0,0,0,20332,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,14838,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,117330,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,0,0,0,0,0,0,0,0,0,0,...,0,0,0,21873,0,0,0,0,23299,0
66,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,66640,13070,23936,0,104279,0
67,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,40334,40334,0
68,8042,14717,5161,32894,40124,32643,6029,11606,13558,42279,...,58285,99864,116247,23204,66640,18483,24019,40334,6599149,0


In [None]:
year = '2010'
agg = 'S68'
loader = MatrixLoader(year, agg)

# Access the data matrix for a specific sheet
data_matrix_producao = loader.get_data_matrix('Producao')
print(data_matrix_producao)


In [None]:
producao_df

In [None]:
file_path = f"input_output/data/iom/{agg}/{year}"
df = pd.read_excel(file_path)
# excel_reader = ExcelReader(file_path)

In [None]:
file_path = '..//data//S68//2018//MIP-BR-CN10-68S-2014.xlsx'


In [None]:
df = pd.read_excel(file_path)
