# SPM

Arquivos SPM contêm os dados de consumo de um determinado cliente.

In [1]:
import pandas as pd
import glob

Configurando a precisão do pandas para 2 casas decimais

In [2]:
# Display floats with a thousands separator and two decimal places.
pd.set_option("display.float_format", "{:,.2f}".format)

In [3]:
# Column -> dtype mapping handed to pd.read_csv for the SPM exports.
# "Metered service date" is deliberately not listed here: its np.datetime64
# entry was disabled in the original mapping.

# Columns read as text.
_spm_text_columns = (
    "Subs. Service Line No.",
    "Compute Type",
    "Active",
    "UOM",
    "Product",
    "Parent Product",
    "SPM Document Number",
    "Overage",
    "Status",
    "Is Credit Memo?",
    "Bill to Customer",
)

# Columns read as floating-point numbers.
_spm_numeric_columns = (
    "Usage Line Number",
    "Sub Account",
    "Computed Quantity",
    "Existing Entitled Qty",
    "Usage Quantity",
    "Net Unit Price",
    "Line Net Amount",
    "Bill To Plan Number",
    "Consumed Quantity",
    "Compute Source",
    "Available Amt After Computing",
    "Spm Partner Credit Amt",
    "Trxn Extension ID",
    "Line Net Amount Increased Precision",
    "UCM Payment Credit Amt",
)

_spm_dtype_dict = {
    **dict.fromkeys(_spm_text_columns, str),
    **dict.fromkeys(_spm_numeric_columns, float),
}

Os arquivos com os dados brutos do SPM estão localizados na pasta `consumption_data` em formato CSV.

Vamos criar uma lista com todos esses arquivos.

In [4]:
# Collect every CSV in ./consumption_data. glob.glob returns matches in an
# arbitrary, OS-dependent order, so the list is sorted to make the order in
# which the files are later concatenated reproducible.
csv_data_files = sorted(glob.glob('./consumption_data/*.csv'))
csv_data_files

['./consumption_data\\ativos_0621.csv',
 './consumption_data\\ativos_0721.csv',
 './consumption_data\\ativos_0821.csv',
 './consumption_data\\ativos_0921.csv',
 './consumption_data\\ativos_1021.csv',
 './consumption_data\\bbts_0621.csv',
 './consumption_data\\bbts_0721.csv',
 './consumption_data\\bbts_0821.csv',
 './consumption_data\\bbts_0921.csv',
 './consumption_data\\bbts_1021.csv']

Criando os dataframes a partir dos arquivos CSV

In [5]:
# Only these columns are read from each CSV file.
col_list = [
    'Computed Quantity',
    'Metered service date',
    'UOM',
    'Product',
    'Net Unit Price',
    'Line Net Amount',
    'Bill to Customer',
]

# One frame per file. The last row of every file is discarded.
# NOTE(review): presumably that row is a footer/totals line - confirm against
# the raw exports.
dataframes_list = [
    pd.read_csv(
        csv_file,
        dtype=_spm_dtype_dict,
        parse_dates=["Metered service date"],
        usecols=col_list,
    ).iloc[:-1]
    for csv_file in csv_data_files
]

# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# every file's own row numbers are repeated, leaving duplicated index labels.
df = pd.concat(dataframes_list, ignore_index=True)
df.head()

Unnamed: 0,Metered service date,Computed Quantity,UOM,Product,Net Unit Price,Line Net Amount,Bill to Customer
0,2021-06-17,1.0,PORT HOUR,B88326 - Oracle Cloud Infrastructure - FastCon...,5.38,5.38,15820392 - Netmanagement Informatica Ltda - Epp
1,2021-06-19,1.0,PORT HOUR,B88326 - Oracle Cloud Infrastructure - FastCon...,5.38,5.38,15820392 - Netmanagement Informatica Ltda - Epp
2,2021-06-16,1.0,PORT HOUR,B88326 - Oracle Cloud Infrastructure - FastCon...,5.38,5.38,15820392 - Netmanagement Informatica Ltda - Epp
3,2021-06-07,1.0,PORT HOUR,B88326 - Oracle Cloud Infrastructure - FastCon...,5.38,5.38,15820392 - Netmanagement Informatica Ltda - Epp
4,2021-06-02,1.0,PORT HOUR,B88326 - Oracle Cloud Infrastructure - FastCon...,5.38,5.38,15820392 - Netmanagement Informatica Ltda - Epp


Removendo as linhas com créditos não utilizados na modalidade funded allocation

In [6]:
# Keep only the rows whose unit of measure is not "CURRENCY UNIT".
# Selecting with a boolean mask filters by row position, so it does not
# depend on the index labels of the frame.
keep_rows = df["UOM"].ne("CURRENCY UNIT")
df = df.loc[keep_rows]

Expandindo as colunas

In [7]:
# Split "Product" into at most three pieces on " - ".
product_parts = df['Product'].str.split(pat=" - ", n=2, expand=True)
df[['PartNumber', 'Categoria Produto', 'Produto']] = product_parts

# Split "Bill to Customer" at its first hyphen only (n=1), so hyphens later in
# the text stay in the second piece. The pattern also consumes the whitespace
# around that hyphen; splitting on a bare '-' left a trailing space in
# 'Numero Cliente' and a leading space in 'Cliente'.
customer_parts = df['Bill to Customer'].str.split(pat=r"\s*-\s*", n=1, expand=True)
df[['Numero Cliente', 'Cliente']] = customer_parts

# The combined source columns are no longer needed once they are expanded.
df = df.drop(columns=['Product', 'Bill to Customer'])

Renomeando as colunas

In [8]:
# Mapping from the source column names to the labels used from here on.
column_labels = {
    "Metered service date": "Data",
    "Computed Quantity": "Quantidade",
    "UOM": "Unidade de Medida",
    "Net Unit Price": "Preco Unitario BRL",
    "Line Net Amount": "Consumo BRL",
}

# Apply the mapping to the column axis of the existing frame.
df.rename(column_labels, axis="columns", inplace=True)

Criando o campo calculado `Consumo USD`

In [9]:
# NOTE(review): 5.31 is presumably the BRL-per-USD exchange rate used for this
# report - confirm its source and reference date.
brl_per_usd = 5.31

# Derive the USD column by dividing the BRL amount by the rate above.
df['Consumo USD'] = df['Consumo BRL'].div(brl_per_usd)

Colocando a `Data` como o índice do dataframe

In [10]:
# Move the 'Data' column into the index.
df = df.set_index('Data')

In [11]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0_level_0,Quantidade,Unidade de Medida,Preco Unitario BRL,Consumo BRL,PartNumber,Categoria Produto,Produto,Numero Cliente,Cliente,Consumo USD
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-06-17,1.0,PORT HOUR,5.38,5.38,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,15820392,Netmanagement Informatica Ltda - Epp,1.01
2021-06-19,1.0,PORT HOUR,5.38,5.38,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,15820392,Netmanagement Informatica Ltda - Epp,1.01
2021-06-16,1.0,PORT HOUR,5.38,5.38,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,15820392,Netmanagement Informatica Ltda - Epp,1.01
2021-06-07,1.0,PORT HOUR,5.38,5.38,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,15820392,Netmanagement Informatica Ltda - Epp,1.01
2021-06-02,1.0,PORT HOUR,5.38,5.38,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,15820392,Netmanagement Informatica Ltda - Epp,1.01


Reordenando as colunas

In [12]:
# Display the frame's current column labels.
df.columns

Index(['Quantidade', 'Unidade de Medida', 'Preco Unitario BRL', 'Consumo BRL',
       'PartNumber', 'Categoria Produto', 'Produto', 'Numero Cliente',
       'Cliente', 'Consumo USD'],
      dtype='object')

In [13]:
# Left-to-right column layout for the frame.
column_order = [
    'Numero Cliente',
    'Cliente',
    'PartNumber',
    'Categoria Produto',
    'Produto',
    'Unidade de Medida',
    'Quantidade',
    'Preco Unitario BRL',
    'Consumo BRL',
    'Consumo USD',
]

# Select the columns in that order, then preview the result.
df = df.loc[:, column_order]
df.head()

Unnamed: 0_level_0,Numero Cliente,Cliente,PartNumber,Categoria Produto,Produto,Unidade de Medida,Quantidade,Preco Unitario BRL,Consumo BRL,Consumo USD
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-06-17,15820392,Netmanagement Informatica Ltda - Epp,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,PORT HOUR,1.0,5.38,5.38,1.01
2021-06-19,15820392,Netmanagement Informatica Ltda - Epp,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,PORT HOUR,1.0,5.38,5.38,1.01
2021-06-16,15820392,Netmanagement Informatica Ltda - Epp,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,PORT HOUR,1.0,5.38,5.38,1.01
2021-06-07,15820392,Netmanagement Informatica Ltda - Epp,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,PORT HOUR,1.0,5.38,5.38,1.01
2021-06-02,15820392,Netmanagement Informatica Ltda - Epp,B88326,Oracle Cloud Infrastructure,FastConnect 10 Gbps - Port Hour,PORT HOUR,1.0,5.38,5.38,1.01


Ordenando o dataframe

In [14]:
# Sort the rows by index value. The default quicksort is not stable, so rows
# that share the same index value could end up in an arbitrary relative order;
# mergesort is stable and keeps them in their existing order.
df = df.sort_index(kind="mergesort")

Salvando

In [15]:
# Write the frame, index included, to a single worksheet of a new workbook.
df.to_excel('Relatório de Consumo.xlsx', sheet_name='Dados de Consumo')