### Importação das bibliotecas

In [1]:
import pandas as pd
import numpy as np

import sqlalchemy
from urllib.parse import quote_plus
from sqlalchemy import create_engine, text
from sqlalchemy.engine.base import Connection
from sqlalchemy.engine import URL
from datetime import date, datetime

### Conexão com o Banco

In [2]:
def conn_bd():

    # pyodbc stuff for MS SQL Server Express
    driver='{ODBC Driver 17 for SQL Server}'
    server='DESKTOP-LG9U8DH'
    database='Raizen'
    trusted_connection='yes'

    # pyodbc connection string
    connection_string = f'DRIVER={driver};SERVER={server};\
                          DATABASE={database};\
                          ;\ TRUSTED_CONNECTION={trusted_connection}'
    
    connection_string += f'DATABASE={database};'
    connection_string += f'TRUSTED_CONNECTION={trusted_connection}'



    try:
    # create sqlalchemy engine connection URL
        connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
        engine = sqlalchemy.create_engine(connection_url)
        print("Conexao realizada!")
    except Exception as error:
        error = str(error)
        print("Conexao não realizada! " + error)


    
    return engine

### Lendo os dados brutos do schema Stage

In [3]:
engine = conn_bd()

query = 'SELECT * FROM [Stage].[derivados_petroleo]'
df_stage = pd.read_sql(text(query), engine.connect())

Conexao realizada!


### Exploração dos dados

In [4]:
df_stage.shape

(4968, 18)

In [5]:
df_stage.sample(4)

Unnamed: 0,COMBUSTÍVEL,ANO,REGIÃO,ESTADO,UNIDADE,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,TOTAL
2898,ÓLEO COMBUSTÍVEL (m3),2017,REGIÃO NORTE,PARÁ,m3,75121.312,62377.275,79561.349,77845.494,68735.727,68554.14,78117.233,82128.812,72309.391,79397.393,75107.01,71284.835,890539.971
2795,ÓLEO DIESEL (m3),2004,REGIÃO NORTE,PARÁ,m3,96763.712,81498.983,93777.04,94440.449,100240.9,105791.157,113539.667,119564.459,126678.779,118302.472,124360.664,122328.897,1297287.179
4951,GASOLINA C (m3),2020,REGIÃO NORTE,TOCANTINS,m3,29724.05,28147.87,26681.11,25667.5,24923.54,27391.885,30991.99,28500.95,30233.69,32556.27,31717.36,36449.88,352986.095
4012,QUEROSENE DE AVIAÇÃO (m3),2018,REGIÃO NORTE,RONDÔNIA,m3,2186.582,1749.8,1899.716,1855.146,2225.414,1952.34,2340.537,2112.747,2135.103,2129.736,2169.214,2239.604,24995.939


In [6]:
df_stage.dtypes

COMBUSTÍVEL     object
ANO              int64
REGIÃO          object
ESTADO          object
UNIDADE         object
Jan            float64
Fev            float64
Mar            float64
Abr            float64
Mai            float64
Jun            float64
Jul            float64
Ago            float64
Set            float64
Out            float64
Nov            float64
Dez            float64
TOTAL          float64
dtype: object

### Criando a estrutura para o novo dataframe

vars auxiliares para a organização dos dados

In [14]:
anos = [f'{ano}' for ano in range(2000, 2023)]

meses = ['Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez' ]


for ano in anos:
    globals()[f'data_{ano}'] = [f'{ano}-{mes}-01' for mes in range(1, 13)]

anos_dict = {ano: globals().get(f"data_{ano}") for ano in anos}



In [6]:
df_final =  pd.DataFrame({})

In [7]:
df_final.shape

(0, 0)

Função para criar novo dataframe

In [16]:
def criar_df_final(var_data, mes, df):
    var_data = var_data
    var_value = df.loc[index, mes]
    var_uf = df.loc[index, 'ESTADO']
    var_product = df.loc[index, 'COMBUSTÍVEL']
    d = {'year_month':var_data,'uf': var_uf,'product': var_product,'unit': 'm3','volume': var_value,'created_at': datetime.now()}
    linha = pd.DataFrame(data=d, index=[0])
    df = pd.DataFrame({})
    df = pd.concat([df, linha], ignore_index=True)
    return df

In [18]:
for index, row in df_stage.iterrows():
    
    for ano in anos:

        if df_stage.loc[index, 'ANO'] == int(ano):   
            for i, mes in enumerate(meses):
                df_final = pd.concat([df_final, criar_df_final(anos_dict[(ano)][i], mes, df_stage)], ignore_index=True)
    

In [19]:
df_final.shape

(59616, 6)

In [15]:
df_final.dtypes

year_month            object
uf                    object
product               object
unit                  object
volume               float64
created_at    datetime64[ns]
dtype: object

In [24]:
df_final.sample(10)

Unnamed: 0,year_month,uf,product,unit,volume,created_at
5540,2011-9-01,AMAPÁ,QUEROSENE ILUMINANTE (m3),m3,0.0,2023-03-29 06:00:21.289016
39162,2016-7-01,PIAUÍ,GASOLINA C (m3),m3,50179.22,2023-03-29 06:01:03.705000
33219,2001-4-01,PARÁ,GLP (m3),m3,21336.665455,2023-03-29 06:00:53.043360
46022,2019-3-01,RIO GRANDE DO SUL,ÓLEO DIESEL (m3),m3,355112.017,2023-03-29 06:01:17.348721
25497,2012-10-01,MATO GROSSO DO SUL,QUEROSENE DE AVIAÇÃO (m3),m3,3867.545,2023-03-29 06:00:42.365798
43740,2018-1-01,RIO GRANDE DO NORTE,QUEROSENE ILUMINANTE (m3),m3,0.0,2023-03-29 06:01:12.685922
1129,2011-2-01,ACRE,GASOLINA DE AVIAÇÃO (m3),m3,45.054039,2023-03-29 06:00:17.588482
18749,2011-6-01,GOIÁS,ÓLEO COMBUSTÍVEL (m3),m3,23667.91,2023-03-29 06:00:34.425800
22251,2001-4-01,MATO GROSSO,GASOLINA DE AVIAÇÃO (m3),m3,703.006,2023-03-29 06:00:38.431666
11378,2003-3-01,CEARÁ,QUEROSENE DE AVIAÇÃO (m3),m3,6891.321,2023-03-29 06:00:26.724245


In [16]:
df_final = df_final.sort_values(by=['uf', 'year_month'], ascending=True)

In [18]:
df_final.head(10)

Unnamed: 0,year_month,uf,product,unit,volume,created_at
0,2000-1-01,ACRE,GASOLINA C (m3),m3,3065.758,2023-03-26 10:47:25.897913
12,2000-1-01,ACRE,GASOLINA DE AVIAÇÃO (m3),m3,45.45,2023-03-26 10:47:25.917020
24,2000-1-01,ACRE,QUEROSENE ILUMINANTE (m3),m3,0.0,2023-03-26 10:47:25.927051
36,2000-1-01,ACRE,QUEROSENE DE AVIAÇÃO (m3),m3,1154.561,2023-03-26 10:47:25.939560
48,2000-1-01,ACRE,ÓLEO DIESEL (m3),m3,18025.337,2023-03-26 10:47:25.949929
60,2000-1-01,ACRE,ÓLEO COMBUSTÍVEL (m3),m3,0.0,2023-03-26 10:47:25.959010
72,2000-1-01,ACRE,ETANOL HIDRATADO (m3),m3,501.46,2023-03-26 10:47:25.967005
84,2000-1-01,ACRE,GLP (m3),m3,1959.303636,2023-03-26 10:47:25.975630
9,2000-10-01,ACRE,GASOLINA C (m3),m3,3225.61,2023-03-26 10:47:25.913977
21,2000-10-01,ACRE,GASOLINA DE AVIAÇÃO (m3),m3,46.455,2023-03-26 10:47:25.923979


Enviando para o banco de dados schema = Final

In [19]:
engine = conn_bd()
df_final.to_sql(name='derivados_petroleo', con=engine, if_exists='append', index=False, schema='Final')

Conexao realizada!


286