### Importação das bibliotecas

In [1]:
import pandas as pd
import numpy as np

import sqlalchemy
from urllib.parse import quote_plus
from sqlalchemy import create_engine, text
from sqlalchemy.engine.base import Connection
from sqlalchemy.engine import URL
from datetime import date, datetime

### Conexão com o Banco

In [2]:
def conn_bd():

    # pyodbc stuff for MS SQL Server Express
    driver='{ODBC Driver 17 for SQL Server}'
    server='DESKTOP-LG9U8DH'
    database='Raizen'
    trusted_connection='yes'

    # pyodbc connection string
    connection_string = f'DRIVER={driver};SERVER={server};\
                          DATABASE={database};\
                          ;\ TRUSTED_CONNECTION={trusted_connection}'
    
    connection_string += f'DATABASE={database};'
    connection_string += f'TRUSTED_CONNECTION={trusted_connection}'



    try:
    # create sqlalchemy engine connection URL
        connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
        engine = sqlalchemy.create_engine(connection_url)
        print("Conexao realizada!")
    except Exception as error:
        error = str(error)
        print("Conexao não realizada! " + error)


    
    return engine

### Lendo os dados brutos do schema Stage

In [3]:
engine = conn_bd()

query = 'SELECT * FROM [Stage].[diesel]'
df_stage = pd.read_sql(text(query), engine.connect())

Conexao realizada!


### Exploração dos dados

In [4]:
df_stage.shape

(1350, 18)

In [5]:
df_stage.sample(10)

Unnamed: 0,COMBUSTÍVEL,ANO,REGIÃO,ESTADO,UNIDADE,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,TOTAL
944,ÓLEO DIESEL S-10 (m3),2021,REGIÃO SUDESTE,RIO DE JANEIRO,m3,114398.754,94653.023,117072.542,104697.068,109219.832,108975.347,118919.749,117876.718,115163.481,122028.632,110436.026,128197.306,
646,ÓLEO DIESEL MARÍTIMO (m3),2022,REGIÃO SUDESTE,MINAS GERAIS,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2062.0,
1063,ÓLEO DIESEL S-500 (m3),2015,REGIÃO NORTE,RONDÔNIA,m3,46509.4,39391.7,52239.8,50268.079,50489.75,52873.475,59821.1,56468.75,55285.5,57493.101,45549.6,53232.0,
267,ÓLEO DIESEL S-1800 (m3),2016,REGIÃO NORDESTE,CEARÁ,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
72,ÓLEO DIESEL S-1800 (m3),2017,REGIÃO NORDESTE,ALAGOAS,m3,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,0.0,
336,ÓLEO DIESEL MARÍTIMO (m3),2020,REGIÃO CENTRO-OESTE,DISTRITO FEDERAL,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
225,ÓLEO DIESEL (OUTROS ) (m3),2018,REGIÃO NORDESTE,BAHIA,m3,0.0,0.0,0.0,0.0,5.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,
781,ÓLEO DIESEL MARÍTIMO (m3),2019,REGIÃO NORTE,PARÁ,m3,10489.394,10660.433,10300.534,11391.497,11398.334,11181.393,12926.908,11841.779,9624.854,12891.884,13079.717,12098.207,
740,ÓLEO DIESEL (OUTROS ) (m3),2021,REGIÃO NORDESTE,PARAÍBA,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
813,ÓLEO DIESEL S-500 (m3),2015,REGIÃO NORDESTE,PERNAMBUCO,m3,92760.26,82973.7,93071.7,59013.68,58973.23,77006.65,62639.75,46375.7,47259.35,54051.1,61109.52,51132.95,


In [6]:
df_stage.dtypes

COMBUSTÍVEL     object
ANO              int64
REGIÃO          object
ESTADO          object
UNIDADE         object
Jan            float64
Fev            float64
Mar            float64
Abr            float64
Mai            float64
Jun            float64
Jul            float64
Ago            float64
Set            float64
Out            float64
Nov            float64
Dez            float64
TOTAL           object
dtype: object

### Criando a estrutura para o novo dataframe

vars auxiliares para a organização dos dados

In [9]:
df_stage.ANO.unique()

array([2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022],
      dtype=int64)

In [10]:
anos = [f'{ano}' for ano in range(2013, 2023)]

meses = ['Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez' ]


data_2013 = [f'2013-{mes}-01' for mes in range(1, 13)]
data_2014 = [f'2014-{mes}-01' for mes in range(1, 13)]
data_2015 = [f'2015-{mes}-01' for mes in range(1, 13)]
data_2016 = [f'2016-{mes}-01' for mes in range(1, 13)]
data_2017 = [f'2017-{mes}-01' for mes in range(1, 13)]
data_2018 = [f'2018-{mes}-01' for mes in range(1, 13)]
data_2019 = [f'2019-{mes}-01' for mes in range(1, 13)]
data_2020 = [f'2020-{mes}-01' for mes in range(1, 13)]
data_2021 = [f'2021-{mes}-01' for mes in range(1, 13)]
data_2022 = [f'2022-{mes}-01' for mes in range(1, 13)]


anos_dict = {
             2013: data_2013,
             2014: data_2014,
             2015: data_2015,
             2016: data_2016,
             2017: data_2017,
             2018: data_2018,
             2019: data_2019,
             2020: data_2020,
             2021: data_2021,
             2022: data_2022

             }

In [11]:
df_final =  pd.DataFrame({})

In [12]:
df_final.shape

(0, 0)

Função para criar novo dataframe

In [14]:
def criar_df_final(var_data, mes, df):
    var_data = var_data
    var_value = df.loc[index, mes]
    var_uf = df.loc[index, 'ESTADO']
    var_product = df.loc[index, 'COMBUSTÍVEL']
    d = {'year_month':var_data,'uf': var_uf,'product': var_product,'unit': 'm3','volume': var_value,'created_at': datetime.now()}
    linha = pd.DataFrame(data=d, index=[0])
    df = pd.DataFrame({})
    df = pd.concat([df, linha], ignore_index=True)
    return df

In [15]:
for index, row in df_stage.iterrows():
    
    for ano in anos:

        if df_stage.loc[index, 'ANO'] == int(ano):   
            for i, mes in enumerate(meses):
                df_final = pd.concat([df_final, criar_df_final(anos_dict[int(ano)][i], mes, df_stage)], ignore_index=True)
    

In [16]:
df_final.shape

(16200, 6)

In [17]:
df_final.dtypes

year_month            object
uf                    object
product               object
unit                  object
volume               float64
created_at    datetime64[ns]
dtype: object

In [18]:
df_final.sample(10)

Unnamed: 0,year_month,uf,product,unit,volume,created_at
13403,2016-12-01,RORAIMA,ÓLEO DIESEL MARÍTIMO (m3),m3,0.0,2023-03-28 14:27:53.611794
5059,2017-8-01,GOIÁS,ÓLEO DIESEL MARÍTIMO (m3),m3,0.0,2023-03-28 14:27:45.279982
13505,2018-6-01,RORAIMA,ÓLEO DIESEL (OUTROS ) (m3),m3,0.0,2023-03-28 14:27:53.728142
2252,2020-9-01,AMAZONAS,ÓLEO DIESEL S-1800 (m3),m3,0.0,2023-03-28 14:27:43.027614
2953,2022-2-01,BAHIA,ÓLEO DIESEL MARÍTIMO (m3),m3,755.0,2023-03-28 14:27:43.550070
16007,2019-12-01,TOCANTINS,ÓLEO DIESEL S-500 (m3),m3,35744.86,2023-03-28 14:27:56.494490
3377,2019-6-01,CEARÁ,ÓLEO DIESEL MARÍTIMO (m3),m3,1110.0,2023-03-28 14:27:43.872059
2418,2013-7-01,BAHIA,ÓLEO DIESEL S-500 (m3),m3,240402.282,2023-03-28 14:27:43.152471
1157,2022-6-01,ALAGOAS,ÓLEO DIESEL MARÍTIMO (m3),m3,427.707,2023-03-28 14:27:42.188338
1015,2019-8-01,ALAGOAS,ÓLEO DIESEL S-10 (m3),m3,15247.0,2023-03-28 14:27:42.081992


In [19]:
df_final = df_final.sort_values(by=['uf', 'year_month'], ascending=True)

In [20]:
df_final.head(10)

Unnamed: 0,year_month,uf,product,unit,volume,created_at
0,2013-1-01,ACRE,ÓLEO DIESEL S-10 (m3),m3,363.0,2023-03-28 14:27:41.328368
12,2013-1-01,ACRE,ÓLEO DIESEL S-500 (m3),m3,0.0,2023-03-28 14:27:41.347567
24,2013-1-01,ACRE,ÓLEO DIESEL S-1800 (m3),m3,10143.361,2023-03-28 14:27:41.359346
36,2013-1-01,ACRE,ÓLEO DIESEL MARÍTIMO (m3),m3,32.0,2023-03-28 14:27:41.368346
48,2013-1-01,ACRE,ÓLEO DIESEL (OUTROS ) (m3),m3,8.0,2023-03-28 14:27:41.378637
9,2013-10-01,ACRE,ÓLEO DIESEL S-10 (m3),m3,1439.0,2023-03-28 14:27:41.345577
21,2013-10-01,ACRE,ÓLEO DIESEL S-500 (m3),m3,0.0,2023-03-28 14:27:41.356363
33,2013-10-01,ACRE,ÓLEO DIESEL S-1800 (m3),m3,14139.166,2023-03-28 14:27:41.366347
45,2013-10-01,ACRE,ÓLEO DIESEL MARÍTIMO (m3),m3,109.0,2023-03-28 14:27:41.376346
57,2013-10-01,ACRE,ÓLEO DIESEL (OUTROS ) (m3),m3,0.0,2023-03-28 14:27:41.385635


Enviando para o banco de dados schema = Final

In [21]:
engine = conn_bd()
df_final.to_sql(name='diesel', con=engine, if_exists='append', index=False, schema='Final')

Conexao realizada!


146