### Importação das bibliotecas

In [1]:
import pandas as pd
import numpy as np

import sqlalchemy
from urllib.parse import quote_plus
from sqlalchemy import create_engine, text
from sqlalchemy.engine.base import Connection
from sqlalchemy.engine import URL

### Conexão com o Banco de Dados

In [2]:

def conn_bd(
    server='DESKTOP-LG9U8DH',
    database='Raizen',
    driver='{ODBC Driver 17 for SQL Server}',
    trusted_connection='yes',
):
    """Build a SQLAlchemy engine for a SQL Server database through pyodbc.

    The ODBC connection string is assembled once, with one ``KEY=value``
    pair per attribute separated by ``;``, and handed to SQLAlchemy through
    the ``odbc_connect`` query parameter.

    Parameters
    ----------
    server : str
        SQL Server host / instance name.
    database : str
        Database to connect to.
    driver : str
        ODBC driver name, wrapped in braces.
    trusted_connection : str
        Value for the ``TRUSTED_CONNECTION`` attribute.

    Returns
    -------
    sqlalchemy.engine.Engine
        Engine configured with the connection string above.

    Raises
    ------
    Exception
        Whatever ``URL.create`` or ``create_engine`` raised; the message is
        printed first and the original exception is then propagated, instead
        of failing later on an unbound ``engine`` variable.

    Notes
    -----
    Per SQLAlchemy's documentation ``create_engine`` does not open a
    connection by itself, so the success message only means the engine
    object was created.
    """
    # Single, clean pyodbc connection string: no duplicated keys, no stray
    # whitespace or backslashes between the attributes.
    connection_string = (
        f'DRIVER={driver};'
        f'SERVER={server};'
        f'DATABASE={database};'
        f'TRUSTED_CONNECTION={trusted_connection}'
    )

    try:
        # Create the SQLAlchemy connection URL and the engine from it.
        connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
        engine = sqlalchemy.create_engine(connection_url)
    except Exception as error:
        print("Conexao não realizada! " + str(error))
        # Propagate the real cause rather than returning an unbound name.
        raise

    print("Conexao realizada!")
    return engine

### Leitura dos dados (diesel)

In [3]:
# Location of the source Excel workbook. This is an absolute path on one
# specific machine, so it has to be adjusted when the notebook runs elsewhere.
caminho = r'C:\Users\BlueShift\Desktop\Teste Raizen\dados\vendas-combustiveis-m3__oleo_diesel_2023.xlsx'

In [4]:
# Read the workbook, skipping its first two rows. sheet_name=None loads every
# sheet, so the result is a dict keyed by sheet name.
planilha = pd.read_excel(caminho, skiprows=2, sheet_name=None)

In [5]:
# Display the loaded dict of sheets.
planilha

{'2013':                     COMBUSTÍVEL   ANO               REGIÃO  \
 0         ÓLEO DIESEL S-10 (m3)  2013         REGIÃO NORTE   
 1         ÓLEO DIESEL S-10 (m3)  2013         REGIÃO NORTE   
 2         ÓLEO DIESEL S-10 (m3)  2013         REGIÃO NORTE   
 3         ÓLEO DIESEL S-10 (m3)  2013         REGIÃO NORTE   
 4         ÓLEO DIESEL S-10 (m3)  2013         REGIÃO NORTE   
 ..                          ...   ...                  ...   
 130  ÓLEO DIESEL (OUTROS ) (m3)  2013           REGIÃO SUL   
 131  ÓLEO DIESEL (OUTROS ) (m3)  2013  REGIÃO CENTRO-OESTE   
 132  ÓLEO DIESEL (OUTROS ) (m3)  2013  REGIÃO CENTRO-OESTE   
 133  ÓLEO DIESEL (OUTROS ) (m3)  2013  REGIÃO CENTRO-OESTE   
 134  ÓLEO DIESEL (OUTROS ) (m3)  2013  REGIÃO CENTRO-OESTE   
 
                  ESTADO UNIDADE        Jan        Fev       Mar        Abr  \
 0              RONDÔNIA      m3   3517.600   3681.700   4700.67   5339.200   
 1                  ACRE      m3    363.000    410.000    536.00    607.000 

In [6]:
# List the names of the sheets that were loaded.
planilha.keys()

dict_keys(['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', 'Plan1'])

In [8]:
# Initialise df_diesel as an empty DataFrame; the yearly sheets are combined
# into this name in the next cell.
df_diesel = pd.DataFrame()

In [9]:
# Stack every sheet except 'Plan1' into one frame with a single concat.
# Building the result from `planilha` alone (instead of appending to the
# current df_diesel inside a loop) keeps this cell idempotent: running it
# again produces the same frame rather than duplicating every sheet.
planilhas_anuais = [planilha[i] for i in planilha if i != 'Plan1']
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
df_diesel = pd.concat(planilhas_anuais) if planilhas_anuais else pd.DataFrame()

In [10]:
# Shape (rows, columns) of the combined frame.
df_diesel.shape

(1350, 17)

In [11]:
# Ten randomly chosen rows as a quick sanity check. No random_state is passed,
# so the selected rows are not fixed from one run to the next.
df_diesel.sample(10)

Unnamed: 0,COMBUSTÍVEL,ANO,REGIÃO,ESTADO,UNIDADE,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez
10,ÓLEO DIESEL (OUTROS ) (m3),2016,REGIÃO SUDESTE,MINAS GERAIS,m3,6.789,8.447,4.254,14.159,3.736,1.628,2.8,0.0,10.0,0.0,0.0,0.0
68,ÓLEO DIESEL S-1800 (m3),2022,REGIÃO NORDESTE,PERNAMBUCO,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
38,ÓLEO DIESEL MARÍTIMO (m3),2020,REGIÃO NORDESTE,BAHIA,m3,1275.0,1009.0,767.0,598.0,530.0,264.0,341.0,656.0,761.0,776.0,1009.0,1327.0
74,ÓLEO DIESEL S-1800 (m3),2019,REGIÃO NORTE,TOCANTINS,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25,ÓLEO DIESEL (OUTROS ) (m3),2019,REGIÃO NORTE,ACRE,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59,ÓLEO DIESEL S-1800 (m3),2021,REGIÃO SUL,SANTA CATARINA,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39,ÓLEO DIESEL MARÍTIMO (m3),2021,REGIÃO NORDESTE,SERGIPE,m3,55.0,80.0,70.0,55.0,115.0,283.0,145.0,120.0,130.0,133.0,175.0,75.0
5,ÓLEO DIESEL (OUTROS ) (m3),2021,REGIÃO SUL,SANTA CATARINA,m3,25.0,45.0,15.0,30.0,25.0,40.0,60.0,65.0,30.0,70.0,50.0,95.0
75,ÓLEO DIESEL S-1800 (m3),2020,REGIÃO NORTE,AMAPÁ,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55,ÓLEO DIESEL S-1800 (m3),2020,REGIÃO CENTRO-OESTE,GOIÁS,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Order the rows by state and then by year (ascending is the sort_values default).
df_diesel = df_diesel.sort_values(['ESTADO', 'ANO'])

In [13]:
# Preview the first four rows of the sorted frame.
df_diesel.head(4)

Unnamed: 0,COMBUSTÍVEL,ANO,REGIÃO,ESTADO,UNIDADE,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez
1,ÓLEO DIESEL S-10 (m3),2013,REGIÃO NORTE,ACRE,m3,363.0,410.0,536.0,607.0,740.0,756.0,971.0,1174.0,1240.0,1439.0,1483.0,1483.0
28,ÓLEO DIESEL S-500 (m3),2013,REGIÃO NORTE,ACRE,m3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55,ÓLEO DIESEL S-1800 (m3),2013,REGIÃO NORTE,ACRE,m3,10143.361,11170.935,9642.0,11302.5,12034.0,12136.0,13515.5,14032.0,13589.0,14139.166,12430.431,11134.0
82,ÓLEO DIESEL MARÍTIMO (m3),2013,REGIÃO NORTE,ACRE,m3,32.0,75.0,120.0,155.0,60.0,30.0,25.0,11.0,25.0,109.0,40.0,119.73


### Exploração dos dados

In [14]:
# Column labels of the combined frame.
df_diesel.columns

Index(['COMBUSTÍVEL', 'ANO', 'REGIÃO', 'ESTADO', 'UNIDADE', 'Jan', 'Fev',
       'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez'],
      dtype='object')

In [16]:
# Column-wise totals of the numeric columns for the rows whose ESTADO is 'ACRE'.
# ANO is numeric as well, so its sum shows up next to the monthly totals.
df_diesel.loc[df_diesel['ESTADO'] == 'ACRE'].sum(numeric_only=True)

ANO    100875.000000
Jan    115579.426114
Fev    109409.568491
Mar    118185.335032
Abr    119847.070359
Mai    128790.251032
Jun    131544.849950
Jul    145298.103573
Ago    148659.915573
Set    149444.701114
Out    153517.376032
Nov    136588.488216
Dez    131606.872216
dtype: float64

In [17]:
# Distinct values of the ESTADO column, returned as a NumPy array.
estado_lista = df_diesel['ESTADO'].unique()
estado_lista

array(['ACRE', 'ALAGOAS', 'AMAPÁ', 'AMAZONAS', 'BAHIA', 'CEARÁ',
       'DISTRITO FEDERAL', 'ESPÍRITO SANTO', 'GOIÁS', 'MARANHÃO',
       'MATO GROSSO', 'MATO GROSSO DO SUL', 'MINAS GERAIS', 'PARANÁ',
       'PARAÍBA', 'PARÁ', 'PERNAMBUCO', 'PIAUÍ', 'RIO DE JANEIRO',
       'RIO GRANDE DO NORTE', 'RIO GRANDE DO SUL', 'RONDÔNIA', 'RORAIMA',
       'SANTA CATARINA', 'SERGIPE', 'SÃO PAULO', 'TOCANTINS'],
      dtype=object)

In [18]:
# For each state print three lines: its name, the column-wise totals of its
# numeric columns, and a 25-character separator.
for i in estado_lista:
    print(
        i,
        df_diesel.loc[df_diesel['ESTADO'] == i].sum(numeric_only=True),
        '=' * 25,
        sep='\n',
    )

ACRE
ANO    100875.000000
Jan    115579.426114
Fev    109409.568491
Mar    118185.335032
Abr    119847.070359
Mai    128790.251032
Jun    131544.849950
Jul    145298.103573
Ago    148659.915573
Set    149444.701114
Out    153517.376032
Nov    136588.488216
Dez    131606.872216
dtype: float64
ALAGOAS
ANO    100875.000000
Jan    358891.785322
Fev    317490.871058
Mar    324045.633146
Abr    279876.342851
Mai    267747.204146
Jun    251007.195970
Jul    266068.027234
Ago    286608.024234
Set    310703.989322
Out    366578.786146
Nov    366834.472111
Dez    376134.478111
dtype: float64
AMAPÁ
ANO    100875.000000
Jan    183842.916729
Fev    150118.586583
Mar    153069.151632
Abr    147469.745020
Mai    149312.597632
Jun    152374.131534
Jul    164010.619680
Ago    178376.971680
Set    169951.155729
Out    181781.575632
Nov    179460.123212
Dez    173089.029212
dtype: float64
AMAZONAS
ANO    100875.000000
Jan    837841.262118
Fev    765544.113494
Mar    825870.095036
Abr    878948.545365
Mai

### Envio dos dados para o Banco de Dados no schema Stage

In [20]:
# Obtain the SQL Server engine from conn_bd().
engine = conn_bd()

# Write df_diesel to the table Stage.diesel. if_exists='append' adds the rows to
# whatever the table already holds, so every run of this cell inserts them again;
# index=False leaves the DataFrame index out of the table.
df_diesel.to_sql(
    name='diesel',
    con=engine,
    schema='Stage',
    if_exists='append',
    index=False,
)

Conexao realizada!


120