In [None]:
#pip install folium



In [1]:
import json
import os

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as stats

pd.options.display.max_columns=50

In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive; the input file paths used by this notebook live under that mount point
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Google Drive folder that holds the input files, followed by the CSV path of each base
bases_dir = '/content/drive/My Drive/Colab Notebooks/bases'
base_rotas = bases_dir + '/Rotas.csv'
base_clientes = bases_dir + '/Clientes.csv'
base_fabricas = bases_dir + '/Fabricas.csv'

In [4]:
# dtype maps, one per input base: date, period, code and label columns are typed as str,
# the Qtd columns as int, and freight value, distance and coordinates as float
_rotas_text_cols = ['Dt.Emissao', 'Dt.Entrega', 'Mes.Base', 'Ano.Exec',
                    'CO.Fabrica', 'CO.Cliente', 'Incoterm', 'Veiculo']
dict_type_rotas = {
    **dict.fromkeys(_rotas_text_cols, str),
    'Qtd/pallets': int,
    'Qtd.Transp': int,
    'Moeda': str,
    'Vlr.Frete': float,
    'Dist': float,
}

dict_type_fabricas = {
    **dict.fromkeys(['CO.Fabrica', 'NO_MUN', 'NO_MUN_MIN', 'SG_UF'], str),
    **dict.fromkeys(['LAT', 'LONG'], float),
}

dict_type_clientes = {
    **dict.fromkeys(['CO.Cliente', 'MUN'], str),
    **dict.fromkeys(['LAT', 'LONG'], float),
}

In [5]:
# Read the three CSV bases with their explicit dtype maps (comma is read_csv's default separator);
# only the factories file is decoded as latin1
df_rotas = pd.read_csv(base_rotas, dtype=dict_type_rotas)
df_fabricas = pd.read_csv(base_fabricas, dtype=dict_type_fabricas, encoding='latin1')
df_clientes = pd.read_csv(base_clientes, dtype=dict_type_clientes)

### Validação das Bases

#### Base Clientes e Fábricas

In [11]:
df_clientes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   CO.Cliente  51 non-null     object 
 1   MUN         51 non-null     object 
 2   LAT         51 non-null     float64
 3   LONG        51 non-null     float64
dtypes: float64(2), object(2)
memory usage: 1.7+ KB


In [12]:
df_clientes.head()

Unnamed: 0,CO.Cliente,MUN,LAT,LONG
0,2301,VALINHOS,-22.95188,-47.02779
1,2302,CAMPINAS,-22.89429,-47.05822
2,2303,CAMPINAS,-22.92634,-47.03974
3,2304,CAMPINAS,-22.83309,-47.07943
4,2305,CAMPINAS,-22.82307,-47.07818


In [13]:
df_fabricas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   CO.Fabrica  3 non-null      object 
 1   NO_MUN      3 non-null      object 
 2   NO_MUN_MIN  3 non-null      object 
 3   SG_UF       3 non-null      object 
 4   LAT         3 non-null      float64
 5   LONG        3 non-null      float64
dtypes: float64(2), object(4)
memory usage: 272.0+ bytes


In [14]:
df_fabricas.head()

Unnamed: 0,CO.Fabrica,NO_MUN,NO_MUN_MIN,SG_UF,LAT,LONG
0,3423909,ITU,Itu,SP,-23.251784,-47.343908
1,3403208,ARARAQUARA,Araraquara,SP,-21.820935,-48.172327
2,3424402,JACAREI,Jacareí,SP,-23.287136,-45.984976


In [15]:
# Load the GeoJSON file with pandas; its 'features' entries are the geometries drawn on the map further down
# NOTE(review): this path is spelled 'MyDrive' and contains a doubled '/', while the CSV paths use 'My Drive' — confirm both spellings resolve in the target environment
geojson_file = '/content/drive/MyDrive/Colab Notebooks/bases//geojs-35-mun.json'
mapa_coord = pd.read_json(geojson_file)

In [16]:
# Create the map centred on the mean client latitude/longitude, with an initial zoom of 4
map_center = [df_clientes['LAT'].mean(), df_clientes['LONG'].mean()]
mapa = folium.Map(location=map_center, zoom_start=4)

In [17]:
# Helper that drops one marker per dataframe row onto a map
def add_markers(df, map_obj, color):
    """Add one folium marker to ``map_obj`` for every row of ``df``.

    Each marker is placed at the row's ``LAT`` / ``LONG`` values, uses the value of
    the dataframe's first column as its popup, and gets an icon of the given ``color``.
    Nothing is returned; ``map_obj`` is modified in place.
    """
    label_col = df.columns[0]
    for _, record in df.iterrows():
        marker = folium.Marker(
            [record['LAT'], record['LONG']],
            popup=record[label_col],
            icon=folium.Icon(color=color),
        )
        marker.add_to(map_obj)

In [18]:
# Add markers for the clients (green)
add_markers(df_clientes, mapa, 'green')

In [19]:
# Add markers for the factories (red)
add_markers(df_fabricas, mapa, 'red')

In [20]:
# Overlay the GeoJSON geometries on the map.
# All features are wrapped in one FeatureCollection so folium builds a single layer
# (with a single style function) instead of one layer per feature.
geojson_collection = {
    'type': 'FeatureCollection',
    'features': list(mapa_coord['features']),
}
folium.GeoJson(
    geojson_collection,
    style_function=lambda feature: {
        'fillOpacity': 0.1,      # fill opacity (numeric, like the other numeric style values)
        'color': 'blue',         # line colour
        'weight': 0.7,           # line thickness
        'opacity': 0.7,          # line opacity
        'dashArray': '5, 5',     # dashed line pattern
    },
).add_to(mapa)

In [21]:
# Exibir o mapa no notebook
mapa

Output hidden; open in https://colab.research.google.com to view.

#### Base Rotas

In [22]:
df_rotas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106262 entries, 0 to 106261
Data columns (total 13 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Dt.Emissao   106262 non-null  object 
 1   Dt.Entrega   106262 non-null  object 
 2   Mes.Base     106262 non-null  object 
 3   Ano.Exec     106262 non-null  object 
 4   CO.Fabrica   106262 non-null  object 
 5   CO.Cliente   106262 non-null  object 
 6   Incoterm     106262 non-null  object 
 7   Veiculo      106262 non-null  object 
 8   Qtd/pallets  106262 non-null  int64  
 9   Qtd.Transp   106262 non-null  int64  
 10  Moeda        106262 non-null  object 
 11  Vlr.Frete    106262 non-null  float64
 12  Dist         106262 non-null  float64
dtypes: float64(2), int64(2), object(9)
memory usage: 10.5+ MB


In [23]:
df_rotas.head()

Unnamed: 0,Dt.Emissao,Dt.Entrega,Mes.Base,Ano.Exec,CO.Fabrica,CO.Cliente,Incoterm,Veiculo,Qtd/pallets,Qtd.Transp,Moeda,Vlr.Frete,Dist
0,01/01/23,05/01/23,1,2023,3423909,2311,FOB,P24,24,3600,BRL,0.0,71.41
1,01/01/23,03/01/23,1,2023,3424402,2333,FOB,P12,12,1500,BRL,0.0,74.02
2,01/01/23,04/01/23,1,2023,3403208,2347,FOB,P12,12,1800,BRL,0.0,85.92
3,01/01/23,06/01/23,1,2023,3424402,2332,FOB,P12,12,1800,BRL,0.0,90.01
4,01/01/23,04/01/23,1,2023,3424402,2332,FOB,P12,12,1500,BRL,0.0,90.01


In [24]:
# Convert the issue and delivery date columns from day/month/two-digit-year text to datetimes
for date_col in ('Dt.Emissao', 'Dt.Entrega'):
    df_rotas[date_col] = pd.to_datetime(df_rotas[date_col], format='%d/%m/%y')

In [25]:
# Date rule: the issue date must be on or before the delivery date (False marks an inconsistent row)
issued_by_delivery = df_rotas['Dt.Emissao'].le(df_rotas['Dt.Entrega'])
df_rotas['flag_dt_consist'] = issued_by_delivery

In [26]:
# contagem da flag de inconsistência de data
df_rotas['flag_dt_consist'].value_counts()

flag_dt_consist
True     106257
False         5
Name: count, dtype: int64

In [27]:
# Collect the rows that failed the date rule into their own dataframe for analysis
date_rule_failed = ~df_rotas['flag_dt_consist']
dt_inconsit = df_rotas.loc[date_rule_failed]

In [28]:
# Keep only the columns used in the date analysis
date_analysis_cols = ['Dt.Emissao', 'Dt.Entrega', 'Mes.Base', 'Ano.Exec']
dt_inconsit = dt_inconsit[date_analysis_cols]

In [29]:
dt_inconsit

Unnamed: 0,Dt.Emissao,Dt.Entrega,Mes.Base,Ano.Exec
59335,2023-12-17,2023-12-15,12,2023
59370,2023-12-18,2023-12-16,12,2023
59371,2023-12-18,2023-12-17,12,2023
59458,2023-12-21,2023-12-20,12,2023
59480,2023-12-24,2023-12-23,12,2023


In [30]:
# gravando .csv com inconsistencias de data
# dt_inconsit.to_csv('dt_inconsit.csv', index=True)

In [31]:
# FOB rule: a FOB row must carry no freight value (the flag is False when a FOB row has Vlr.Frete > 0).
# The flag is built as one boolean expression instead of a partial .loc assignment followed by
# fillna(True, inplace=True) on the selected column: that chained in-place call is not guaranteed
# to write back into df_rotas, and it leaves the flag as an object column rather than bool.
fob_with_freight = (df_rotas['Incoterm'] == 'FOB') & (df_rotas['Vlr.Frete'] > 0)
df_rotas['flag_incoterm_valor_consist'] = ~fob_with_freight

In [32]:
# contagem da flag de inconsistência FOB
df_rotas['flag_incoterm_valor_consist'].value_counts()

flag_incoterm_valor_consist
True     97495
False     8767
Name: count, dtype: int64

In [33]:
# Collect the rows that failed the FOB rule into their own dataframe for analysis
fob_rule_failed = ~df_rotas['flag_incoterm_valor_consist']
incoterm_inconsit = df_rotas.loc[fob_rule_failed]

In [34]:
# Keep only the columns used in the FOB analysis
fob_analysis_cols = ['Mes.Base', 'Ano.Exec', 'Incoterm', 'Vlr.Frete', 'flag_incoterm_valor_consist']
incoterm_inconsit = incoterm_inconsit[fob_analysis_cols]

In [35]:
incoterm_inconsit.head()

Unnamed: 0,Mes.Base,Ano.Exec,Incoterm,Vlr.Frete,flag_incoterm_valor_consist
74,1,2023,FOB,595.38,False
75,1,2023,FOB,705.24,False
76,1,2023,FOB,839.57,False
230,1,2023,FOB,595.38,False
231,1,2023,FOB,595.38,False


In [36]:
# gravando .csv com inconsistencias de FOB
# incoterm_inconsit.to_csv('FOB_inconsit.csv', index=True)

In [37]:
# Distinct freight values found among the rows that failed the FOB rule
incoterm_inconsit['Vlr.Frete'].unique()

array([595.38, 705.24, 839.57, 650.59, 359.13, 399.03, 781.4 , 585.4 ,
       573.82, 625.12, 798.6 , 516.44, 355.54, 427.65, 459.99, 441.58,
       657.18, 511.1 , 720.75, 741.35, 547.75, 464.8 , 797.6 , 671.67,
       413.99, 576.57, 606.93, 704.29, 744.22, 707.02, 758.67, 492.97,
       638.08, 780.37, 633.87])

### Enriquecendo a Base Rotas com Clientes e Fábricas

In [38]:
# Enrich the routes with the client attributes, then with the factory attributes (outer joins on the codes)
# NOTE(review): an outer join also keeps clients/factories that have no route rows — confirm that is intended
routes_with_clients = df_rotas.merge(df_clientes, how="outer", on="CO.Cliente")
df_merge = routes_with_clients.merge(df_fabricas, how="outer", on="CO.Fabrica")

In [39]:
# Reposition the two code columns: each one is popped and re-inserted at a fixed position.
# The positions (14 and 17) are counted after the pop, so they depend on the exact column
# layout of df_merge at this point; running this cell twice does not give the same layout.
# NOTE(review): presumably chosen so each code sits right before its own attribute columns — confirm
df_merge.insert(14,'CO.Cliente', df_merge.pop('CO.Cliente'))
df_merge.insert(17,'CO.Fabrica', df_merge.pop('CO.Fabrica'))

In [40]:
# Give the merged columns explicit client / factory names (the _x / _y suffixes are mapped to Cliente / Fabrica)
column_renames = {
    'MUN': 'MUN.Cliente',
    'LAT_x': 'LAT.Cliente',
    'LONG_x': 'LONG.Cliente',
    'NO_MUN': 'MUN.Fabrica',
    'LAT_y': 'LAT.Fabrica',
    'LONG_y': 'LONG.Fabrica',
}
df_merge = df_merge.rename(columns=column_renames)

In [41]:
# Remove the two validation flags and two factory descriptor columns from the enriched frame
dropped_cols = ['flag_dt_consist', 'flag_incoterm_valor_consist', 'NO_MUN_MIN', 'SG_UF']
df_merge = df_merge.drop(columns=dropped_cols)

In [42]:
df_merge.head(3)

Unnamed: 0,Dt.Emissao,Dt.Entrega,Mes.Base,Ano.Exec,Incoterm,Veiculo,Qtd/pallets,Qtd.Transp,Moeda,Vlr.Frete,Dist,CO.Cliente,MUN.Cliente,LAT.Cliente,LONG.Cliente,CO.Fabrica,MUN.Fabrica,LAT.Fabrica,LONG.Fabrica
0,2023-01-01,2023-01-05,1,2023,FOB,P24,24,3600,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.251784,-47.343908
1,2023-01-02,2023-01-05,1,2023,FOB,P12,12,1500,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.251784,-47.343908
2,2023-01-02,2023-01-05,1,2023,FOB,P24,24,3600,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.251784,-47.343908


In [43]:
# Build the "code - name" labels and the route label (factory - client) by joining text columns
label_sep = ' - '
label_sources = {
    'cod_mun_cliente': ('CO.Cliente', 'MUN.Cliente'),
    'cod_mun_fabrica': ('CO.Fabrica', 'MUN.Fabrica'),
    'rota': ('MUN.Fabrica', 'MUN.Cliente'),
}
for new_col, (left_col, right_col) in label_sources.items():
    df_merge[new_col] = df_merge[left_col] + label_sep + df_merge[right_col]

In [44]:
# Cast the four coordinate columns to text so they can be concatenated into "lat,long" strings
for coord_col in ('LAT.Cliente', 'LONG.Cliente', 'LAT.Fabrica', 'LONG.Fabrica'):
    df_merge[coord_col] = df_merge[coord_col].astype(str)

In [45]:
# Build the "lat,long" text of the client and of the factory from the text coordinate columns
df_merge['loc_cliente'] = df_merge['LAT.Cliente'].str.cat(df_merge['LONG.Cliente'], sep=',')
df_merge['loc_fabrica'] = df_merge['LAT.Fabrica'].str.cat(df_merge['LONG.Fabrica'], sep=',')

In [46]:
# One row per distinct (route, client location, factory location) combination, re-indexed from 0
route_key_cols = ['rota', 'loc_cliente', 'loc_fabrica']
distinct_routes = df_merge[route_key_cols].drop_duplicates()
df_rota_lat_long = distinct_routes.reset_index(drop=True)

In [47]:
df_rota_lat_long

Unnamed: 0,rota,loc_cliente,loc_fabrica
0,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079"
1,ITU - SAO PAULO,"-23.46722,-46.58475","-23.25178418,-47.3439079"
2,ITU - RIBEIRAO PRETO,"-21.19498,-47.75589","-23.25178418,-47.3439079"
3,ITU - SAO PAULO,"-23.48552,-46.73883","-23.25178418,-47.3439079"
4,ITU - OSASCO,"-23.55164,-46.79466","-23.25178418,-47.3439079"
...,...,...,...
112,JACAREI - SOROCABA,"-23.49563,-47.51121","-23.28713588,-45.9849763"
113,JACAREI - CAMPINAS,"-22.83309,-47.07943","-23.28713588,-45.9849763"
114,JACAREI - SAO CARLOS,"-22.03848,-47.86018","-23.28713588,-45.9849763"
115,JACAREI - AMERICANA,"-22.72237,-47.3074","-23.28713588,-45.9849763"


In [48]:
# Google Maps API key, read from the GOOGLE_MAPS_API_KEY environment variable so the secret
# is never stored in the notebook.
# NOTE(review): a literal key used to be hard-coded in this cell; treat it as exposed and rotate it.
google_maps_key = os.environ.get("GOOGLE_MAPS_API_KEY", "")
if not google_maps_key:
    print("GOOGLE_MAPS_API_KEY is not set; the Google Maps requests in this notebook will not be authorised.")

In [49]:
# Query the Google Directions API for the distance and travel time of a route

def get_route_info(origin, destination, google_maps_key, timeout=10):
    """Return ``(distance_km, duration_min)`` for a route, or ``(None, None)`` on failure.

    Parameters
    ----------
    origin, destination : str
        Sent as the ``origin`` / ``destination`` query parameters.
    google_maps_key : str
        API key sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 10).

    Returns
    -------
    tuple
        The ``distance`` value of the first leg of the first route divided by 1000 and
        its ``duration`` value divided by 60. ``(None, None)`` is returned when the
        request fails, the HTTP status is not 200, the body is not valid JSON, or the
        response contains no route/leg.
    """
    url = 'https://maps.googleapis.com/maps/api/directions/json'
    params = {
        'origin': origin,
        'destination': destination,
        'key': google_maps_key
    }
    try:
        # A timeout keeps one stalled request from blocking the whole notebook run.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        return None, None
    if response.status_code != 200:
        return None, None
    try:
        payload = response.json()
    except ValueError:
        return None, None
    # An HTTP 200 answer can still carry an empty "routes" list (no route found,
    # request denied, quota exceeded), so guard before indexing into it.
    routes = payload.get('routes') or []
    if not routes or not routes[0].get('legs'):
        return None, None
    leg = routes[0]['legs'][0]
    distance = leg['distance']['value'] / 1000.0
    duration = leg['duration']['value'] / 60.0
    return distance, duration

In [50]:
# Two separate empty lists that will receive the distances and durations returned by the API
distances, durations = [], []

In [51]:
%%time

# Query the API once per row of the routes dataframe (origin = factory, destination = client).
# The result lists are rebuilt here so that re-running this cell cannot append a second batch
# of results and leave the lists longer than df_rota_lat_long.
distances = []
durations = []
for origin, destination in zip(df_rota_lat_long['loc_fabrica'], df_rota_lat_long['loc_cliente']):
    distance, duration = get_route_info(origin, destination, google_maps_key)
    distances.append(distance)
    durations.append(duration)

CPU times: user 10.2 s, sys: 198 ms, total: 10.4 s
Wall time: 41.7 s


In [52]:
# Attach the API results as the distance (km) and travel time (min) columns
df_rota_lat_long = df_rota_lat_long.assign(km_api=distances, tempo_min_api=durations)

In [53]:
df_rota_lat_long.head()

Unnamed: 0,rota,loc_cliente,loc_fabrica,km_api,tempo_min_api
0,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079",71.15,55.716667
1,ITU - SAO PAULO,"-23.46722,-46.58475","-23.25178418,-47.3439079",117.495,98.516667
2,ITU - RIBEIRAO PRETO,"-21.19498,-47.75589","-23.25178418,-47.3439079",263.18,185.15
3,ITU - SAO PAULO,"-23.48552,-46.73883","-23.25178418,-47.3439079",94.75,76.5
4,ITU - OSASCO,"-23.55164,-46.79466","-23.25178418,-47.3439079",88.047,72.316667


In [54]:
# Bring the API distance and travel time into the enriched routes (left join on route + both locations)
route_join_keys = ['rota', 'loc_cliente', 'loc_fabrica']
df_rotas_enriq = df_merge.merge(df_rota_lat_long, how="left", on=route_join_keys)

In [55]:
df_rotas_enriq.head(3)

Unnamed: 0,Dt.Emissao,Dt.Entrega,Mes.Base,Ano.Exec,Incoterm,Veiculo,Qtd/pallets,Qtd.Transp,Moeda,Vlr.Frete,Dist,CO.Cliente,MUN.Cliente,LAT.Cliente,LONG.Cliente,CO.Fabrica,MUN.Fabrica,LAT.Fabrica,LONG.Fabrica,cod_mun_cliente,cod_mun_fabrica,rota,loc_cliente,loc_fabrica,km_api,tempo_min_api
0,2023-01-01,2023-01-05,1,2023,FOB,P24,24,3600,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.25178418,-47.3439079,2311 - PIRACICABA,3423909 - ITU,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079",71.15,55.716667
1,2023-01-02,2023-01-05,1,2023,FOB,P12,12,1500,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.25178418,-47.3439079,2311 - PIRACICABA,3423909 - ITU,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079",71.15,55.716667
2,2023-01-02,2023-01-05,1,2023,FOB,P24,24,3600,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.25178418,-47.3439079,2311 - PIRACICABA,3423909 - ITU,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079",71.15,55.716667


In [56]:
# Distinct (route, recorded distance, API distance) combinations used for the distance analysis
distance_cols = ['rota', 'Dist', 'km_api']
rotas_dist = df_rotas_enriq.loc[:, distance_cols].drop_duplicates()

In [57]:
# Row-wise statistics over the two distance sources, plus their absolute difference
distance_pair = rotas_dist[['Dist', 'km_api']]
rotas_dist['MEAN'] = distance_pair.mean(axis=1)
rotas_dist['MIN'] = distance_pair.min(axis=1)
rotas_dist['MAX'] = distance_pair.max(axis=1)
rotas_dist['ERRO'] = rotas_dist['Dist'].sub(rotas_dist['km_api']).abs()

In [58]:
rotas_dist.head(2)

Unnamed: 0,rota,Dist,km_api,MEAN,MIN,MAX,ERRO
0,ITU - PIRACICABA,71.41,71.15,71.28,71.15,71.41,0.26
734,ITU - SAO PAULO,117.6,117.495,117.5475,117.495,117.6,0.105


In [59]:
# Rows whose absolute difference between the two distances is greater than 5
error_above_limit = rotas_dist['ERRO'].gt(5)
inconsit_dist_maior_5km = rotas_dist.loc[error_above_limit]

In [60]:
inconsit_dist_maior_5km

Unnamed: 0,rota,Dist,km_api,MEAN,MIN,MAX,ERRO
16620,ITU - BAURU,260.8,250.234,255.517,250.234,260.8,10.566
20224,ITU - BAURU,261.83,251.818,256.824,251.818,261.83,10.012
23970,ITU - RIBEIRAO PRETO,282.13,273.519,277.8245,273.519,282.13,8.611
36457,ARARAQUARA - SAO PAULO,287.46,277.367,282.4135,277.367,287.46,10.093
39271,ARARAQUARA - SANTANA DE PARNAIBA,270.5,258.342,264.421,258.342,270.5,12.158
57223,ARARAQUARA - BAURU,130.85,124.481,127.6655,124.481,130.85,6.369
62233,ARARAQUARA - SANTANA DE PARNAIBA,269.85,257.466,263.658,257.466,269.85,12.384
79728,JACAREI - SAO PAULO,95.61,120.639,108.1245,95.61,120.639,25.029
86418,JACAREI - CAMPINAS,155.67,146.186,150.928,146.186,155.67,9.484


In [61]:
# Show every field of the row at position 62233 of the enriched routes
# NOTE(review): 62233 looks like an index label taken from the distance-inconsistency table; iloc is positional, so this matches that label only while the index is the default 0..n-1 range — confirm
df_rotas_enriq.iloc[62233]

Dt.Emissao                      2023-04-04 00:00:00
Dt.Entrega                      2023-04-07 00:00:00
Mes.Base                                          4
Ano.Exec                                       2023
Incoterm                                        FOB
Veiculo                                         P12
Qtd/pallets                                      12
Qtd.Transp                                     1500
Moeda                                           BRL
Vlr.Frete                                       0.0
Dist                                         269.85
CO.Cliente                                     2334
MUN.Cliente                     SANTANA DE PARNAIBA
LAT.Cliente                               -23.46668
LONG.Cliente                              -46.84622
CO.Fabrica                                  3403208
MUN.Fabrica                              ARARAQUARA
LAT.Fabrica                            -21.82093539
LONG.Fabrica                           -48.17232722
cod_mun_clie

In [62]:
# gravando .csv com inconsistencias de distância
# inconsit_dist_maior_5km.to_csv('inconsit_dist_maior_5km.csv', index=True)

### Separando mês da base rotas_enriq para modelagem

In [66]:
df_rotas_enriq.head(2)

Unnamed: 0,Dt.Emissao,Dt.Entrega,Mes.Base,Ano.Exec,Incoterm,Veiculo,Qtd/pallets,Qtd.Transp,Moeda,Vlr.Frete,Dist,CO.Cliente,MUN.Cliente,LAT.Cliente,LONG.Cliente,CO.Fabrica,MUN.Fabrica,LAT.Fabrica,LONG.Fabrica,cod_mun_cliente,cod_mun_fabrica,rota,loc_cliente,loc_fabrica,km_api,tempo_min_api
0,2023-01-01,2023-01-05,1,2023,FOB,P24,24,3600,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.25178418,-47.3439079,2311 - PIRACICABA,3423909 - ITU,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079",71.15,55.716667
1,2023-01-02,2023-01-05,1,2023,FOB,P12,12,1500,BRL,0.0,71.41,2311,PIRACICABA,-22.74145,-47.60181,3423909,ITU,-23.25178418,-47.3439079,2311 - PIRACICABA,3423909 - ITU,ITU - PIRACICABA,"-22.74145,-47.60181","-23.25178418,-47.3439079",71.15,55.716667


In [65]:
# Keep the routes whose issue date falls between 2023-02-01 and 2023-02-28, both bounds included
issued_in_feb = df_rotas_enriq['Dt.Emissao'].between('2023-02-01', '2023-02-28')
df_rotas_fev = df_rotas_enriq.loc[issued_in_feb]

In [67]:
# Write the February routes to rotas_fev.csv (relative path, index column omitted)
df_rotas_fev.to_csv('rotas_fev.csv', index=False)