### Notebook criado para obter o distrito de cada ocorrência

In [1]:
import geopandas as gpd
import re
from pyproj import Proj, Transformer
import pandas as pd

In [2]:
# Lista com os anos dos arquivos
lista_anos = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
dataframes = {}

# Percorrendo a lista e carregando os DataFrames
for ano in lista_anos:
    arquivo = f'../dados/ocorrencias/SIRGAS_SHP_riscoocorrencia_{ano}.shp'
    dataframes[f'df_{ano}'] = gpd.read_file(arquivo)

df_2013 = dataframes['df_2013']
df_2014 = dataframes['df_2014']
df_2015 = dataframes['df_2015']
df_2016 = dataframes['df_2016']
df_2017 = dataframes['df_2017']
df_2018 = dataframes['df_2018']
df_2019 = dataframes['df_2019']
df_2020 = dataframes['df_2020']
df_2021 = dataframes['df_2021']
df_2022 = dataframes['df_2022']
df_2023 = dataframes['df_2023']
df_2024 = dataframes['df_2024']

In [3]:
df_total1 = pd.concat([df_2021, df_2022, df_2023, df_2024], ignore_index=True)
df_total2 = pd.concat([df_2013, df_2014, df_2015, df_2016, df_2017, df_2018, df_2019, df_2020], ignore_index=True)

df_total1 = df_total1.rename(columns={'subpreit':'subprefeit'})

df_total = pd.concat([df_total2, df_total1], ignore_index=True)

In [4]:
df_total

Unnamed: 0,data,ocorrencia,subprefeit,name,geometry
0,2013-12-29 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,47368,POINT Z (318923.714 7389107.424 0)
1,2013-12-03 00:00:00,QUEDA DE ARVORE,CS - CAPELA DO SOCORRO,45006,POINT Z (326291.01 7378438.554 0)
2,2013-12-10 00:00:00,ALAGAMENTO,SA - SANTO AMARO,45361,POINT Z (325784.879 7383070.03 0)
3,2013-12-30 00:00:00,QUEDA DE ARVORE,SE - SE,47427,POINT Z (332913.137 7396238.346 0)
4,2013-12-29 00:00:00,QUEDA DE ARVORE,SA - SANTO AMARO,47337,POINT Z (325940.608 7385402.683 0)
...,...,...,...,...,...
58267,2024-01-31 00:00:00,ALAGAMENTO,SB - SAPOPEMBA,32057914,POINT (347483.632 7386553.452)
58268,2024-01-31 00:00:00,QUEDA DE ARVORE,IQ - ITAQUERA,32058056,POINT (354127.126 7394828.661)
58269,2024-01-31 00:00:00,INUNDACAO,CS - CAPELA DO SOCORRO,32058336,POINT (328375.103 7375680.959)
58270,2024-01-31 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,32058368,POINT (324562.933 7390714.965)


In [5]:
gdf = gpd.read_file('../dados/distritos_shapefile/SIRGAS_SHP_distrito.shp')

In [6]:
gdf

Unnamed: 0,ds_nome,ds_codigo,ds_cd_sub,ds_subpref,ds_sigla,ds_areamt,ds_areakm,geometry
0,MANDAQUI,51,05,SANTANA-TUCURUVI,MAN,1.324786e+07,13.248,"POLYGON ((330950.373 7407837.176, 330952.177 7..."
1,MARSILAC,52,20,PARELHEIROS,MAR,2.081957e+08,208.196,"POLYGON ((336124.09 7355302.282, 336121.755 73..."
2,MOEMA,32,12,VILA MARIANA,MOE,9.079516e+06,9.080,"POLYGON ((331242.174 7392162.324, 331244.581 7..."
3,ARTUR ALVIM,5,21,PENHA,AAL,6.508361e+06,6.508,"POLYGON ((349416.226 7397718.808, 349416.884 7..."
4,IGUATEMI,33,30,SAO MATEUS,IGU,1.958356e+07,19.584,"POLYGON ((350859.949 7389599.855, 350880.182 7..."
...,...,...,...,...,...,...,...,...
91,CAMPO LIMPO,17,17,CAMPO LIMPO,CLM,1.259634e+07,12.596,"POLYGON ((321265.375 7388006.57, 321292.309 73..."
92,JAGUARE,41,08,LAPA,JRE,6.579876e+06,6.580,"POLYGON ((322952.516 7394978.618, 323250.62 73..."
93,JARAGUA,42,02,PIRITUBA-JARAGUA,JAR,2.833031e+07,28.330,"POLYGON ((319172.403 7405179.204, 319174.204 7..."
94,JARDIM HELENA,44,23,SAO MIGUEL,JDH,9.148559e+06,9.149,"POLYGON ((352915.621 7402621.049, 352944.248 7..."


In [7]:
# Realizando a junção espacial entre os DataFrames de acordo com a localização
gdf_ocorrencias_com_distritos = gpd.sjoin(df_total, gdf, how="left", predicate="within")

In [8]:
gdf_ocorrencias_com_distritos

Unnamed: 0,data,ocorrencia,subprefeit,name,geometry,index_right,ds_nome,ds_codigo,ds_cd_sub,ds_subpref,ds_sigla,ds_areamt,ds_areakm
0,2013-12-29 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,47368,POINT Z (318923.714 7389107.424 0),19.0,RAPOSO TAVARES,65,10,BUTANTA,RTA,12433885.23,12.434
1,2013-12-03 00:00:00,QUEDA DE ARVORE,CS - CAPELA DO SOCORRO,45006,POINT Z (326291.01 7378438.554 0),63.0,SOCORRO,79,19,CAPELA DO SOCORRO,SOC,11965477.82,11.965
2,2013-12-10 00:00:00,ALAGAMENTO,SA - SANTO AMARO,45361,POINT Z (325784.879 7383070.03 0),60.0,SANTO AMARO,71,14,SANTO AMARO,SAM,16036225.98,16.036
3,2013-12-30 00:00:00,QUEDA DE ARVORE,SE - SE,47427,POINT Z (332913.137 7396238.346 0),55.0,REPUBLICA,66,09,SE,REP,2396993.44,2.397
4,2013-12-29 00:00:00,QUEDA DE ARVORE,SA - SANTO AMARO,47337,POINT Z (325940.608 7385402.683 0),60.0,SANTO AMARO,71,14,SANTO AMARO,SAM,16036225.98,16.036
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58267,2024-01-31 00:00:00,ALAGAMENTO,SB - SAPOPEMBA,32057914,POINT (347483.632 7386553.452),22.0,SAPOPEMBA,76,32,SAPOPEMBA,SAP,13631253.32,13.631
58268,2024-01-31 00:00:00,QUEDA DE ARVORE,IQ - ITAQUERA,32058056,POINT (354127.126 7394828.661),9.0,JOSE BONIFACIO,47,27,ITAQUERA,JBO,14566043.13,14.566
58269,2024-01-31 00:00:00,INUNDACAO,CS - CAPELA DO SOCORRO,32058336,POINT (328375.103 7375680.959),40.0,CIDADE DUTRA,23,19,CAPELA DO SOCORRO,CDU,27965809.76,27.966
58270,2024-01-31 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,32058368,POINT (324562.933 7390714.965),87.0,BUTANTA,12,10,BUTANTA,BUT,12952409.83,12.952


In [9]:
base_final = gdf_ocorrencias_com_distritos.drop(columns=['name', 'index_right', 'ds_codigo', 'ds_cd_sub', 'ds_sigla', 'ds_areamt','ds_areakm', 'ds_subpref'])

In [10]:
base_final

Unnamed: 0,data,ocorrencia,subprefeit,geometry,ds_nome
0,2013-12-29 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,POINT Z (318923.714 7389107.424 0),RAPOSO TAVARES
1,2013-12-03 00:00:00,QUEDA DE ARVORE,CS - CAPELA DO SOCORRO,POINT Z (326291.01 7378438.554 0),SOCORRO
2,2013-12-10 00:00:00,ALAGAMENTO,SA - SANTO AMARO,POINT Z (325784.879 7383070.03 0),SANTO AMARO
3,2013-12-30 00:00:00,QUEDA DE ARVORE,SE - SE,POINT Z (332913.137 7396238.346 0),REPUBLICA
4,2013-12-29 00:00:00,QUEDA DE ARVORE,SA - SANTO AMARO,POINT Z (325940.608 7385402.683 0),SANTO AMARO
...,...,...,...,...,...
58267,2024-01-31 00:00:00,ALAGAMENTO,SB - SAPOPEMBA,POINT (347483.632 7386553.452),SAPOPEMBA
58268,2024-01-31 00:00:00,QUEDA DE ARVORE,IQ - ITAQUERA,POINT (354127.126 7394828.661),JOSE BONIFACIO
58269,2024-01-31 00:00:00,INUNDACAO,CS - CAPELA DO SOCORRO,POINT (328375.103 7375680.959),CIDADE DUTRA
58270,2024-01-31 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,POINT (324562.933 7390714.965),BUTANTA


In [11]:
df = base_final.copy()

In [12]:
df['geometry'] = df['geometry'].astype(str)

  df['geometry'] = df['geometry'].astype(str)


In [13]:
# Definindo o sistema de coordenadas UTM zona 23S
utm_proj_2d = Proj(proj='utm', zone=23, south=True, ellps='WGS84')
utm_proj_3d = Proj(proj='utm', zone=23, south=True, ellps='WGS84', datum='WGS84')
wgs84_proj = Proj(proj='latlong', datum='WGS84')

transformer_2d = Transformer.from_proj(utm_proj_2d, wgs84_proj)
transformer_3d = Transformer.from_proj(utm_proj_3d, wgs84_proj)

def converte_para_latlon(point_str):
    # Detectar se é POINT Z ou POINT
    if 'POINT Z' in point_str:
        # Extrair as coordenadas do formato POINT Z
        parts = point_str.replace('POINT Z (', '').replace(')', '').split()
        x, y = float(parts[0]), float(parts[1])
        # Converter coordenadas UTM 3D para latitude e longitude
        lat, lon = transformer_3d.transform(x, y)
    elif 'POINT' in point_str:
        # Extrair as coordenadas do formato POINT
        parts = point_str.replace('POINT (', '').replace(')', '').split()
        x, y = float(parts[0]), float(parts[1])
        # Converter coordenadas UTM 2D para latitude e longitude
        lat, lon = transformer_2d.transform(x, y)
    else:
        # Caso não seja um formato reconhecido
        return None, None
    return lat, lon

# Aplicar a função para criar as novas colunas 'Latitude' e 'Longitude'
df[['Longitude', 'Latitude']] = df['geometry'].apply(lambda x: pd.Series(converte_para_latlon(x)))

# Exibindo o DataFrame atualizado
df

Unnamed: 0,data,ocorrencia,subprefeit,geometry,ds_nome,Longitude,Latitude
0,2013-12-29 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,POINT Z (318923.7137 7389107.424428 0),RAPOSO TAVARES,-46.774643,-23.598443
1,2013-12-03 00:00:00,QUEDA DE ARVORE,CS - CAPELA DO SOCORRO,POINT Z (326291.010146 7378438.554415 0),SOCORRO,-46.703712,-23.695577
2,2013-12-10 00:00:00,ALAGAMENTO,SA - SANTO AMARO,POINT Z (325784.879025 7383070.030225 0),SANTO AMARO,-46.708130,-23.653706
3,2013-12-30 00:00:00,QUEDA DE ARVORE,SE - SE,POINT Z (332913.137082 7396238.345912 0),REPUBLICA,-46.636786,-23.535563
4,2013-12-29 00:00:00,QUEDA DE ARVORE,SA - SANTO AMARO,POINT Z (325940.608356 7385402.683087 0),SANTO AMARO,-46.706331,-23.632662
...,...,...,...,...,...,...,...
58267,2024-01-31 00:00:00,ALAGAMENTO,SB - SAPOPEMBA,POINT (347483.631646 7386553.451906),SAPOPEMBA,-46.495083,-23.624450
58268,2024-01-31 00:00:00,QUEDA DE ARVORE,IQ - ITAQUERA,POINT (354127.125688 7394828.6609),JOSE BONIFACIO,-46.429165,-23.550340
58269,2024-01-31 00:00:00,INUNDACAO,CS - CAPELA DO SOCORRO,POINT (328375.102682 7375680.958842),CIDADE DUTRA,-46.683598,-23.720698
58270,2024-01-31 00:00:00,QUEDA DE ARVORE,BT - BUTANTA,POINT (324562.933229 7390714.964635),BUTANTA,-46.719206,-23.584550


In [30]:
df.to_csv('../dados/ocorrencias_com_distritos.csv', index=False)

In [25]:
df_aws = pd.read_csv('../dados/ocorrencias_com_distritos.csv', sep=',')

In [26]:
df_aws

Unnamed: 0,data,data_hora,ocorrencia,subprefeit,geometry,ds_nome,Longitude,Latitude
0,29/12/2013,29/12/2013 00:00,QUEDA DE ARVORE,BT - BUTANTA,POINT Z (318923.7137 7389107.424428 0),RAPOSO TAVARES,-4.677.464.276.631.380,-23.598.442.687.972.200
1,03/12/2013,03/12/2013 00:00,QUEDA DE ARVORE,CS - CAPELA DO SOCORRO,POINT Z (326291.010146 7378438.554415 0),SOCORRO,-4.670.371.173.463.540,-23.695.576.883.145.000
2,10/12/2013,10/12/2013 00:00,ALAGAMENTO,SA - SANTO AMARO,POINT Z (325784.879025 7383070.030225 0),SANTO AMARO,-4.670.813.032.481.320,-23.653.706.273.415.000
3,30/12/2013,30/12/2013 00:00,QUEDA DE ARVORE,SE - SE,POINT Z (332913.137082 7396238.345912 0),REPUBLICA,-46.636.786.132.214.800,-23.535.563.110.036.600
4,29/12/2013,29/12/2013 00:00,QUEDA DE ARVORE,SA - SANTO AMARO,POINT Z (325940.608356 7385402.683087 0),SANTO AMARO,-46.706.330.823.636.300,-2.363.266.230.962.370
...,...,...,...,...,...,...,...,...
58267,31/01/2024,31/01/2024 00:00,ALAGAMENTO,SB - SAPOPEMBA,POINT (347483.631646 7386553.451906),SAPOPEMBA,-4.649.508.310.000.040,-2.362.445.049.928.760
58268,31/01/2024,31/01/2024 00:00,QUEDA DE ARVORE,IQ - ITAQUERA,POINT (354127.125688 7394828.6609),JOSE BONIFACIO,-4.642.916.489.999.810,-23.550.339.599.290.200
58269,31/01/2024,31/01/2024 00:00,INUNDACAO,CS - CAPELA DO SOCORRO,POINT (328375.102682 7375680.958842),CIDADE DUTRA,-4.668.359.769.999.980,-23.720.697.999.281.000
58270,31/01/2024,31/01/2024 00:00,QUEDA DE ARVORE,BT - BUTANTA,POINT (324562.933229 7390714.964635),BUTANTA,-46.719.205.899.991.200,-2.358.455.039.929.060


In [27]:
import pandas as pd
import boto3
import os
from dotenv import load_dotenv
from io import StringIO

In [28]:
# Função para enviar para o S3
def upload_to_s3(df):
    # Configurações do S3
    access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
    secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")

    s3 = boto3.client('s3',
                  aws_access_key_id=access_key_id,
                  aws_secret_access_key=secret_access_key,
                  region_name = 'sa-east-1')

    # Carregar o arquivo CSV para o S3
    s3.put_object(Bucket='black-umbrella-fiap',
                Key='bronze/ocorrencias/ocorrencias_com_distritos.csv',
                Body=df.getvalue())
    
    print('Arquivo enviado para o S3 com sucesso!')

In [29]:
csv_buffer = StringIO()
df_aws.to_csv(csv_buffer, index=False)
# Enviar os arquivos particionados para o S3
upload_to_s3(csv_buffer)

Arquivo enviado para o S3 com sucesso!
