In [1]:
import pandas as pd
import geopandas as gpd
import datetime
import os
import shapely
import matplotlib.pyplot as plt
import folium


In [2]:
def abrir_dados_estacoes() -> pd.DataFrame:     
    dados_inmet = {
        'regiao': [],
        'estado': [],
        'codigo_estacao': [],
        'nome_estacao': [],
        'lat': [],
        'long': [],
        'alt': [],
        'data_fundacao': [],
        'nome_arquivo': []
    }
    caminho_inmet = '../data/INMET'
    for dir in os.listdir(caminho_inmet):
        if os.path.isdir(os.path.join(caminho_inmet, dir)):
            for file in os.listdir(os.path.join(caminho_inmet, dir)):
                if file.endswith('.csv') or file.endswith('.CSV'):
                    f = open(os.path.join(caminho_inmet, dir, file), 'r', encoding='latin1')
                    lines = [f.readline().split(';')[-1].strip() for _ in range(8)]
                    dados_inmet['regiao'].append(lines[0])
                    dados_inmet['estado'].append(lines[1])
                    dados_inmet['codigo_estacao'].append(lines[2])
                    dados_inmet['nome_estacao'].append(lines[3])
                    dados_inmet['lat'].append(float(lines[4].replace(',', '.')))
                    dados_inmet['long'].append(float(lines[5].replace(',', '.')))
                    dados_inmet['alt'].append(float(lines[6].replace(',', '.')))
                    dados_inmet['data_fundacao'].append(datetime.datetime.strptime(lines[7], '%d/%m/%y'))
                    dados_inmet['nome_arquivo'].append(os.path.join(caminho_inmet, dir, file))

    print("[info] Dados INMET carregados")
    return pd.DataFrame(dados_inmet)

In [3]:
inmet_df = abrir_dados_estacoes()
inmet_gdf = gpd.GeoDataFrame(inmet_df, geometry=gpd.points_from_xy(inmet_df['long'], inmet_df['lat']))
inmet_gdf.crs = "EPSG:4326"
inmet_gdf.to_file('../data/INMET/estacoes_inmet.geojson', driver='GeoJSON')
inmet_gdf

[info] Dados INMET carregados


Unnamed: 0,regiao,estado,codigo_estacao,nome_estacao,lat,long,alt,data_fundacao,nome_arquivo,geometry
0,S,RS,MOSTARDAS,A878,-31.248333,-50.906389,3.82,2008-03-11,../data/INMET/2022/INMET_S_RS_A878_MOSTARDAS_0...,POINT (-50.90639 -31.24833)
1,CO,GO,ITUMBIARA,A035,-18.409722,-49.191944,491.17,2007-11-01,../data/INMET/2022/INMET_CO_GO_A035_ITUMBIARA_...,POINT (-49.19194 -18.40972)
2,S,RS,DOM PEDRITO,A881,-31.002500,-54.618056,150.00,2010-04-23,../data/INMET/2022/INMET_S_RS_A881_DOM PEDRITO...,POINT (-54.61806 -31.00250)
3,CO,MT,VILA BELA DA SANTISSIMA TRINDADE,A922,-15.062778,-59.873056,213.00,2006-12-01,../data/INMET/2022/INMET_CO_MT_A922_VILA BELA ...,POINT (-59.87306 -15.06278)
4,SE,MG,ALMENARA,A508,-16.166667,-40.687778,189.11,2002-12-15,../data/INMET/2022/INMET_SE_MG_A508_ALMENARA_0...,POINT (-40.68778 -16.16667)
...,...,...,...,...,...,...,...,...,...,...
2871,S,RS,CAMAQUA,A838,-30.807953,-51.834240,92.30,2006-12-12,../data/INMET/2021/INMET_S_RS_A838_CAMAQUA_01-...,POINT (-51.83424 -30.80795)
2872,NE,BA,BOM JESUS DA LAPA,A418,-13.251111,-43.405278,447.75,2007-05-18,../data/INMET/2021/INMET_NE_BA_A418_BOM JESUS ...,POINT (-43.40528 -13.25111)
2873,N,AM,S. G. DA CACHOEIRA,A134,-0.125207,-67.061246,79.67,2011-08-31,../data/INMET/2021/INMET_N_AM_A134_S. G. DA CA...,POINT (-67.06125 -0.12521)
2874,SE,MG,PASSOS,A516,-20.745237,-46.633916,781.70,2006-07-16,../data/INMET/2021/INMET_SE_MG_A516_PASSOS_01-...,POINT (-46.63392 -20.74524)


In [4]:
estacoes_sc_gdf = inmet_gdf[inmet_gdf['estado'] == 'SC']
estacoes_sc_gdf.crs = "EPSG:4326"
print(estacoes_sc_gdf.info(),"\n===================\n",estacoes_sc_gdf.head())
estacoes_sc_gdf.to_file('../data/INMET/SC/estacoes_sc.geojson', driver='GeoJSON')

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 120 entries, 27 to 2843
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   regiao          120 non-null    object        
 1   estado          120 non-null    object        
 2   codigo_estacao  120 non-null    object        
 3   nome_estacao    120 non-null    object        
 4   lat             120 non-null    float64       
 5   long            120 non-null    float64       
 6   alt             120 non-null    float64       
 7   data_fundacao   120 non-null    datetime64[ns]
 8   nome_arquivo    120 non-null    object        
 9   geometry        120 non-null    geometry      
dtypes: datetime64[ns](1), float64(3), geometry(1), object(5)
memory usage: 10.3+ KB
None 
     regiao estado                         codigo_estacao nome_estacao  \
27       S     SC                           RIO NEGRINHO         A862   
33       S     SC           

In [5]:
br101sc_pista_gdf = gpd.read_file('../data/ANTT/BR-101-SC/br101sc_pista_principal.geojson')
br101sc_pista_gdf.crs = "EPSG:4326"
br101sc_pista_gdf.head()

Unnamed: 0,sentido,geometry
0,Crescente,"LINESTRING (-48.88637 -25.98162, -48.88623 -25..."
1,Decrescente,"LINESTRING (-48.88637 -25.98162, -48.88594 -25..."


In [6]:
br101_pista_gdf = gpd.read_file('../data/DATA_FUSION/BR101-Geral/br101_pista_line.geojson')
br101_pista_gdf.crs = "EPSG:4326"
br101_pista_gdf.head()

Unnamed: 0,rodovia,sentido,concessionaria,km,ano_do_pnv_snv,geometry
0,BR-101/BA,Decrescente,ECO101,956.9,2020,"LINESTRING (-39.86679 -18.04251, -39.86679 -18..."
1,BR-101/ES,Decrescente,ECO101,461.054,2020,"LINESTRING (-39.91817 -18.17485, -39.91817 -18..."
2,BR-101/RJ,Decrescente,RIOSP,598.6,2000,"LINESTRING (-41.30947 -21.22261, -41.30947 -21..."
3,BR-101/RS,Decrescente,VIA SUL,87.9,2019,"LINESTRING (-49.77038 -29.29990, -49.77021 -29..."
4,BR-101/SC,Crescente,VIA COSTEIRA,464.2,2020,"LINESTRING (-48.88637 -25.98162, -48.88637 -25..."


In [7]:
from shapely.ops import nearest_points

estacoes_gdf = inmet_gdf.to_crs("EPSG:32722")
pista_principal_gdf = br101sc_pista_gdf.to_crs("EPSG:32722")

def nearest_station(row, pista_gdf):
    station_point = row.geometry
    nearest_geom = nearest_points(station_point, pista_gdf.unary_union)[1]
    distance = station_point.distance(nearest_geom)
    return pd.Series([nearest_geom, distance], index=["nearest_point", "distance_meters"])

estacoes_gdf[["nearest_point", "distance_meters"]] = estacoes_gdf.apply(nearest_station, axis=1, pista_gdf=pista_principal_gdf)

estacoes_proximas_gdf = estacoes_gdf[estacoes_gdf["distance_meters"] <= 50000].sort_values(by="distance_meters")

print(estacoes_proximas_gdf.info())
print(estacoes_proximas_gdf['codigo_estacao'].value_counts())

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 45 entries, 2742 to 2294
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   regiao           45 non-null     object        
 1   estado           45 non-null     object        
 2   codigo_estacao   45 non-null     object        
 3   nome_estacao     45 non-null     object        
 4   lat              45 non-null     float64       
 5   long             45 non-null     float64       
 6   alt              45 non-null     float64       
 7   data_fundacao    45 non-null     datetime64[ns]
 8   nome_arquivo     45 non-null     object        
 9   geometry         45 non-null     geometry      
 10  nearest_point    45 non-null     object        
 11  distance_meters  45 non-null     float64       
dtypes: datetime64[ns](1), float64(4), geometry(1), object(6)
memory usage: 4.6+ KB
None
codigo_estacao
FLORIANOPOLIS                     5
ARARANG

In [8]:
from shapely.ops import nearest_points

estacoes_gdf = inmet_gdf.to_crs("EPSG:32722")
pista_principal_gdf = br101_pista_gdf.to_crs("EPSG:32722")

def nearest_station(row, pista_gdf):
    station_point = row.geometry
    nearest_geom = nearest_points(station_point, pista_gdf.unary_union)[1]
    distance = station_point.distance(nearest_geom)
    return pd.Series([nearest_geom, distance], index=["nearest_point", "distance_meters"])

estacoes_gdf[["nearest_point", "distance_meters"]] = estacoes_gdf.apply(nearest_station, axis=1, pista_gdf=pista_principal_gdf)

estacoes_proximas_br101_gdf = estacoes_gdf[estacoes_gdf["distance_meters"] <= 50000].sort_values(by="distance_meters")

print(estacoes_proximas_br101_gdf.info())
print(estacoes_proximas_br101_gdf['codigo_estacao'].value_counts())

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 195 entries, 2070 to 1221
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   regiao           195 non-null    object        
 1   estado           195 non-null    object        
 2   codigo_estacao   195 non-null    object        
 3   nome_estacao     195 non-null    object        
 4   lat              195 non-null    float64       
 5   long             195 non-null    float64       
 6   alt              195 non-null    float64       
 7   data_fundacao    195 non-null    datetime64[ns]
 8   nome_arquivo     195 non-null    object        
 9   geometry         195 non-null    geometry      
 10  nearest_point    195 non-null    object        
 11  distance_meters  195 non-null    float64       
dtypes: datetime64[ns](1), float64(4), geometry(1), object(6)
memory usage: 19.8+ KB
None
codigo_estacao
SAO MATEUS                              5

In [9]:
estacoes_proximas_gdf.head()

Unnamed: 0,regiao,estado,codigo_estacao,nome_estacao,lat,long,alt,data_fundacao,nome_arquivo,geometry,nearest_point,distance_meters
2742,S,SC,FLORIANOPOLIS,A806,-27.60253,-48.620096,4.87,2003-01-22,../data/INMET/2021/INMET_S_SC_A806_FLORIANOPOL...,POINT (734889.956 6944565.001),POINT (734637.3690424719 6944829.624862457),365.822329
290,S,SC,FLORIANOPOLIS,A806,-27.60253,-48.620096,4.87,2003-01-22,../data/INMET/2022/INMET_S_SC_A806_FLORIANOPOL...,POINT (734889.956 6944565.001),POINT (734637.3690424719 6944829.624862457),365.822329
827,S,SC,FLORIANOPOLIS,A806,-27.60253,-48.620096,4.87,2003-01-22,../data/INMET/2020/INMET_S_SC_A806_FLORIANOPOL...,POINT (734889.956 6944565.001),POINT (734637.3690424719 6944829.624862457),365.822329
2146,S,SC,FLORIANOPOLIS,A806,-27.6025,-48.62,4.87,2003-01-22,../data/INMET/2024/INMET_S_SC_A806_FLORIANOPOL...,POINT (734899.499 6944568.143),POINT (734643.931952765 6944835.889229444),370.138522
1329,S,SC,FLORIANOPOLIS,A806,-27.6025,-48.62,4.87,2003-01-22,../data/INMET/2023/INMET_S_SC_A806_FLORIANOPOL...,POINT (734899.499 6944568.143),POINT (734643.931952765 6944835.889229444),370.138522


In [10]:
x_mean = estacoes_proximas_gdf['long'].mean()
y_mean = estacoes_proximas_gdf['lat'].mean()

m = folium.Map(location=[y_mean, x_mean], zoom_start=8)

folium.GeoJson(pista_principal_gdf,
                style_function=lambda x: {'color': 'darkred', 'weight': 2}).add_to(m)

for i, row in estacoes_proximas_gdf.iterrows():
    folium.Circle(
        location=[row['lat'], row['long']],
        radius=55000,
        popup=row['nome_estacao'],
        color='blue',
        fill=True,
        fill_color='blue'
    ).add_to(m)

m

In [11]:
x_mean = estacoes_proximas_br101_gdf['long'].mean()
y_mean = estacoes_proximas_br101_gdf['lat'].mean()

m = folium.Map(location=[y_mean, x_mean], zoom_start=8)

folium.GeoJson(pista_principal_gdf,
                style_function=lambda x: {'color': 'darkred', 'weight': 2}).add_to(m)

for i, row in estacoes_proximas_br101_gdf.iterrows():
    folium.Circle(
        location=[row['lat'], row['long']],
        radius=50000,
        popup=row['nome_estacao'],
        color='blue',
        fill=True,
        fill_color='blue'
    ).add_to(m)

m

In [12]:
dados_tempo = {}

for i, row in estacoes_proximas_gdf.iterrows():
    estacao = row['nome_estacao']
    dados_tempo[estacao] = pd.read_csv(row['nome_arquivo'], sep=';', encoding='latin1', skiprows=8, parse_dates=['Data'])
    
    dados_tempo[estacao]['Hora UTC'] = dados_tempo[estacao]['Hora UTC'].str.replace('UTC', '')
    dados_tempo[estacao]['Hora UTC'] = pd.to_datetime(dados_tempo[estacao]['Hora UTC'], format='%H:%M')

    dados_tempo[estacao]['Precipitacao Total, Horario (mm)'] = pd.to_numeric(dados_tempo[estacao]['PRECIPITACAO TOTAL, HORARIO (mm)'], errors='coerce')
    dados_tempo[estacao]['ano'] = dados_tempo[estacao]['Data'].dt.year
    dados_tempo[estacao]['mes'] = dados_tempo[estacao]['Data'].dt.month
    dados_tempo[estacao]['dia'] = dados_tempo[estacao]['Data'].dt.day

    dados_tempo[estacao]['Estacao'] = estacao

    dados_tempo[estacao] = dados_tempo[estacao].drop(
        columns=['PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)',
                'PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB)',
                'PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB)',
                'RADIACAO GLOBAL (Kj/m²)',
                'TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)',
                'TEMPERATURA DO PONTO DE ORVALHO (°C)',
                'TEMPERATURA MAXIMA NA HORA ANT. (AUT) (°C)',
                'TEMPERATURA MAXIMA NA HORA ANT. (AUT) (°C).1',
                'TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C)',
                'TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C)',
                'TEMPERATURA MINIMA NA HORA ANT. (AUT) (°C)',
                'TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C)',
                'UMIDADE REL. MAX. NA HORA ANT. (AUT) (%)',
                'UMIDADE REL. MIN. NA HORA ANT. (AUT) (%)',
                'UMIDADE RELATIVA DO AR, HORARIA (%)',
                'VENTO, DIRECAO HORARIA (gr) (° (gr))',
        ])

dados_tempo

ValueError: time data "0000 " doesn't match format "%H:%M", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [None]:
dados_tempo_br101 = {}

for i, row in estacoes_proximas_br101_gdf.iterrows():
    estacao = row['nome_estacao']
    dados_tempo_br101[estacao] = pd.read_csv(row['nome_arquivo'], sep=';', encoding='latin1', skiprows=8, parse_dates=['Data'])
    
    dados_tempo_br101[estacao]['Hora UTC'] = dados_tempo_br101[estacao]['Hora UTC'].str.replace('UTC', '')
    dados_tempo_br101[estacao]['Hora UTC'] = pd.to_datetime(dados_tempo_br101[estacao]['Hora UTC'].str.strip(), format='%H%M')

    dados_tempo_br101[estacao]['PRECIPITAÇÃO TOTAL, HORÁRIO (mm)'] = pd.to_numeric(dados_tempo_br101[estacao]['PRECIPITAÇÃO TOTAL, HORÁRIO (mm)'], errors='coerce')
    dados_tempo_br101[estacao]['ano'] = dados_tempo_br101[estacao]['Data'].dt.year
    dados_tempo_br101[estacao]['mes'] = dados_tempo_br101[estacao]['Data'].dt.month
    dados_tempo_br101[estacao]['dia'] = dados_tempo_br101[estacao]['Data'].dt.day

    dados_tempo_br101[estacao]['Estacao'] = estacao

dados_tempo_br101

{'A616':            Data            Hora UTC  PRECIPITAÇÃO TOTAL, HORÁRIO (mm)  \
 0    2021-01-01 1900-01-01 00:00:00                               0.0   
 1    2021-01-01 1900-01-01 01:00:00                               0.0   
 2    2021-01-01 1900-01-01 02:00:00                               0.0   
 3    2021-01-01 1900-01-01 03:00:00                               0.0   
 4    2021-01-01 1900-01-01 04:00:00                               0.0   
 ...         ...                 ...                               ...   
 8755 2021-12-31 1900-01-01 19:00:00                               0.0   
 8756 2021-12-31 1900-01-01 20:00:00                               0.0   
 8757 2021-12-31 1900-01-01 21:00:00                               0.0   
 8758 2021-12-31 1900-01-01 22:00:00                               0.0   
 8759 2021-12-31 1900-01-01 23:00:00                               0.0   
 
      PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)  \
 0                                     

In [21]:
dados_tempo_br101_total = pd.concat(dados_tempo_br101.values())
dados_tempo_br101_total.head()

Unnamed: 0,Data,Hora UTC,"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (Kj/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),...,UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)",Unnamed: 19,ano,mes,dia,Estacao
0,2021-01-01,1900-01-01 00:00:00,0.0,10108,10108,10102,0,243,203,249,...,78.0,78.0,52.0,52,19,,2021,1,1,A616
1,2021-01-01,1900-01-01 01:00:00,0.0,1011,10112,10108,0,236,203,245,...,77.0,82.0,45.0,42,15,,2021,1,1,A616
2,2021-01-01,1900-01-01 02:00:00,0.0,10107,10112,10107,0,223,205,236,...,82.0,90.0,28.0,25,",8",,2021,1,1,A616
3,2021-01-01,1900-01-01 03:00:00,0.0,10101,10107,10101,0,21,201,224,...,89.0,95.0,295.0,14,",7",,2021,1,1,A616
4,2021-01-01,1900-01-01 04:00:00,0.0,10096,10101,10096,0,21,204,212,...,95.0,96.0,330.0,13,",4",,2021,1,1,A616


In [None]:
dados_estacoes = pd.concat(dados_tempo.values())

dados_estacoes

Unnamed: 0,Data,Hora UTC,"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (Kj/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),...,UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)",Unnamed: 19,Estacao,ano,mes,dia
0,2023-01-01,0000,0,10149,10149,10144,,252,21,254,...,76.0,77.0,5.0,59,18,,A806,2023,1,1
1,2023-01-01,0100,0,10155,10155,10149,,252,208,253,...,76.0,77.0,351.0,59,0,,A806,2023,1,1
2,2023-01-01,0200,0,10157,10158,10155,,247,21,252,...,76.0,80.0,328.0,0,0,,A806,2023,1,1
3,2023-01-01,0300,0,10153,10157,10153,,244,209,249,...,80.0,81.0,314.0,0,0,,A806,2023,1,1
4,2023-01-01,0400,0,10146,10153,10146,,242,206,244,...,80.0,80.0,327.0,0,0,,A806,2023,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2021-12-31,1900,0,8949,8955,8949,29375,248,156,26,...,43.0,57.0,112.0,87,47,,A897,2021,12,31
8756,2021-12-31,2000,0,8947,8949,8946,21726,237,153,249,...,55.0,60.0,112.0,83,44,,A897,2021,12,31
8757,2021-12-31,2100,0,8951,8952,8947,12514,223,161,239,...,58.0,68.0,123.0,71,4,,A897,2021,12,31
8758,2021-12-31,2200,0,8956,8956,8951,2875,199,164,225,...,67.0,81.0,110.0,68,33,,A897,2021,12,31
