In [None]:
#| default_exp datasources.aisgeo
%load_ext autoreload
%autoreload 2

import sys, os
from pathlib import Path

# Make the parent of the current working directory importable (presumably the
# project root - confirm), then switch the working directory to its `extracao`
# sub-folder.
sys.path.insert(0, str(Path().cwd().parent))
os.chdir(Path.cwd().parent / 'extracao')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# GEOAISWEB

> Este módulo concentra as constantes, funções de carga, processamento, mesclagem e salvamento de dados aeronáuticos provenientes da API do GeoAisWeb

In [None]:
#| export
import json
import os
from pathlib import Path
from typing import List
from urllib.request import urlopen

import pandas as pd
from dotenv import find_dotenv, load_dotenv
from extracao.constants import VOR_ILS_DME

# Load the variables of the .env file located by `find_dotenv()` into the process
# environment; `override=True` lets them replace variables that are already set.
load_dotenv(find_dotenv(), override=True)

True

## CONSTANTES


Dados para acesso à API GEOAISWEB

In [None]:
#| export
# WFS `GetFeature` requests to the GEOAISWEB GeoServer, one per layer
# (`ICA:vor`, `ICA:dme`, `ICA:ndb`); each asks for the layer's features as JSON.
LINK_VOR = 'https://geoaisweb.decea.mil.br/geoserver/ICA/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=ICA:vor&outputFormat=application%2Fjson'
LINK_DME = 'https://geoaisweb.decea.mil.br/geoserver/ICA/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=ICA:dme&outputFormat=application%2Fjson'
LINK_NDB = 'https://geoaisweb.decea.mil.br/geoserver/ICA/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=ICA:ndb&outputFormat=application%2Fjson'
# The column subsets below are positional: the processing helpers index into
# them, so every tuple must keep this order:
#   [0] frequency value (DME: channel number)
#   [1] frequency unit  (DME: channel suffix appended to the number)
#   [2] latitude        [3] longitude
#   [4], [5], [6] text pieces joined into the `Entidade` description
COLS_VOR = (
	'properties.frequency',
	'properties.frequnits',
	'properties.latitude',
	'properties.longitude',
	'properties.tipo',
	'properties.txtname',
	'properties.txtrmk',
)
COLS_NDB = (
	'properties.valfreq',
	'properties.uomfreq',
	'properties.geolat',
	'properties.geolong',
	'properties.tipo',
	'properties.txtname',
	'properties.txtrmk',
)

COLS_DME = (
	'properties.valchannel',
	'properties.codechanne',
	'properties.geolat',
	'properties.geolong',
	'properties.tipo',
	'properties.txtname',
	'Channel',  # column created by `_process_frequency` (channel number + suffix)
)

# Columns passed to `drop_duplicates` in `get_aisg` to identify repeated records
UNIQUE_COLS = ['Frequência', 'Latitude', 'Longitude']


In [None]:
#| export
def convert_frequency(
	freq: float,  # Center frequency of the emission
	unit: str,  # Unit of `freq` (case-insensitive): Hz, kHz, MHz or GHz
) -> float:  # Frequency in MHz, or -1.0 when the unit is missing or not recognized
	"""Convert the frequency `freq`, expressed in `unit`, to MHz.

	The unit is matched case-insensitively and ignoring surrounding whitespace.
	A unit that is not a string (e.g. `None` or a NaN coming from a DataFrame
	cell) or that is not one of the supported names yields the sentinel `-1.0`
	instead of raising.
	"""
	# A missing unit has no `.upper()`; report it with the same sentinel that is
	# used for unknown units.
	if not isinstance(unit, str):
		return -1.0
	unit = unit.strip().upper()
	if unit == 'MHZ':
		return freq
	if unit == 'KHZ':
		return freq / 1000
	if unit == 'HZ':
		return freq / 1e6
	if unit == 'GHZ':
		return freq * 1000
	return -1.0


In [None]:
#| export
def _process_frequency(
	df: pd.DataFrame,  # DataFrame with the raw records of one layer
	cols: List[str],  # Relevant column subset; cols[0] and cols[1] carry the frequency data
) -> pd.DataFrame:  # Copy of `df` with an added float column `Frequência`
	"""Add the `Frequência` column to a copy of `df`.

	DME layer (`cols` equal to `COLS_DME`): rows without a channel number are
	dropped, a `Channel` column is built from the channel number (cols[0]) plus
	its suffix (cols[1]) and `Frequência` receives the `DMEground` value that the
	`VOR_ILS_DME` table lists for that channel, or -1.0 when the channel is not
	in the table. If the table repeats a channel, its first row is used.

	Any other layer: `Frequência` is `convert_frequency(cols[0], cols[1])` for
	each row.

	The input frame is never modified.
	"""
	# Compare as tuples so that a list holding the same columns is still
	# recognized as the DME layout.
	if tuple(cols) == COLS_DME:
		df_channels = pd.read_csv(VOR_ILS_DME, dtype='string', dtype_backend='pyarrow')
		# Channel -> raw `DMEground` text. The conversion to float is deferred to
		# the lookup so that only channels actually present in `df` are parsed.
		ground_by_channel = {}
		for channel, ground in zip(df_channels['Channel'], df_channels['DMEground']):
			if isinstance(channel, str):
				ground_by_channel.setdefault(channel, ground)

		# `.copy()` detaches the result from the caller's frame, which silences
		# pandas' SettingWithCopyWarning on the assignments below.
		df = df.dropna(subset=[cols[0]]).copy()
		df['Channel'] = df[cols[0]].astype('int').astype('string') + df[cols[1]]
		# One dictionary lookup per row instead of scanning the table for every
		# row; a missing (NA) channel is not a `str` and falls back to -1.0.
		df['Frequência'] = pd.Series(
			[
				float(ground_by_channel[channel])
				if isinstance(channel, str) and channel in ground_by_channel
				else -1.0
				for channel in df['Channel']
			],
			index=df.index,
			dtype='float',
		)
	else:
		df = df.copy()
		# Iterate over the two columns directly: positional `row[0]` access on a
		# label-indexed row is deprecated in recent pandas releases.
		df['Frequência'] = pd.Series(
			[convert_frequency(freq, unit) for freq, unit in zip(df[cols[0]], df[cols[1]])],
			index=df.index,
			dtype='float',
		)
	return df


In [None]:
#| export
def _filter_df(df, cols):  # sourcery skip: use-fstring-for-concatenation
	df.fillna('', inplace=True)
	df['Entidade'] = (df[cols[4]] + ' - ' + df[cols[5]] + ' ' + df[cols[6]]).astype('string')
	df['Fonte'] = 'AISGEO'
	df = df[['Frequência', cols[2], cols[3], 'Entidade', 'Fonte']]
	return df.rename(
		columns={
			cols[2]: 'Latitude',
			cols[3]: 'Longitude',
		}
	)


In [None]:
#|export
def get_geodf(
	link: str,  # URL of the GEOAISWEB request that returns one layer as JSON
	cols: List[str],  # Relevant column subset of that layer
) -> pd.DataFrame:  # DataFrame with frequency, coordinates and description of the stations
	# sourcery skip: use-fstring-for-concatenation
	"""Request `link`, parse the JSON payload and return it as a processed DataFrame.

	The `features` list of the payload is flattened with `pd.json_normalize`,
	reduced to `cols`, and then passed through `_process_frequency` and
	`_filter_df`.

	Raises `ValueError` when the status is not 200 or the response is not
	declared as JSON. Errors raised by `urlopen` itself are not caught.
	"""
	# The context manager closes the HTTP connection even when validation or
	# parsing fails.
	with urlopen(link) as response:
		# A missing header yields None, which cannot be searched with `in`.
		content_type = response.headers.get('content-type') or ''
		if response.status != 200 or 'application/json' not in content_type:
			raise ValueError(f'Resposta a requisição não foi bem sucedida: {response.status=}')
		data_json = json.loads(response.read())
	df = pd.json_normalize(
		data_json['features'],
	).filter(cols, axis=1)
	df = _process_frequency(df, cols)
	return _filter_df(df, cols)


In [None]:
#| eval: false
# Example: download the VOR layer and show the processed DataFrame (needs network access)
get_geodf(LINK_VOR, COLS_VOR)

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,116.8,-16.245367,-48.979089,[AISG] VOR - ANÁPOLIS CH 101X
1,117.0,-19.689048,-47.060575,[AISG] VOR - ARAXÁ
2,113.4,-9.868361,-56.104951,[AISG] VOR - ALTA FLORESTA CH 81X OPR INFRAERO
3,116.2,-22.951451,-46.569805,[AISG] VOR - BRAGANÇA CH 109X
4,114.3,-12.079940,-45.007135,[AISG] VOR - BARREIRAS CH 90X
...,...,...,...,...
74,112.1,-25.583203,-54.503514,[AISG] VOR - FOZ CH 58X
75,115.3,-31.390714,-54.109717,[AISG] VOR - BAGÉ U/S BTN RDL 275/305
76,116.9,-23.627464,-46.654635,[AISG] VOR - CONGONHAS CH 116X \nVOR/DME NO AV...
77,115.9,-14.799000,-64.938333,[AISG] VOR - TRINIDAD-BL See Bolivia AIP


In [None]:
#| eval: false
# Example: download the NDB layer and show the processed DataFrame (needs network access)
get_geodf(LINK_NDB, COLS_NDB)

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,0.265,-21.139333,-50.426667,[AISG] NDB - ARAÇATUBA OPR TAM
1,0.3,-25.402667,-49.229,"[AISG] NDB - BACACHERI FM ARP, COVERAGE 50NM"
2,0.38,-22.314,-49.107167,[AISG] NDB - BAURU COVERAGE 50NM OPR INFRAERO
3,0.23,-7.266,-35.892667,[AISG] NDB - CAMPINA GRANDE COVERAGE 60NM OPR ...
4,0.42,-20.813167,-49.406167,[AISG] NDB - RIO PRETO OPR DAESP
5,0.407,-4.194997,-69.939733,[AISG] NDB - LETÍCIA OPR COLÔMBIA
6,0.25,-29.694722,-57.148056,[AISG] NDB - PASO DE LOS LIBRES OPR ARGENTINA
7,0.295,-19.6589,-43.896933,[AISG] NDB - LAGOA SANTA COVERAGE 50NM
8,0.375,-19.016219,-57.664456,[AISG] NDB - CORUMBÁ
9,0.205,-5.386167,-35.531,[AISG] NDB - MAXARANGUAPE


In [None]:
#| eval: false
# Example: download the DME layer and show the processed DataFrame (needs network access)
get_geodf(LINK_DME, COLS_DME)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Channel"] = df[cols[0]].astype("int").astype("string") + df[cols[1]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Frequency"] = -1.0


Unnamed: 0,Frequency,Latitude,Longitude,Description
0,1019.0,-22.812774,-42.095339,[AISG] DME - ALDEIA 58X
1,1202.0,-16.245367,-48.979089,[AISG] DME - ANÁPOLIS 115X
2,1204.0,-19.689048,-47.060575,[AISG] DME - ARAXÁ 117X
3,1168.0,-9.868361,-56.104951,[AISG] DME - ALTA FLORESTA 81X
4,1196.0,-22.951358,-46.569900,[AISG] DME - BRAGANÇA 109X
...,...,...,...,...
148,1175.0,-25.537761,-48.529855,[AISG] DME - PARANAGUÁ 88X
149,1181.0,-18.203235,-45.457072,[AISG] DME - TRÊS MARIAS 94X
150,1171.0,-27.621855,-48.632464,[AISG] DME - BIGUAÇU 84X
151,1186.0,-14.907781,-40.918839,[AISG] DME - VITÓRIA DA CONQUISTA 99X


In [None]:
#| export
def get_aisg() -> pd.DataFrame:  # DataFrame with all the GEOAISWEB records
	"""Fetch the NDB, VOR and DME layers of the GEOAISWEB API and return them stacked.

	The stacked frame is cast to `string` dtype, and rows repeating the same
	`UNIQUE_COLS` values are dropped (the index is renumbered).
	"""
	# (request URL, column subset) of each layer, in concatenation order
	sources = (
		(LINK_NDB, COLS_NDB),
		(LINK_VOR, COLS_VOR),
		(LINK_DME, COLS_DME),
	)
	frames = [get_geodf(url, columns) for url, columns in sources]
	stacked = pd.concat(frames)
	as_text = stacked.astype('string')
	return as_text.drop_duplicates(UNIQUE_COLS, ignore_index=True)