In [None]:
#| default_exp anatel
# Enable IPython's autoreload extension in mode 2, so edited project modules
# are re-imported before each cell runs. Re-running this cell in a live kernel
# makes IPython report that the extension is already loaded (see cell output).
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:

# Put the parent of the current working directory at the front of the import
# search path so the project package can be imported from this notebook.
sys.path.insert(0, str(Path.cwd().parent))

# Anatel

> Este módulo consolida as bases da Anatel e realiza pós-processamento dos dados obtidos.

In [None]:
#| export
import shutil
import urllib.request
from functools import cached_property
from typing import List
from zipfile import ZipFile

import geopandas as gpd
import pandas as pd
from dotenv import find_dotenv, load_dotenv
from fastcore.foundation import L
from fastcore.parallel import parallel
from fastcore.xtras import Path

from extracao.constants import COLUNAS, IBGE_MUNICIPIOS, IBGE_POLIGONO, MALHA_IBGE
from extracao.datasources.aeronautica import Aero
from extracao.datasources.base import Base
from extracao.datasources.mosaico import MONGO_URI
from extracao.datasources.sitarweb import SQLSERVER_PARAMS, Radcom, Stel
from extracao.datasources.smp import SMP
from extracao.datasources.srd import SRD
from extracao.datasources.telecom import Telecom

In [None]:
#| export
# Locate a .env file with `find_dotenv` and load it into the process
# environment; runs at import time in the exported module.
load_dotenv(find_dotenv())


True

## Base Consolidada ANATEL

In [None]:
# |export
class Outorgadas(Base):
	"""Helper class that aggregates the data originating from Anatel.

	It runs the individual source extractors (Telecom, SMP, SRD, Stel, Radcom
	and Aero), validates their coordinates against the IBGE municipality
	polygons and concatenates everything into one frame restricted to
	`COLUNAS`.
	"""

	def __init__(
		self,
		sql_params: dict = SQLSERVER_PARAMS,  # Forwarded to `Stel` and `Radcom`
		mongo_uri: str = MONGO_URI,  # Forwarded to `Telecom`, `SMP` and `SRD`
		limit: int = 0,  # Forwarded to `Telecom` and `SMP`
	):
		"""Store the connection settings that are handed to the source extractors."""
		self.sql_params = sql_params
		self.mongo_uri = mongo_uri
		self.limit = limit

	@property
	def columns(self):
		"""Columns kept in the consolidated frame (the `COLUNAS` constant)."""
		return COLUNAS

	@cached_property
	def df_cache(self) -> pd.DataFrame:
		"""Previously saved frame read through `_read(self.stem)`.

		Falls back to an empty frame with `self.columns` when `_read` raises
		`ValueError`. The result is cached on the instance.
		"""
		try:
			df = self._read(self.stem)
		except ValueError:
			df = pd.DataFrame(columns=self.columns)
		return df

	@property
	def stem(self):
		"""Name passed to `_read` to locate this dataset."""
		return 'anatel'

	@staticmethod
	def _update_instance(class_instance):
		"""Update and save one source extractor, then return its `df` attribute."""
		class_instance.update()
		class_instance.save()
		return class_instance.df

	@cached_property
	def extraction(self) -> L:
		"""Run every source extractor in parallel and collect the resulting frames.

		One worker is used per source. The result is cached on the instance.
		`_format` treats the last item as the Aero frame, so `Aero()` must stay
		last in `sources`.
		"""
		sources = [
			Telecom(self.mongo_uri, self.limit),
			SMP(self.mongo_uri, self.limit),
			SRD(self.mongo_uri),
			Stel(self.sql_params),
			Radcom(self.sql_params),
			Aero(),
		]

		return parallel(Outorgadas._update_instance, sources, n_workers=len(sources), progress=True)

	@staticmethod
	def verify_shapefile_folder():
		"""Make sure the IBGE shapefile set is on disk, downloading it when incomplete.

		The set is considered complete when the file at `IBGE_POLIGONO` and its
		'.cpg', '.dbf', '.prj' and '.shx' siblings all exist. Otherwise the
		folder is emptied, the archive at `MALHA_IBGE` is downloaded next to the
		folder, extracted into it and removed.
		"""
		shapefile_path = Path(IBGE_POLIGONO)
		parent_folder = shapefile_path.parent
		parent_folder.mkdir(exist_ok=True, parents=True)
		zip_file_path = parent_folder.with_suffix('.zip')

		# The geometry file itself is what `gpd.read_file(IBGE_POLIGONO)` opens,
		# so it has to be checked together with its sidecar files.
		sidecars = [shapefile_path.with_suffix(ext) for ext in ('.cpg', '.dbf', '.prj', '.shx')]
		required_files = [shapefile_path, *sidecars]
		if all(path.is_file() for path in required_files):
			return

		# Start from a clean folder before extracting a fresh copy
		parent_folder.ls().map(Path.unlink)
		try:
			urllib.request.urlretrieve(MALHA_IBGE, zip_file_path)
			with ZipFile(zip_file_path, 'r') as zip_ref:
				zip_ref.extractall(parent_folder)
		finally:
			# Never leave a (possibly partial) archive behind, even on failure
			zip_file_path.unlink(missing_ok=True)

	def fill_nan_coordinates(
		self,
		df: pd.DataFrame,  # DataFrame with the Anatel data
	) -> pd.DataFrame:  # DataFrame with missing coordinates filled from the IBGE table
		"""Fill missing coordinates with the municipality coordinates from the IBGE table.

		Rows where Latitude or Longitude is missing receive the coordinates that
		`IBGE_MUNICIPIOS` lists for their `Código_Município`, and the change is
		recorded through `register_log`. The IBGE coordinates are kept in the
		`Latitude_ibge` / `Longitude_ibge` columns of the returned frame.
		"""

		municipios = pd.read_csv(
			IBGE_MUNICIPIOS,
			usecols=['Código_Município', 'Latitude', 'Longitude'],
			dtype='string[pyarrow]',
			dtype_backend='pyarrow',
		)

		# Both frames have Latitude/Longitude, so the merge suffixes them:
		# `_x` comes from `df`, `_y` from the IBGE table.
		df = pd.merge(df, municipios, on='Código_Município', how='left', copy=False)

		null_coords = df.Latitude_x.isna() | df.Longitude_x.isna()

		df.loc[null_coords, ['Latitude_x', 'Longitude_x']] = df.loc[
			null_coords, ['Latitude_y', 'Longitude_y']
		]

		log = """[("Colunas", ["Latitude", "Longitude"]),
		           ("Processamento", "Coordenadas Ausentes. Inserido coordenadas do Município")]"""
		df = self.register_log(df, log, null_coords)

		return df.rename(
			columns={
				'Latitude_x': 'Latitude',
				'Longitude_x': 'Longitude',
				'Latitude_y': 'Latitude_ibge',
				'Longitude_y': 'Longitude_ibge',
			}
		)

	def intersect_coordinates_on_poligon(self, df: pd.DataFrame, check_municipio: bool = True):
		"""Attach to each row the IBGE municipality whose polygon contains its coordinates.

		Args:
			df: Frame with `Longitude` and `Latitude` columns.
			check_municipio: When True, rows whose `Código_Município` differs
				from the polygon's `CD_MUN` are logged through `register_log`
				and the original `Código_Município`, `Município` and `UF`
				columns are dropped in favour of the polygon attributes.

		Returns:
			The spatially joined frame with `CD_MUN`, `NM_MUN` and `SIGLA_UF`
			renamed to `Código_Município`, `Município` and `UF`. The join is an
			inner join, so rows whose point lies within no polygon are dropped.
		"""
		regions = gpd.read_file(IBGE_POLIGONO)

		# Convert pandas dataframe to geopandas df with geometry point given coordinates
		gdf_points = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude))

		# Set the same coordinate reference system (CRS) as the regions shapefile
		gdf_points.crs = regions.crs

		# Spatial join points to the regions
		gdf = gpd.sjoin(gdf_points, regions, how='inner', predicate='within')

		if check_municipio:
			# Rows whose declared municipality disagrees with the polygon they fall in
			check_coords = gdf.Código_Município != gdf.CD_MUN

			# The literal must be a closed list, like the one in `fill_nan_coordinates`
			log = """[("Colunas", ["Código_Município", "Município", "UF"]),
					("Processamento", "Informações substituídas  pela localização correta das coordenadas.")]"""
			# Keep the returned frame, as `fill_nan_coordinates` does, so the
			# log entry is not lost if `register_log` returns a new object.
			gdf = self.register_log(gdf, log, check_coords)

			gdf = gdf.drop(columns=['Código_Município', 'Município', 'UF'])

		return gdf.rename(
			columns={
				'CD_MUN': 'Código_Município',
				'NM_MUN': 'Município',
				'SIGLA_UF': 'UF',
			}
		)

	def validate_coordinates(self, df: pd.DataFrame, check_municipio: bool = True) -> pd.DataFrame:
		"""
		Validates the coordinates in the given DataFrame.

		Ensures the IBGE shapefile is available, optionally fills missing
		coordinates from the municipality table, then intersects the points
		with the municipality polygons.

		Args:
		        df: The DataFrame containing the coordinates to be validated.
		        check_municipio: A boolean indicating whether to fill missing coordinates
		                and check the municipality information (default: True).

		Returns:
		        pd.DataFrame: The DataFrame with validated coordinates.
		"""
		self.verify_shapefile_folder()
		if check_municipio:
			df = self.fill_nan_coordinates(df)
		return self.intersect_coordinates_on_poligon(df, check_municipio)

	def _format(
		self,
		dfs: List,  # List with the individual API sources
	) -> pd.DataFrame:  # Processed DataFrame
		"""Validate and merge the source frames into the consolidated frame.

		The last item of `dfs` is treated as the Aero frame and validated
		without the municipality check; the remaining frames are concatenated
		and validated with it. The result is sorted by Frequência, Latitude and
		Longitude and restricted to `self.columns`.
		"""
		# Work on a copy: `dfs` may be the cached `extraction` result, and popping
		# from it directly would drop the Aero frame for every later call.
		sources = list(dfs)
		aero = self.validate_coordinates(sources.pop(), False)
		anatel = self.validate_coordinates(pd.concat(sources, ignore_index=True))
		df = pd.concat([aero, anatel], ignore_index=True).sort_values(
			['Frequência', 'Latitude', 'Longitude'], ignore_index=True
		)
		return df.loc[:, self.columns]

In [None]:
import geopandas as gpd
import pandas as pd
from fastcore.xtras import Path

In [None]:
# Folder holding the data files used by the extractors, resolved relative to the
# parent of the notebook's working directory.
files = Path.cwd().parent.joinpath('extracao', 'datasources', 'arquivos')

In [None]:
# List the entries of the `files` directory
files.ls()

(#8) [Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/BR_Municipios_2022'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/canalizacao_smp.csv'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/municipios.csv'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/radares.csv'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/saida'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/VHF_COM.csv'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/VHF_NAV.csv'),Path('/mnt/c/Users/rsilva/OneDrive - ANATEL/Code/anateldb/extracao/datasources/arquivos/VOR_ILS_DME_Channel.csv')]

In [None]:
# Load the IBGE municipality polygons (2022 mesh) from the shapefile
regions = gpd.read_file(files.joinpath('BR_Municipios_2022', 'BR_Municipios_2022.shp'))

In [None]:
# Load the saved telecom extraction from the output folder
df = pd.read_parquet(files.joinpath('saida', 'telecom.parquet.gzip'))

In [None]:
# Display the telecom frame loaded from the parquet file
df

Unnamed: 0,Frequência,Entidade,Fistel,Número_Estação,Município,Código_Município,UF,Latitude,Longitude,Classe,...,Cod_Tipo_Antena,Polarização,Ganho_Antena,FC_Antena,Ang_MP_Antena,Ângulo_Elevação,Azimute,Altura_Antena,Perdas_Acessorias,Log
0,173.075,TRACKER DO BRASIL LTDA,50004466071,505805553,Mairiporã,3528502,SP,-23.4030555555555,-46.63583333333333,FB,...,86,V,5.8,,,,,60.0,,
1,173.075,TRACKER DO BRASIL LTDA,50004466071,684490021,Belo Horizonte,3106200,MG,-19.793333333333333,-43.968888888888834,FB,...,86,V,9.0,,,,,20.0,,
2,173.075,TRACKER DO BRASIL LTDA,50004466071,684587017,Tubarão,4218707,SC,-28.520833333333332,-48.98527777777767,FB,...,86,V,9.0,,,,,25.0,,
3,173.075,TRACKER DO BRASIL LTDA,50004466071,684610094,Florianópolis,4205407,SC,-27.588611111111,-48.533611111111,FB,...,86,V,9.0,,,,,15.0,,
4,173.075,TRACKER DO BRASIL LTDA,50004466071,684649900,João Monlevade,3136207,MG,-19.868888888888833,-43.193333333333335,FB,...,86,V,9.0,,,,,20.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12755,788.0,TELEFONICA BRASIL S.A.,50419058354,1014443412,MACAE,3302403,RJ,-22.623611,-39.98972,FB,...,760,X,14.9,22.0,68.0,0.0,60.0,71.5,0.0,
12756,1865.0,TELEFONICA BRASIL S.A.,50419058354,1014443447,Vitória,3205309,ES,-21.939722,-39.785278,FB,...,760,X,16.7,25.0,64.0,0.0,206.0,61.47,0.0,
12757,788.0,TELEFONICA BRASIL S.A.,50419058354,1014443447,Vitória,3205309,ES,-21.939722,-39.785278,FB,...,760,X,14.9,22.0,68.0,0.0,206.0,61.47,0.0,
12758,1862.5,TELEFONICA BRASIL S.A.,50419058354,1014443471,Vitória,3205309,ES,-21.213889,-39.997222,FB,...,760,X,16.7,25.0,64.0,0.0,187.0,58.05,0.0,


In [None]:
from shapely.geometry import Point

# Turn `df` into a GeoDataFrame with one point per row, built from its
# Longitude (x) and Latitude (y) columns, and give it the same coordinate
# reference system (CRS) as the regions shapefile so the two can be joined.
gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.Longitude, df.Latitude),
    crs=regions.crs,
)


In [None]:
# Display the point GeoDataFrame (note the added `geometry` column)
gdf_points

Unnamed: 0,Frequência,Entidade,Fistel,Número_Estação,Município,Código_Município,UF,Latitude,Longitude,Classe,...,Polarização,Ganho_Antena,FC_Antena,Ang_MP_Antena,Ângulo_Elevação,Azimute,Altura_Antena,Perdas_Acessorias,Log,geometry
0,173.075,TRACKER DO BRASIL LTDA,50004466071,505805553,Mairiporã,3528502,SP,-23.4030555555555,-46.63583333333333,FB,...,V,5.8,,,,,60.0,,,POINT (-46.63583 -23.40306)
1,173.075,TRACKER DO BRASIL LTDA,50004466071,684490021,Belo Horizonte,3106200,MG,-19.793333333333333,-43.968888888888834,FB,...,V,9.0,,,,,20.0,,,POINT (-43.96889 -19.79333)
2,173.075,TRACKER DO BRASIL LTDA,50004466071,684587017,Tubarão,4218707,SC,-28.520833333333332,-48.98527777777767,FB,...,V,9.0,,,,,25.0,,,POINT (-48.98528 -28.52083)
3,173.075,TRACKER DO BRASIL LTDA,50004466071,684610094,Florianópolis,4205407,SC,-27.588611111111,-48.533611111111,FB,...,V,9.0,,,,,15.0,,,POINT (-48.53361 -27.58861)
4,173.075,TRACKER DO BRASIL LTDA,50004466071,684649900,João Monlevade,3136207,MG,-19.868888888888833,-43.193333333333335,FB,...,V,9.0,,,,,20.0,,,POINT (-43.19333 -19.86889)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12755,788.0,TELEFONICA BRASIL S.A.,50419058354,1014443412,MACAE,3302403,RJ,-22.623611,-39.98972,FB,...,X,14.9,22.0,68.0,0.0,60.0,71.5,0.0,,POINT (-39.98972 -22.62361)
12756,1865.0,TELEFONICA BRASIL S.A.,50419058354,1014443447,Vitória,3205309,ES,-21.939722,-39.785278,FB,...,X,16.7,25.0,64.0,0.0,206.0,61.47,0.0,,POINT (-39.78528 -21.93972)
12757,788.0,TELEFONICA BRASIL S.A.,50419058354,1014443447,Vitória,3205309,ES,-21.939722,-39.785278,FB,...,X,14.9,22.0,68.0,0.0,206.0,61.47,0.0,,POINT (-39.78528 -21.93972)
12758,1862.5,TELEFONICA BRASIL S.A.,50419058354,1014443471,Vitória,3205309,ES,-21.213889,-39.997222,FB,...,X,16.7,25.0,64.0,0.0,187.0,58.05,0.0,,POINT (-39.99722 -21.21389)


In [None]:
# Keep only the points that fall within a municipality polygon and attach that
# polygon's attributes to each of them (inner spatial join).
points_with_regions = gdf_points.sjoin(regions, how="inner", predicate="within")


In [None]:
# Display the joined frame (original columns plus the polygon attributes)
points_with_regions

Unnamed: 0,Frequência,Entidade,Fistel,Número_Estação,Município,Código_Município,UF,Latitude,Longitude,Classe,...,Azimute,Altura_Antena,Perdas_Acessorias,Log,geometry,index_right,CD_MUN,NM_MUN,SIGLA_UF,AREA_KM2
0,173.075,TRACKER DO BRASIL LTDA,50004466071,505805553,Mairiporã,3528502,SP,-23.4030555555555,-46.63583333333333,FB,...,,60.0,,,POINT (-46.63583 -23.40306),3587,3528502,Mairiporã,SP,320.697
1330,368.7875,VERSATIL RADIOCOMUNICACAO LTDA,50416767397,1007057677,Mairiporã,3528502,SP,-23.317097222222166,-46.5897,ML,...,,1.5,,,POINT (-46.58970 -23.31710),3587,3528502,Mairiporã,SP,320.697
1331,372.9625,VERSATIL RADIOCOMUNICACAO LTDA,50416767397,1007057677,Mairiporã,3528502,SP,-23.317097222222166,-46.5897,ML,...,,1.5,,,POINT (-46.58970 -23.31710),3587,3528502,Mairiporã,SP,320.697
1332,373.1125,VERSATIL RADIOCOMUNICACAO LTDA,50416767397,1007057677,Mairiporã,3528502,SP,-23.317097222222166,-46.5897,ML,...,,1.5,,,POINT (-46.58970 -23.31710),3587,3528502,Mairiporã,SP,320.697
1333,373.3375,VERSATIL RADIOCOMUNICACAO LTDA,50416767397,1007057677,Mairiporã,3528502,SP,-23.317097222222166,-46.5897,ML,...,,1.5,,,POINT (-46.58970 -23.31710),3587,3528502,Mairiporã,SP,320.697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12607,8426.0,INTERJATO SERVICOS DE TELECOMUNICACOES LTDA,50419573127,1011176413,Nova Cruz,2408300,RN,-6.454325,-35.376041666666666,FX,...,275.8,13.0,,,POINT (-35.37604 -6.45432),1165,2408300,Nova Cruz,RN,277.658
12614,10.835,INTERJATO SERVICOS DE TELECOMUNICACOES LTDA,50419573127,1011208072,São Miguel do Gostoso,2412559,RN,-5.1513888888888335,-35.6645305555555,FX,...,156.7,22.0,,,POINT (-35.66453 -5.15139),1211,2412559,São Miguel do Gostoso,RN,431.444
12615,8314.0,INTERJATO SERVICOS DE TELECOMUNICACOES LTDA,50419573127,1011224205,Rio do Fogo,2408953,RN,-5.3348305555555,-35.3832,FX,...,286.7,41.0,,,POINT (-35.38320 -5.33483),1172,2408953,Rio do Fogo,RN,151.097
12694,19452.5,Sempre Telecomunicacoes Ltda,50442657307,1014329180,Brumadinho,3109006,MG,-20.113217,-44.295606,FX,...,233.9,15.0,,,POINT (-44.29561 -20.11322),2340,3109006,Brumadinho,MG,639.434
