In [1]:
#| default_exp main
import sys
from pathlib import Path

In [2]:
# Put the parent of the current working directory first on `sys.path`
# (presumably the project root, so the local `extracao` package is importable — verify).
sys.path.insert(0, str(Path().cwd().parent))
# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before each execution so edits to the source files are picked up.
%load_ext autoreload
%autoreload 2

# Principal
> Este módulo concentra funções auxiliares que filtram os dados do banco e os formatam com os campos de interesse para aplicações específicas, como o [appAnalise](https://github.com/EricMagalhaesDelgado/appAnalise).

In [3]:
#| export
import os
from pathlib import Path
import json
from typing import Union
from datetime import datetime

import pandas as pd
from fastcore.test import *
from rich import print
import pyodbc
from pymongo import MongoClient
from dotenv import load_dotenv, find_dotenv

from extracao.constants import APP_ANALISE_PT, APP_ANALISE_EN
from extracao.reading import read_base, read_aero
from extracao.format import merge_close_rows


In [4]:
#| export
# Upper bound for the `Frequency` column: `_format_matlab` drops every row whose
# frequency is above this value (unit not stated here; presumably MHz — confirm).
LIMIT_FREQ = 84812.50
# Load the variables of the `.env` file located by `find_dotenv()` into the environment.
load_dotenv(find_dotenv())

True

In [5]:
#| export
def _filter_matlab(
    df: pd.DataFrame,  # Input base data file
) -> pd.DataFrame:  # Data file formatted for reading in Matlab
    """Select and format the columns of the Anatel base as required by the Matlab app.

    Builds the `#Estação` and `Descrição` columns, replaces the coordinates of the rows
    whose `Coords_Valida_IBGE` is "0" with the IBGE ones (marking their description with a
    trailing `*`) and returns only the `APP_ANALISE_PT` columns, renamed to `APP_ANALISE_EN`.

    Note: the helper columns are added to, and the coordinates overwritten on, the very
    `df` object that is passed in.
    """
    df["#Estação"] = df["Número_Estação"]
    # Stations with a multiplicity other than "1" are labelled "<station>+<multiplicity>".
    # A missing multiplicity keeps the plain station number instead of turning it into NaN.
    has_multi = df.Multiplicidade.notna() & (df.Multiplicidade != "1")
    df.loc[has_multi, "#Estação"] = (
        df.loc[has_multi, "Número_Estação"]
        + "+"
        + df.loc[has_multi, "Multiplicidade"]
    )
    cols_desc = [
        "Fonte",
        "Status",
        "Classe",
        "Entidade",
        "Fistel",
        "#Estação",
        "Município_IBGE",
        "UF",
    ]
    # `df.loc[:, cols].fillna(..., inplace=True)` only fills a temporary copy, so the
    # "NI" placeholders never reached the description. They are applied here to a separate
    # frame used solely to build the text; the exported columns are left untouched.
    parts = df[cols_desc].copy()
    # Title-case before filling so that the placeholder stays "NI" rather than "Ni".
    parts["Entidade"] = parts["Entidade"].str.title()
    parts = parts.fillna("NI")

    df["Descrição"] = (
        "["
        + parts["Fonte"]
        + "] "
        + parts["Status"]
        + ", "
        + parts["Classe"]
        + ", "
        + parts["Entidade"]
        + " ("
        + parts["Fistel"]
        + ", "
        + parts["#Estação"]
        + "), "
        + parts["Município_IBGE"]
        + "/"
        + parts["UF"]
    )

    # Rows flagged "0" failed the IBGE coordinate validation; missing flags count as valid.
    bad_coords = (df.Coords_Valida_IBGE == "0").fillna(False)

    # Mark the descriptions whose coordinates are replaced below. The previous condition,
    # `bad_coords == "False"`, compared a boolean mask with a string and never matched.
    df.loc[bad_coords, "Descrição"] = df.loc[bad_coords, "Descrição"] + "*"

    df.loc[bad_coords, ["Latitude", "Longitude"]] = df.loc[
        bad_coords, ["Latitude_IBGE", "Longitude_IBGE"]
    ].values

    df = df.loc[:, APP_ANALISE_PT]
    df.columns = APP_ANALISE_EN
    return df


def _format_matlab(
    df: pd.DataFrame,  # Input base data file
) -> pd.DataFrame:  # Data file formatted for reading in Matlab
    """Cast the final data file to the dtypes and layout expected by the Matlab app.

    Missing values become -1 (numeric columns) or "NI" (`Class`, `Description`), rows above
    `LIMIT_FREQ` are dropped, duplicates are removed, the rows are sorted by frequency and
    coordinates, and a sequential `Id` ("#1", "#2", ...) is added as the first column.

    Note: the dtype conversions are written back to the `df` object that is passed in.
    """
    for c in ["Latitude", "Longitude"]:
        df[c] = df[c].fillna(-1).astype("float32")

    df["Frequency"] = df["Frequency"].astype("float64")

    # Empty strings and "-1" both mean "missing". The placeholder is filled BEFORE the
    # cast for every column: casting an object column that still holds `pd.NA` to a
    # float dtype raises, which is what the previous `astype(...).fillna(-1)` on BW did.
    for col, dtype in (("Service", "int16"), ("Station", "int32"), ("BW", "float32")):
        df.loc[df[col].isin(["", "-1"]), col] = pd.NA
        df[col] = df[col].fillna("-1").astype(dtype)

    df.loc[df["Class"].isin(["", "-1"]), "Class"] = pd.NA
    df["Class"] = df.Class.fillna("NI").astype("category")

    # Apply the frequency limit before numbering the rows, so the Ids are contiguous by
    # construction instead of relying on the dropped rows being at the end of the sort.
    df = (
        df[df.Frequency <= LIMIT_FREQ]
        .drop_duplicates(keep="first")
        .sort_values(by=["Frequency", "Latitude", "Longitude"])
        .reset_index(drop=True)
    )
    df["Id"] = pd.Series(
        [f"#{i + 1}" for i in df.index], index=df.index, dtype="string"
    )
    df.loc[df.Description == "", "Description"] = pd.NA
    df["Description"] = df["Description"].astype("string").fillna("NI")
    return df[["Id"] + list(APP_ANALISE_EN)]


In [6]:
#| export
def get_db(
    path: Union[str, Path],  # Folder where the files are saved
    connSQL: pyodbc.Connection = None,  # SQL Server connection object
    clientMongoDB: MongoClient = None,  # MongoDB client object
) -> pd.DataFrame:  # DataFrame with the Anatel and Aeronautics bases merged
    """Read the Anatel bases, merge the Aeronautics data, save the result and return it.

    Both connection objects are forwarded to `read_base`; `read_aero` is asked to update
    its data only when both of them are provided (truthy).

    Besides returning the DataFrame, the function writes `AnatelDB.parquet.gzip` into
    `path` and refreshes the timestamps under the "anateldb" key of `VersionFile.json`,
    which must already exist in that folder.
    """
    dest = Path(path)
    dest.mkdir(parents=True, exist_ok=True)
    time_fmt = "%d/%m/%Y %H:%M:%S"

    print(":scroll:[green]Lendo as bases de dados da Anatel...")
    df = read_base(path, connSQL, clientMongoDB)
    df = _filter_matlab(df)
    mod_times = {"ANATEL": datetime.now().strftime(time_fmt)}

    print(":airplane:[blue]Requisitando os dados da Aeronáutica.")
    update = all([connSQL, clientMongoDB])
    aero = read_aero(path, update=update)
    mod_times["AERONAUTICA"] = datetime.now().strftime(time_fmt)

    print(":spoon:[yellow]Mesclando os dados da Aeronáutica.")
    df = merge_close_rows(df, aero)
    # Stopgap: append a placeholder row for a temporarily registered SMP station.
    # NOTE(review): relies on the label `len(df)` not being in the index yet (true for a
    # default RangeIndex) — confirm what `merge_close_rows` returns.
    df.loc[len(df), :] =  [-1,  -15.7801,  -47.9292, "[TEMP] L, FX, Estação do SMP licenciada (cadastro temporário)", "10", "999999999", 'NI', "-1"]
    df = _format_matlab(df)

    print(":card_file_box:[green]Salvando os arquivos...")
    df.to_parquet(f"{dest}/AnatelDB.parquet.gzip", compression="gzip", index=False)

    version_file = dest / "VersionFile.json"
    versiondb = json.loads(version_file.read_text())
    mod_times["ReleaseDate"] = datetime.now().strftime(time_fmt)
    versiondb["anateldb"].update(mod_times)
    # Serialise first and let `write_text` open/close the file: the previous
    # `json.dump(..., path.open("w"))` never closed the handle it opened.
    version_file.write_text(json.dumps(versiondb))

    print("Sucesso :zap:")
    return df

In [7]:
#| eval:false
import os
import warnings
from extracao.updates import connect_db

In [8]:
#| eval:false
# Suppress every warning for the rest of this session to keep the cell outputs clean.
warnings.filterwarnings("ignore")

In [9]:
#| eval:false
# Data folder used by the cells below: `dados`, next to the current working directory.
folder = Path.cwd().parent / 'dados'

In [10]:
#| eval:false
# conn = connect_db()
# uri = os.environ['MONGO_URI']
# mongo_client = MongoClient(uri)
# mongo_client.server_info()

In [10]:
#| eval:false
# Load a previously saved database from the data folder.
# NOTE(review): `get_db` writes `AnatelDB.parquet.gzip`, while this cell reads
# `ANATELDB.parquet.gzip`. The names differ only in case, so on a case-sensitive
# file system they are different files — confirm which one is intended.
df = pd.read_parquet(folder / 'ANATELDB.parquet.gzip')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 819686 entries, 0 to 819685
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype   
---  ------       --------------   -----   
 0   Id           819686 non-null  string  
 1   Frequency    819686 non-null  float64 
 2   Latitude     819686 non-null  float64 
 3   Longitude    819686 non-null  float64 
 4   Description  819686 non-null  string  
 5   Service      819686 non-null  int16   
 6   Station      819686 non-null  int32   
 7   Class        819686 non-null  category
 8   BW           819686 non-null  float32 
dtypes: category(1), float32(1), float64(3), int16(1), int32(1), string(2)
memory usage: 39.9 MB


In [11]:
#| eval:false
# Preview the first rows of the loaded database.
df.head()

Unnamed: 0,Id,Frequency,Latitude,Longitude,Description,Service,Station,Class,BW
0,#1,-1.0,-15.7801,-47.929199,"[TEMP] L, FX, Estação do SMP licenciada (cadas...",10,999999999,NI,-1.0
1,#2,0.028,-22.662779,-43.476391,"[MOS] L, OP, Furnas Centrais Eletricas S A (01...",19,1557670,J9E,8.0
2,#3,0.03,-23.709999,-46.273335,"[MOS] L, OP, Furnas Centrais Eletricas S A (01...",19,1558412,J3E,2.0
3,#4,0.03,-23.441668,-46.590832,"[MOS] L, OP, Furnas Centrais Eletricas S A (01...",19,1557823,J3E,1.0
4,#5,0.03,-22.926666,-43.264999,"[MOS] L, OP, Furnas Centrais Eletricas S A (01...",19,859761,J3E,0.5


In [12]:
#| eval:false
# Preview the last rows of the loaded database.
df.tail()

Unnamed: 0,Id,Frequency,Latitude,Longitude,Description,Service,Station,Class,BW
955112,#955113,84125.0,-20.355619,-40.39558,"[MOS] L, FX, Claro S.A. (50418766738, 10145803...",19,1014580355,Q7W,2000000.0
955113,#955114,84187.5,-20.326571,-40.362171,"[MOS] L, FX, Claro S.A. (50418766738, 10143002...",19,1014300220,Q7W,2000000.0
955114,#955115,84187.5,-20.326571,-40.362171,"[MOS] L, FX, Claro S.A. (50418766738, 10143002...",19,1014300220,Q7W,2000000.0
955115,#955116,84812.5,-12.981688,-38.454357,"[MOS] L, FX, Computadores E Sistemas Ltda (504...",19,1011928598,Q7W,62500.0
955116,#955117,84812.5,-12.978972,-38.461666,"[MOS] L, FX, Computadores E Sistemas Ltda (504...",19,1011928610,Q7W,62500.0


In [18]:
#| eval:false
# Write the DataFrame back to the data folder as a gzip-compressed parquet file.
# NOTE(review): the file name here is `ANATELDB.parquet.gzip`, whereas `get_db` saves
# `AnatelDB.parquet.gzip` — on a case-sensitive file system these are two files; confirm.
df.to_parquet(folder / 'ANATELDB.parquet.gzip', compression='gzip', index=False)

In [11]:
#|eval: false
# Build the database without database connections: both default to None, so `get_db`
# calls `read_aero` with `update=False`.
df = get_db(folder)
# Alternative call that passes the SQL Server and MongoDB connections:
#df = get_db(folder, conn, mongo_client)

Output()

In [12]:
#| eval:false
# df.loc[(df.Station == 688022243) & (df.Frequency == 161.07)]

In [13]:
#|eval: false
# from pandas_profiling import ProfileReport
# df['Frequency'] = df['Frequency'].astype('category')
# profile = ProfileReport(df, config_file='report_config.yaml')
# profile.to_notebook_iframe()

In [14]:
#|eval: false
# from extracao.constants import MIN_LAT, MIN_LONG, MAX_LAT, MAX_LONG
# df.loc[~(df.Latitude.between(MIN_LAT,MAX_LAT) & df.Longitude.between(MIN_LONG, MAX_LONG))]