In [None]:
import json
from datetime import datetime,timezone,timedelta
from token_transformations import get_token
from pyspark.sql.functions import collect_list,desc,col
import aiohttp
import asyncio
import nest_asyncio

In [None]:
# Job parameters are delivered as notebook widgets; the raw values are read
# here and normalized/validated further down.
read_widget = dbutils.widgets.get

systemSource = read_widget("systemSource")
tableSource = read_widget("tableSource")
isIncremental = read_widget("isIncremental")
isSensitive = read_widget("isSensitive")
isDev = read_widget("isDev")
trigger = read_widget("trigger")
schemaName = read_widget("schemaName")

In [None]:
# Every parameter checked below is mandatory.
assert all(
    value is not None
    for value in (systemSource, tableSource, isDev, isSensitive, isIncremental, schemaName)
), "None is not a valid input"

# Identifiers are trimmed; systemSource and schemaName are also lower-cased.
systemSource = str(systemSource).strip().lower()
schemaName = str(schemaName).strip().lower()
tableSource = str(tableSource).strip()

# Flags are trimmed and capitalized so that e.g. "true" / "TRUE" become "True".
isDev, isSensitive, isIncremental = (
    str(flag).strip().capitalize() for flag in (isDev, isSensitive, isIncremental)
)

# Textual spellings accepted for a boolean flag.
accepted_flags = ("True", "False", "0", "1")

# The checks run in a fixed order so the first invalid parameter is the one reported.
assert systemSource != "", "systemSource: '' is not a valid input"
assert isSensitive in accepted_flags, f"isSensitive: {isSensitive} is not a valid input, expected True or False"
assert tableSource != "", "tableSource:  '' is not a valid input"
assert isDev in accepted_flags, f"isDev: {isDev} is not a valid input, expected True or False"
assert isIncremental in accepted_flags, f"isIncremental: {isIncremental} is not a valid input, expected True or False"
assert schemaName != "", "schemaName:  '' is not a valid input"
# Turn the validated flag strings ("True"/"False"/"0"/"1") into real booleans.
# A membership test gives the same result as bool(eval(...)) for every accepted
# spelling, without evaluating widget-supplied text as Python code.
isDev = isDev in ("True", "1")
isSensitive = isSensitive in ("True", "1")
# Path segment describing the data classification.
sensitive = 'sensitive' if isSensitive else 'general'
isIncremental = isIncremental in ("True", "1")
# Path segment describing the load type.
incremental = 'incremental' if isIncremental else 'snapshot'
# Environment-specific settings: catalog name, landing path prefix, schema
# and the table that stores the API tokens.
if isDev:
    enviroment, env_path = "dev", "/dev"
    schema_adb = 'dev'
    token_table = "landing.dev.token_control"
else:
    enviroment, env_path = "prod", ""
    schema_adb = 'staging'
    token_table = "landing.control.token_control"

# Capture the run instant once; its components become folder levels of the landing path.
now = datetime.now()
(
    year_created,
    month_created,
    day_created,
    hour_created,
    minute_created,
    second_created,
) = now.timetuple()[:6]

# Pick the silver table that lists the URLs to query for the requested resource.
if tableSource == 'resources/hosts_services':
    select_table = f'{enviroment}.silver.centreon_apirest_monitoring_hosts_service_id'

elif tableSource == 'resources/hosts':
    select_table =f'{enviroment}.silver.centreon_apirest_monitoring_hosts_id'

else:
    # Fail here with a clear message instead of a NameError when select_table is first used.
    raise ValueError(
        f"tableSource: {tableSource} is not supported, "
        "expected 'resources/hosts_services' or 'resources/hosts'"
    )

# Landing folder layout:
# <root><env>/<classification>/<system>/<load type>/<schema>/<table>/<Y>/<M>/<D>/<h>/<m>/<s>
landing_root = "abfss://landing@strawdpcdiaprodbrs.dfs.core.windows.net" + env_path
path_levels = (
    sensitive,
    systemSource,
    incremental,
    schemaName,
    tableSource,  # still in its "a/b" form here, so it contributes nested folders
    year_created,
    month_created,
    day_created,
    hour_created,
    minute_created,
    second_created,
)
path_raw = "/".join([landing_root, *(str(level) for level in path_levels)])

# From here on tableSource uses underscores instead of slashes.
tableSource = '_'.join(tableSource.split('/'))

# Endpoint requested before the bulk fetch to check whether the stored token is still accepted.
test_url = 'https://monitoramento.rnp.br/centreon/api/latest/monitoring/hosts/categories'

In [None]:
# Most recently generated token according to the date_generation column.
latest_token_row = (
    spark.table(token_table)
    .orderBy(desc("date_generation"))
    .select("token")
    .first()
)
token = latest_token_row[0]

# Headers shared by every API request; get_new_token() updates this dict in place.
headers = {'Accept': 'application/json', 'X-AUTH-TOKEN': token}

In [None]:
# Gather the whole "url" column of the selected table into one Python list on the driver.
url_rows = spark.table(select_table).select(collect_list("url"))
list_url = url_rows.first()[0]

In [None]:
## asynchronous query

# Patch asyncio before asyncio.run() is called further down; presumably required
# because the notebook kernel already has a running event loop — TODO confirm.
nest_asyncio.apply()
async def get_new_token():
    """Generate a fresh API token and install it in the shared request headers.

    Calls `get_token(enviroment, token_table)` (presumably it stores a new
    token row in `token_table` — verify against `token_transformations`),
    then re-reads the row with the latest `date_generation` and overwrites
    `headers['X-AUTH-TOKEN']` in place.

    Nothing in the body is awaited, so these calls run synchronously inside
    the event loop.
    """
    get_token(enviroment, token_table)
    newest_first = spark.table(token_table).orderBy(desc("date_generation"))
    refreshed_token = newest_first.select("token").first()[0]
    headers['X-AUTH-TOKEN'] = refreshed_token

async def test_token_validation(session, url, headers):
    async with session.get(url, headers=headers) as resp:
        return resp.status 

       
async def fetch_data(session, url, headers):
    """GET `url` and return its JSON body tagged with ingestion metadata.

    Args:
        session: object whose `get(url, headers=...)` returns an async context
            manager yielding the response (an aiohttp session in this notebook).
        url: endpoint to request.
        headers: request headers to send.

    Returns:
        A dict that always carries `_ingestion_url`, `_ingestion_ts`
        (ISO-8601 timestamp at UTC-03:00) and `code`:

        * HTTP status other than 200: metadata only, `code` is that status;
        * decoded body whose own `code` field equals 404: metadata only,
          `code` is 204;
        * otherwise: the decoded JSON body plus the metadata, `code` is the
          HTTP status;
        * the request itself failed (client error or timeout): metadata only,
          `code` is the status attached to the exception when it has one,
          else None.
    """
    def _envelope(code):
        # Metadata common to every record produced for this URL.
        return {
            "_ingestion_url": url,
            "_ingestion_ts": datetime.now(timezone(timedelta(hours=-3))).isoformat(),
            "code": code,
        }

    try:
        async with session.get(url, headers=headers) as resp:
            if resp.status != 200:
                return _envelope(resp.status)

            # content_type=None: decode the body as JSON whatever Content-Type was sent.
            response = await resp.json(content_type=None)

            # A 404 reported inside the body is recorded as 204.
            if response.get('code') == 404:
                return _envelope(204)
            response.update(_envelope(resp.status))

        return response
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"An error occurred during the request: {e}, of {url}")
        # No response object may exist here (e.g. the connection failed), so the
        # status is taken from the exception when it has one.
        return _envelope(getattr(e, "status", None))

async def main():
    """Validate the token, then fetch every URL in `list_url` concurrently.

    A single session backed by a connector limited to 50 simultaneous
    connections is used for all requests. If the probe request to `test_url`
    answers 401, the token is regenerated before the bulk fetch starts.

    Returns:
        List with one `fetch_data` result per entry of `list_url`.
    """
    connector = aiohttp.TCPConnector(limit=50, limit_per_host=0)
    async with aiohttp.ClientSession(connector=connector) as session:
        probe_status = await test_token_validation(session, test_url, headers)
        if probe_status == 401:
            await get_new_token()

        pending = []
        for url in list_url:
            pending.append(fetch_data(session, url, headers))

        return await asyncio.gather(*pending)
    
# Run the whole fetch to completion; main() returns the gathered fetch_data
# results, one per URL in list_url.
results = asyncio.run(main())


In [None]:
# Serialize each result dict to a JSON string so Spark can infer the schema
# from the JSON text.
json_df = sc.parallelize(results).map(json.dumps)

# Parse the JSON strings into a DataFrame and drop rows that are exact duplicates.
df = spark.read.json(json_df).dropDuplicates()

In [None]:
# Persist the batch as JSON files under this run's landing folder.
(
    df.write
    .format("json")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(path_raw)
)

In [None]:
# Publish the landing folder of this run as the "ingestion_path" task value.
dbutils.jobs.taskValues.set(
    key="ingestion_path",
    value=path_raw,
)