In [6]:
import requests
import datetime as dt
import base64
import time as tm
from zipfile import ZipFile
import logging
import boto3
from botocore.exceptions import ClientError
import os
from dotenv import load_dotenv, find_dotenv

In [7]:
# Locate the .env file once, show which file was picked up, then load it.
# The result of load_dotenv is the cell's displayed value.
env_file = find_dotenv()
print(env_file)
load_dotenv(env_file)

/home/david/eafit/Trabajo1_Almdatos/.env


True

In [8]:
def upload_file_to_s3(file_name, bucket, object_name = None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Key to store the object under. When omitted, the base
            name of ``file_name`` is used.

    Returns:
        True if the upload succeeded, False if it failed (the error is
        logged with ``logging.error``).
    """
    # Function-scope import keeps this block self-contained; boto3 itself is
    # already imported by the notebook.
    from boto3.exceptions import S3UploadFailedError

    if object_name is None:
        object_name = os.path.basename(file_name)

    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(
            file_name,
            bucket,
            object_name,
            ExtraArgs={'ACL': 'bucket-owner-full-control'},
        )
    except (ClientError, S3UploadFailedError) as e:
        # upload_file re-raises service-side failures as S3UploadFailedError,
        # which is not a ClientError; both must be caught for the documented
        # "return False on failure" contract to hold.
        logging.error(e)
        return False
    return True

In [9]:
def extract_file(file_name, output_path, member = "excel.csv.csv"):
    """Extract a single member from a zip archive.

    Args:
        file_name: Path to the zip archive.
        output_path: Directory the member is extracted into.
        member: Name of the archive member to extract. Defaults to
            "excel.csv.csv".

    Raises:
        KeyError: If ``member`` is not present in the archive.
    """
    # The context manager closes the archive on exit, so no explicit close()
    # call is needed afterwards.
    with ZipFile(file_name, "r") as archive:
        archive.extract(member, output_path)

In [10]:
def _build_submit_job_query(field, label, date_str):
    """Build the pre-encoded query string for the submitJob endpoint."""
    station_ids = ("26205080", "27015330", "27010810")
    # One filter clause per station, joined with "~or~".
    station_filter = "~or~".join(
        f"(IdParametro~eq~%27{field}%27~and~Etiqueta~eq~%27{label}%27"
        f"~and~IdEstacion~eq~%27{station_id}%27)"
        for station_id in station_ids
    )
    # The Items array carries three identical series descriptors.
    item = (
        f"%7B%22IdParametro%22%3A%22{field}%22%2C%22Etiqueta%22%3A%22{label}%22"
        "%2C%22EsEjeY1%22%3Afalse%2C%22EsEjeY2%22%3Afalse"
        "%2C%22EsTipoLinea%22%3Afalse%2C%22EsTipoBarra%22%3Afalse"
        "%2C%22TipoSerie%22%3A%22Estandard%22%2C%22Calculo%22%3A%22%22%7D"
    )
    items = "%2C".join([item] * 3)
    return (
        f"f=json&Filtro=sort%3D%26filter%3D({station_filter})"
        f"%26group%3D%26fechaInicio%3D{date_str}T05%253A00%253A00.000Z"
        f"%26fechaFin%3D{date_str}T05%253A00%253A00.000Z"
        "%26mostrarGrado%3Dtrue%26mostrarCalificador%3Dtrue"
        "%26mostrarNivelAprobacion%3Dtrue"
        f"&Items=%5B{items}%5D"
    )


def submit_job(field = "TEMPERATURA", label = "TMX_CON", target_date = None):
    """Submit a data-export job to the DHIME geoprocessing service.

    The request filters on ``field``/``label`` for three fixed station ids
    and uses the same date for both ``fechaInicio`` and ``fechaFin``.

    Args:
        field: Value sent as ``IdParametro``.
        label: Value sent as ``Etiqueta``.
        target_date: ``datetime.date`` or ``datetime.datetime`` to request.
            Defaults to the day before the current local date.

    Returns:
        The ``jobId`` string reported by the service.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        KeyError: If the JSON answer has no ``jobId``/``jobStatus`` entry.
    """
    submit_job_url = "http://dhime.ideam.gov.co/server/rest/services/AtencionCiudadano/DescargarArchivo/GPServer/DescargarArchivo/submitJob"

    headers = {
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "http://dhime.ideam.gov.co",
        "Referer": "http://dhime.ideam.gov.co/atencionciudadano/",
        "Sec-GPC": "1",
    }

    if target_date is None:
        target_date = dt.datetime.now() - dt.timedelta(days=1)
    # strftime zero-pads month and day (2023-03-05, not 2023-3-5), which the
    # ISO-style timestamps in the query require.
    date_str = target_date.strftime("%Y-%m-%d")
    data = _build_submit_job_query(field, label, date_str)

    response = requests.post(
        f"{submit_job_url}?{data}",
        headers=headers,
        verify=False,
        timeout=60,  # avoid hanging the notebook forever on a stalled server
    )
    # Fail early on HTTP errors instead of on a confusing JSON/Key error.
    response.raise_for_status()
    response_json = response.json()
    job_id = response_json["jobId"]
    job_status = response_json["jobStatus"]

    print(f"Job ID: {job_id}")
    print(f"Job Status: {job_status}")
    return job_id

In [11]:
def verify_status_job(job_id):
    """Fetch, print and return the current status of a submitted job.

    Makes a single request; it does not wait for the job to finish.

    Args:
        job_id: Identifier returned by the submitJob call.

    Returns:
        The ``jobStatus`` string from the service's JSON answer, so callers
        can decide whether to poll again.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        KeyError: If the JSON answer has no ``jobStatus`` entry.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'http://dhime.ideam.gov.co/atencionciudadano/',
        'Sec-GPC': '1',
    }

    params = {
        'f': 'json',
        # The current epoch second is sent as a cache-busting value.
        'dojo.preventCache': int(tm.time()),
    }

    response_get_status = requests.get(
        f"http://dhime.ideam.gov.co/server/rest/services/AtencionCiudadano/DescargarArchivo/GPServer/DescargarArchivo/jobs/{job_id}",
        params=params,
        headers=headers,
        verify=False,
        timeout=60,  # avoid hanging the notebook forever on a stalled server
    )
    response_get_status.raise_for_status()

    job_status = response_get_status.json()['jobStatus']
    print(f"Job status: {job_status}")
    return job_status

In [12]:
def download_file_from_api(job_id, zip_file_name = "datos.zip"):
    """Download the result file of a job and save it to disk.

    The service's JSON answer carries the file as a base64 string under the
    "value" key; it is decoded and written to ``zip_file_name``.

    Args:
        job_id: Identifier of the job whose result is requested.
        zip_file_name: Path the decoded file is written to.

    Returns:
        True if the file was decoded and written, False if decoding or
        writing failed (the error is logged with ``logging.error``).

    Raises:
        requests.HTTPError: If the service answers with an error status.
        KeyError: If the JSON answer has no "value" entry.
    """
    headers = {
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://dhime.ideam.gov.co/atencionciudadano/",
        "Sec-GPC": "1",
    }

    params = {
        "f": "json",
        "returnType": "data",
    }

    response_get_file = requests.get(
        f"http://dhime.ideam.gov.co/server/rest/services/AtencionCiudadano/DescargarArchivo/GPServer/DescargarArchivo/jobs/{job_id}/results/Archivo",
        params=params,
        headers=headers,
        verify=False,
        timeout=60,  # avoid hanging the notebook forever on a stalled server
    )
    response_get_file.raise_for_status()

    file_content = response_get_file.json()["value"]

    try:
        decoded_file_content = base64.b64decode(file_content)
        with open(zip_file_name, "wb") as f:
            f.write(decoded_file_content)
    except (TypeError, ValueError, OSError) as e:
        # Report the failure to the caller instead of only printing it, so a
        # missing or corrupt archive is not discovered later by accident.
        logging.error(e)
        return False
    return True

In [14]:
# Submit the export job and keep its id for the status and download steps.
job_id = submit_job(field="TEMPERATURA", label="TMX_CON")

Job ID: j7cec8f0266e84645a894943e5b8b1ffc
Job Status: esriJobSubmitted


In [15]:
# One-off status check: prints the job's current status (no waiting or retrying).
verify_status_job(job_id)

Job status: esriJobSucceeded


In [16]:
# Fetch the result of the submitted job and store it as a local zip file.
download_file_from_api(job_id, zip_file_name="datos.zip")

In [17]:
# Download -> extract -> upload pipeline for the job submitted earlier.
s3_bucket_name = "climaticchange-datalake"
zip_file_name = "datos.zip"
output_path = "datos"
s3_object_name = "datos.csv"

# The first positional argument of download_file_from_api is the job id
# (set by the submit_job cell); the zip file name goes second.
download_file_from_api(job_id, zip_file_name)
extract_file(zip_file_name, output_path)
upload_file_to_s3(f"{output_path}/excel.csv.csv", s3_bucket_name, s3_object_name)

True