In [0]:
import os
import logging
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from datetime import datetime, timedelta
import requests

### UDF

In [0]:

def generate_presigned_url_eic(bucket_name, object_name, expiration):
    """Create a presigned S3 ``get_object`` URL for one object.

    A new SigV4 S3 client pinned to ``eu-west-1`` is built on every call,
    using the notebook-level globals ``aws_access_key`` and
    ``aws_access_secret_key`` (they must be defined before this is called).

    Args:
        bucket_name: Name of the bucket that holds the object.
        object_name: Key of the object inside the bucket.
        expiration: Value forwarded as ``ExpiresIn`` (URL lifetime in seconds).

    Returns:
        The presigned URL, or ``None`` when boto raises ``ClientError``
        (the error is logged, not re-raised).
    """
    signature_config = Config(signature_version="s3v4")
    presign_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_access_secret_key,
        region_name="eu-west-1",
        config=signature_config,
    )
    object_ref = {"Bucket": bucket_name, "Key": object_name}

    try:
        return presign_client.generate_presigned_url(
            "get_object",
            Params=object_ref,
            ExpiresIn=expiration,
        )
    except ClientError as err:
        # Callers receive None and must decide how to handle the failure.
        logging.error(err)
        return None

In [0]:
def send_email(subject, content, mail_to, mail_from, mail_cc, timeout=30):
    """Request an e-mail by triggering the ``smtp_call_str`` Airflow DAG.

    The mail fields are posted as the DAG run ``conf`` to the Airflow REST
    endpoint; the actual delivery is presumably done by that DAG (not visible
    from this notebook).

    Args:
        subject: Mail subject line.
        content: Mail body, sent with ``content_type`` ``'plain'``.
        mail_to: Recipient value forwarded as ``mail_to``.
        mail_from: Sender value forwarded as ``mail_from``.
        mail_cc: CC value forwarded as ``mail_cc``.
        timeout: Seconds to wait for the Airflow API before giving up.
            Without a timeout ``requests`` may block indefinitely and hang
            the job.

    Returns:
        None. A non-200 response is only printed (best effort, no raise).

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the request exceeds ``timeout``.
    """
    # Airflow basic-auth password, read from the "admin" secret scope.
    airflow_pw = dbutils.secrets.get("admin", "password_airflow_pw")

    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
    }

    json_data = {
        "conf" : {
            'mail_from' : mail_from,
            'mail_to' : mail_to,
            'mail_cc' : mail_cc,
            'subject' : subject,
            'content_str' : content,
            'content_type' : 'plain',
        }
    }

    response = requests.post(
        'https://airflow-kic.hedataplfm.com/api/v1/dags/smtp_call_str/dagRuns',
        headers=headers,
        json=json_data,
        auth=('admin', airflow_pw),
        timeout=timeout,
    )

    # Surface the API's error payload in the job output when the trigger failed.
    if response.status_code != 200:
        print(response.text)

### main

In [0]:
# Fetch values handed over from the previous task: its name comes from a
# widget, and the KST date is read from that task's published task values.
prev_task = dbutils.widgets.get('prev_task')
kst_date = dbutils.jobs.taskValues.get(prev_task, "kst_date")

# AWS credentials, both read from the "admin" secret scope.
aws_access_key, aws_access_secret_key = (
    dbutils.secrets.get("admin", secret_name)
    for secret_name in ("aws_common_access_key", "aws_common_secret_access_key")
)

# S3 location of the CSV files: bucket list plus key prefix.
bucket_ls = ['s3-lge-he-inbound-eic-dev']
key = "HEDS/{}/{}".format(prev_task, kst_date)

# Mail to / from / cc, each supplied through a widget of the same name.
mail_to, mail_from, mail_cc = (
    dbutils.widgets.get(widget_name)
    for widget_name in ('mail_to', 'mail_from', 'mail_cc')
)

In [0]:
# Walk every object under the prefix in each bucket and keep only CSV files.
file_list = []
s3 = boto3.resource(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_access_secret_key
)
for bucket_nm in bucket_ls:
    bucket = s3.Bucket(bucket_nm)
    for obj in bucket.objects.filter(Prefix=key):
        # Keys ending in '/' have an empty last path segment; skip those,
        # then keep only keys whose text after the final '.' is 'csv'.
        has_file_name = len(obj.key.split('/')[-1]) > 0
        if has_file_name and obj.key.split('.')[-1] == 'csv':
            file_list.append((bucket_nm, obj.key))

# 259200 seconds = 72 hours, the download window announced in the mail body.
PRESIGNED_URL_TTL_SECONDS = 259200

list_eic_url = []
for bucket_name, object_name in file_list:
    eic_url = generate_presigned_url_eic(
        bucket_name, object_name, PRESIGNED_URL_TTL_SECONDS
    )
    if eic_url is None:
        # generate_presigned_url_eic returns None on ClientError; skip it so
        # the mail never contains a literal "None" instead of a link.
        logging.error(
            "Presigned URL generation failed for s3://%s/%s",
            bucket_name,
            object_name,
        )
        continue
    list_eic_url.append(eic_url)


In [0]:

# Send the download links by mail, or fail the task when nothing was found.
if len(list_eic_url) > 0:

    # Build the URL lines outside the f-string: a backslash ("\n") inside an
    # f-string replacement field is a SyntaxError before Python 3.12.
    url_lines = "\n".join(f"   {eic_url}" for eic_url in list_eic_url)

    subject = f'[MSDP][Jira-1218] Onvo 55UA84 이슈 관련 TV로그 주기적 추출 요청 ({kst_date})'
    content = f"""
    안녕하십니까? msdp입니다.

    요청하신 데이터 파일 전달 드립니다.
    다운로드 가능 기간은 현시점으로부터 72시간 내에만 가능합니다.

    - 요청 jira : 
       http://jira.lge.com/issue/browse/HEDATAPLFM-1218
    - 데이터 파일 : 
    {url_lines}

    정기적 추출 중단 필요하시면, Jira Comment 또는 msdp@lge.com 메일 회신 부탁 드립니다.
    감사합니다.
    """

    send_email(subject, content, mail_to, mail_from, mail_cc)

else:
    # Raising a bare str is a TypeError in Python 3 ("exceptions must derive
    # from BaseException"); raise a real exception so the message is reported.
    raise FileNotFoundError('파일이 존재하지 않는 권역이 존재합니다.')