In [1]:
# !pip install awswrangler

In [2]:
import awswrangler as wr
import pandas as pd
import numpy as np
import boto3
import logging
import os
from botocore.exceptions import ClientError

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

s3 = boto3.client('s3')
os.environ['snsARN'] = 'arn:aws:sns:ap-northeast-2:630190875472:temp_marketstatus-lambda-error-topic'  

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [3]:
### Domestic 파일
bucket = 'marketstatus-domestic'
obj_list = s3.list_objects(Bucket = bucket)
key_list = [i['Key'] for i in obj_list['Contents'] if ('raw' in i['Key']) and (('.xlsx' in i['Key']) or ('.xls' in i['Key']))]        


In [8]:
### Domestic 파일
bucket = 'marketstatus-domestic'
obj_list = s3.list_objects(Bucket = bucket)
key_list = [i['Key'] for i in obj_list['Contents'] if ('raw' in i['Key']) and (('.xlsx' in i['Key']) or ('.xls' in i['Key']))]        

for key_ in key_list[-1:]:
    key = key_
    ############## Lambda Code ##############   
    print(key)
    ## Bring Sheet Name
    _, year, month, filename = key.split('/')
    path = f's3://{bucket}/{key}' ## Data path
    
    ## Domestic Data Load
    df = wr.s3.read_excel(path, 
                          sheet_name=3,
                          header=4,
                          usecols='A,D,E:F,H,J',
                          skiprows=[5,],
                          skipfooter=4,
                          names=['Company', 'Category', 'Model', 'Production', 'Domestic', 'Export'],
                         )
    
    ## 불필요 데이터 삭제 및 문자열 형식 변경
    df = df[df['Model'].notnull()]
    df = df[~df['Model'].str.endswith('계')]
    df = df[(~df['Model'].str.contains('국산')) & (~df['Model'].str.contains('OEM 수입'))]    
    df['Company'] = df['Company'].str.replace(' ', '')
    df['Model'] = df['Model'].str.replace('\n', '')  
    df['Model'] = df['Model'].str.replace('  ', ' ')
    df['Model'] = df['Model'].str.lstrip()
    df['Model'] = df['Model'].str.rstrip()    
    df.loc[df['Company'] == '','Company'] = np.NaN
    
    ## Company / Master Data Load        
    company = wr.s3.read_csv('s3://marketstatus-metas/company/') ## Company Data Load
    brand = wr.s3.read_csv('s3://marketstatus-metas/brand/') ## Brand Data Load
    master = wr.s3.read_csv('s3://marketstatus-metas/master/') ## Master Data Load
    master['Model (until 2021)'] = master['Model (until 2021)'].astype('str')
    master['Model (until 2021)'] = master['Model (until 2021)'].str.replace('  ', ' ')
    master['Model (until 2021)'] = master['Model (until 2021)'].str.lstrip()
    master['Model (until 2021)'] = master['Model (until 2021)'].str.rstrip() 
    master['Model'] = master['Model'].astype('str')
    master['Model'] = master['Model'].str.replace('  ', ' ')
    master['Model'] = master['Model'].str.lstrip()
    master['Model'] = master['Model'].str.rstrip()        

    ## Check Company Data with Meta Data
        ## Company 데이터가 Meta 데이터에 없고 NaN값도 아닐 경우 에러 발생 | 정상일 경우 Company명 바꿈
    company_error = df[~((df['Company'].isin(company['Company_raw'])) | (df['Company'].isin(company['Company'])) | (df['Company'].isna()))]        
    if len(company_error) > 0:
        raise ValueError(f'No such value in meta table: {company_error["Company"].unique()}')
    company = company.set_index('Company_raw').to_dict()['Company']
    df = df.replace({"Company":company})
    
    ## 공백값 채우기 & 데이터 형변환
    df[['Company', 'Category']] = df[['Company', 'Category']].fillna(method='ffill')
    df[['Production', 'Domestic', 'Export']] = df[['Production', 'Domestic', 'Export']].fillna(0).astype(int)
    df['Brand'] = df['Company']
    df['Year'] = int(year)
    df['Month'] = int(month)
    df['import_or_domestic'] = 'Domestic'
    df['sales_or_not'] = 'Domestic'
    
    ## Check Brand Data with Meta Data
    ## Brand 데이터가 Meta 데이터에 없고 NaN값도 아닐 경우 에러 발생 | 정상일 경우 Brand명 바꿈
    brand_error = df[~((df['Brand'].isin(brand['Brand_raw'])) | (df['Brand'].isin(brand['Brand'])) | (df['Brand'].isna()))]        
    if len(brand_error) > 0:
        raise ValueError(f'No such value in meta table: {brand_error["Brand"].unique()}')
    brand = brand.set_index('Brand_raw').to_dict()['Brand']
    df = df.replace({"Brand":brand})
    
    ## Genesis 데이터 처리
    df.loc[df['Model'].isin(master[master['Brand'] == 'Genesis']['Model']),'Brand'] = 'Genesis'
    
    ## Check Model Data with Meta Data
    ## Model 데이터가 Meta 데이터에 없고 NaN값도 아닐 경우 에러 발생 | 정상일 경우 Model명 바꿈
    df['Model'] = df['Model'].astype('str')
    model_error = df[~((df['Model'].isin(master['Model (until 2021)'])) | (df['Model'].isin(master['Model'])) | (df['Model'].isna()))]           
    if len(model_error) > 0:        
        raise ValueError(f"No such value in meta table(Company, Brand, Model): {model_error[['Company', 'Brand', 'Model']].drop_duplicates().values}")
    model = master.set_index('Model (until 2021)').to_dict()['Model']
    df = df.replace({"Model":model})                   
    
#     ## 전처리 데이터 저장
#     write_path = f's3://{bucket}/domestic-preprocessed/{year}/{month}/domestic.parquet'
#     wr.s3.to_parquet(df, write_path, index=False)
    
#     ## Drop exist data ###   
#     sess = boto3.Session(region_name='us-east-1')
#     query = f"""DELETE FROM marketstatus.models_iceberg 
#                 WHERE year = {year} 
#                 AND month = {month} 
#                 AND import_or_domestic = 'Domestic' 
#                 AND sales_or_not = 'Domestic'"""
#     query_exec_id = wr.athena.start_query_execution(sql=query, database='marketstatus', boto3_session=sess, wait=True)
    
#     ## Data Insert To Iceberg Table ###         
#     df_array = df[['Year', 'Month', 'import_or_domestic', 'Company', 'Brand', 'Model', 'Domestic', 'sales_or_not']].to_numpy()    
#     query = """INSERT INTO marketstatus.models_iceberg VALUES """
#     for r in df_array:
#         query += f"({r[0]}, {r[1]}, '{r[2]}', '{r[3]}', '{r[4]}', '{r[5]}', {r[6]},'{r[7]}'),\n"
#     query = query[:-2]    
#     query_exec_id = wr.athena.start_query_execution(sql=query, database='marketstatus', boto3_session=sess, wait=True)

raw/2023/02/Monthly2023-02.xlsx


ValueError: No such value in meta table(Company, Brand, Model): [['Hyundai' 'Hyundai' 'AVANTE(CN7) EXPORT']
 ['Hyundai' 'Hyundai' 'i30(PD) EXPORT']
 ['Hyundai' 'Hyundai' 'LF SONATA EXPORT']
 ['Hyundai' 'Hyundai' 'SONATA(DN8) EXPORT']
 ['Hyundai' 'Hyundai' 'G70 2.5T']
 ['Hyundai' 'Hyundai' 'G70 EXPORT']
 ['Hyundai' 'Hyundai' 'G80(RG3) EXPORT']
 ['Hyundai' 'Hyundai' 'GRANDEUR IG EXPORT']
 ['Hyundai' 'Hyundai' 'GRANDEUR(GN7) EXPORT']
 ['Hyundai' 'Hyundai' 'G90(RS4) EXPORT']
 ['Hyundai' 'Hyundai' 'VENUE EXPORT']
 ['Hyundai' 'Hyundai' 'KONA(OS) EXPORT']
 ['Hyundai' 'Hyundai' 'KONA(SX2) 1.6 HEV']
 ['Hyundai' 'Hyundai' 'KONA(SX2) EV']
 ['Hyundai' 'Hyundai' 'KONA(SX2) EXPORT']
 ['Hyundai' 'Hyundai' 'TUCSON(NX4) 1.6 PHEV']
 ['Hyundai' 'Hyundai' 'TUCSON(NX4) EXPORT']
 ['Hyundai' 'Hyundai' 'SANTAFE(TM) 1.6T PHEV']
 ['Hyundai' 'Hyundai' 'SANTAFE(TM) EXPORT']
 ['Hyundai' 'Hyundai' 'PALISADE EXPORT']
 ['Hyundai' 'Hyundai' 'GV70 EXPORT']
 ['Hyundai' 'Hyundai' 'GV80 EXPORT']
 ['Hyundai' 'Hyundai' 'GRAND STAREX EXPORT']
 ['Hyundai' 'Hyundai' 'STARIA(US4) EXPORT']
 ['Hyundai' 'Hyundai' 'COUNTY EXPORT']
 ['Hyundai' 'Hyundai' '대형버스 EXPORT']
 ['Hyundai' 'Hyundai' 'MIGHTY EXPORT']
 ['Hyundai' 'Hyundai' 'PORTER EXPORT']
 ['Hyundai' 'Hyundai' '5T EXPORT']
 ['Hyundai' 'Hyundai' '대형트럭 FCEV']
 ['Hyundai' 'Hyundai' '대형트럭 EXPORT']
 ['Hyundai' 'Hyundai' 'MIXER']
 ['Hyundai' 'Hyundai' 'PULL CARGO']
 ['Hyundai' 'Hyundai' 'TRACTOR']
 ['Hyundai' 'Hyundai' '8X4 DUMP']
 ['Hyundai' 'Hyundai' '특장기타']
 ['Kia' 'Kia' 'MORNING(JA) EXPORT']
 ['Kia' 'Kia' 'PRIDE(YB) EXPORT']
 ['Kia' 'Kia' 'K3(BD) EXPORT']
 ['Kia' 'Kia' 'K5(JF) EXPORT']
 ['Kia' 'Kia' 'K5(DL3) 2.0 PHEV']
 ['Kia' 'Kia' 'K5(DL3) EXPORT']
 ['Kia' 'Kia' 'K8(GL3) EXPORT']
 ['Kia' 'Kia' 'K9(RJ) EXPORT']
 ['Kia' 'Kia' 'STINGER EXPORT']
 ['Kia' 'Kia' 'SOUL BOOSTER EXPORT']
 ['Kia' 'Kia' 'STONIC EXPORT']
 ['Kia' 'Kia' 'NIRO(SG2) 1.6 PHEV']
 ['Kia' 'Kia' 'SELTOS EXPORT']
 ['Kia' 'Kia' 'SPORTAGE(NQ5) 1.6 PHEV']
 ['Kia' 'Kia' 'SPORTAGE(NQ5) EXPORT']
 ['Kia' 'Kia' 'SORENTO(MQ4) EXPORT']
 ['Kia' 'Kia' 'EV9']
 ['Kia' 'Kia' 'MOHAVE EXPORT']
 ['Kia' 'Kia' 'CARNIVAL(KA4) EXPORT']
 ['Kia' 'Kia' '실크로드캄']
 ['Kia' 'Kia' 'IT']
 ['Kia' 'Kia' '1.2/1.4T']
 ['Kia' 'Kia' '특장기타']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'NG MALIBU EXPORT']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'TRAX 1.8']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'TRAX EXPORT']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'TRAX(CUV) 1.2T']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'TRAX(CUV) EXPORT']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'TRAILBLAZER EXPORT']
 ['GM Korea(Domestic)' 'GM Korea(Domestic)' 'SIERRA 6.2*']
 ['Ssangyong' 'Ssangyong' 'TIVOLI 1.2']
 ['Ssangyong' 'Ssangyong' 'TIVOLI EXPORT']
 ['Ssangyong' 'Ssangyong' 'KORANDO(C300) 2.0']
 ['Ssangyong' 'Ssangyong' 'KORANDO(C300) EXPORT']
 ['Ssangyong' 'Ssangyong' 'REXTON 2.0 GSL']
 ['Ssangyong' 'Ssangyong' 'REXTON 3.2 GSL']
 ['Ssangyong' 'Ssangyong' 'REXTON EXPORT']
 ['Ssangyong' 'Ssangyong' 'REXTON SPORTS 2.0 GSL']
 ['Ssangyong' 'Ssangyong' 'REXTON SPORTS EXPORT']
 ['Renault Samsung' 'R. Samsung' 'NEW SM3 EXPORT']
 ['Renault Samsung' 'R. Samsung' 'NEW SM3 EV EXPORT']
 ['Renault Samsung' 'R. Samsung' 'SM6 EXPORT']
 ['Renault Samsung' 'R. Samsung' 'XM3 EXPORT']
 ['Renault Samsung' 'R. Samsung' 'QM6 2.0 LPe QUEST']
 ['Renault Samsung' 'R. Samsung' 'QM6 EXPORT']
 ['Daewoo Bus' 'Daewoo'
  'BS (BS090, BF106, BS106,  BV120MA, BS110CN, BS120CN, BC211M)']
 ['Daewoo Bus' 'Daewoo' 'ROYAL CITY EXPORT']
 ['Daewoo Bus' 'Daewoo' 'ROYAL EXPORT']
 ['Tata Daewoo' 'Tata Daewoo' '대형트럭 EXPORT']
 ['Tata Daewoo' 'Tata Daewoo' 'MIXER']
 ['Tata Daewoo' 'Tata Daewoo' 'TRACTOR']]