In [26]:
# api 요청으로 log 데이터 추출(json)
# 추출된 log 데이터 저장
# requests 모듈 사용

import requests
import os
import json

url = 'http://ec2-3-37-12-122.ap-northeast-2.compute.amazonaws.com/api/data/log'
JSON_LOG_PATH_NOTE = './json_log_practice.json'

# Fetch the JSON log through an API request
def get_json_log(url, timeout=10):
    """Request ``url`` and return the decoded JSON body.

    Args:
        url: Endpoint that serves the log as JSON.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        Whatever ``Response.json()`` yields for the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()

json_log = get_json_log(url)

# Tell whether an entry is new, i.e. not among the entries received before
def except_duplicate(new, log_exist):
    """Return True when ``new`` equals none of the items in ``log_exist``.

    Args:
        new: Candidate entry.
        log_exist: Iterable of entries that are already stored.

    Returns:
        False as soon as one stored entry compares equal to ``new``,
        True otherwise (including when ``log_exist`` is empty).
    """
    return not any(stored == new for stored in log_exist)

# Persist the received json_log
def save_json_log(path, log):
    """Write ``log`` to the JSON file at ``path``, merging with what is already there.

    If ``path`` does not exist yet, the file is created with ``log`` as-is.
    Otherwise the stored list is loaded, every entry of ``log`` that is not
    already present is appended, and the merged list is written back.

    Args:
        path: Location of the JSON log file.
        log: List of JSON-serialisable entries to store.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
    """
    # No log file yet: create it with the received entries.
    if not os.path.isfile(path):
        with open(path, 'w') as f:
            json.dump(log, f, indent=2)
        return

    # A log file exists: extend its contents with the entries it lacks.
    with open(path, 'r') as f:
        merged = json.load(f)
    for entry in log:
        if entry not in merged:
            merged.append(entry)
    # Write the merged list back; dumping only ``log`` here would discard
    # everything that had been stored by earlier calls.
    with open(path, 'w') as f:
        json.dump(merged, f, indent=2)

save_json_log(JSON_LOG_PATH_NOTE, json_log)
    

In [27]:
# 받은 json_log 확인
json_log

[{'recordId': 48875,
  'ArrivalTimeStamp': 1678976923.238,
  'data': 'gAAAAABkErVXaYFajUzrzEACtz83HiBbu31SGfeAIW9KtaFPhLv7K0K5f1JHpeADEkwnLPtatAn9pvqDaAkRsihnuMB1HrSJXlabe6Zh09QuF8vGrhuXfrrUFaNPfTGIjYP4X-qwNLHW7_dKu-vUO8X1-pD-lN8cd9_Np-5Phv034fiJBJ3K7vtbt-7X467xtJVPGlJ7vaZpTLsggdGijldeMiBRVOfP_nCzJdOkkQxPJHJx--Hlw1BkoumzASdDlBSlkSewto5EAZIkzs-p1U8HNV5BzwINkNK4z3VPVmbCizkfCKL6yXSNeVRkuw1bPaDKtp41SuwH7OHm-RmldZEhnQBqnxVwS26TDfSRq9MS4SphlVB3iLKtDLvychWXTF_gatziZU79z25mkf0l5ro1S90dmaXMHmUXzVwlsUcc-34_vREUFqGiqIb7DZrxe6bBSMZ0J0Cm-6NGQthOg3UU0HBDm_TTRw=='},
 {'recordId': 48876,
  'ArrivalTimeStamp': 1678976923.526,
  'data': 'gAAAAABkErVXsNzMcUHJYdQg02m27RV3GF_6LWaGdq9e4qCH-27liTLD3MJY8cTsT7w7NkR1DSCgYVnWFOLznhh5QRwvR8HEDcFdQc6f10tquzyzl_DgaCRZ92EEmqEyWdsOy5WXfTm7JHYNOi0Pwn0pa-aGELa370O-6w9bLku11ITFvlk3dJQs3KH3829hJUOBjBeUqXRE42Bre0cf7_dM1YQfShn3TR-bYJalJvW0Z6qSqZ_DeGsyNxwgrUhYmjw_vLWjLlcTtMAPaXC5yjKuqnbs7O1SkS-_dwvY7f3ioIzfEL9BTbaw0XIYnQAqusFa9X2ZA8dz8WNq_tzBKmVullWtSKZyaU5gKZu7Lc6_SqNO6cmz

In [21]:
# 받은 log의 암호화된 데이터 복호화, 문자열 압축 알고리즘 적용 후 다시 암호화 및 저장
# 1. 암호화된 부분 복호화
# 2. b64uuid 모듈을 사용하여 'user_id' 64(32 / 32) 자 -> 44(22 /22) 자 압축
# 3. 'method' 를 value 별 숫자로 압축
# 4. 'url' 을 value 별 숫자로 압축
# 5. 'inDate' 다른 표기방식으로 압축
# 6. 암호화
# 7. 파일 저장(.json)

In [28]:
# 1. 암호화된 부분 복호화

from cryptography.fernet import Fernet

# Fernet key used by decrypt_data / encrypt_data.
# NOTE(review): the secret is hardcoded in the notebook source; consider loading it
# from an environment variable or a secrets store instead of committing it.
key = b't-jdqnDewRx9kWithdsTMS21eLrri70TpkMq2A59jX8='

def decrypt_data(log, key):
    """Decrypt the ``'data'`` field of every record in ``log`` in place.

    Each ``record['data']`` is treated as a Fernet token. The decrypted text
    is parsed into a Python object and stored back under ``'data'``.

    Args:
        log: List of records (dicts) whose ``'data'`` value is a Fernet token.
        key: Fernet key used to decrypt the tokens.

    Returns:
        None. ``log`` is modified in place.
    """
    import ast  # local import: only this function needs it

    fernet = Fernet(key)
    for record in log:
        plaintext = fernet.decrypt(record['data']).decode('utf-8')
        try:
            # The payload is written as a Python literal (single-quoted dict),
            # so parse it as one. This also copes with apostrophes inside
            # values and with None/True/False, which a blind quote swap
            # followed by json.loads cannot handle.
            payload = ast.literal_eval(plaintext)
        except (ValueError, SyntaxError):
            # Fallback for payloads that are JSON rather than a Python literal
            # (e.g. containing true/false/null): the json module only accepts
            # double-quoted strings, hence the quote replacement.
            payload = json.loads(plaintext.replace("'", "\""))
        record['data'] = payload
    return

decrypt_data(json_log, key)

In [29]:
json_log

[{'recordId': 48875,
  'ArrivalTimeStamp': 1678976923.238,
  'data': {'user_id': 'b700bd6ebf3dadab5d3f7920c2730569e0e45b2f6d39a11b18505e0e7693a4ee',
   'record_id': 48875,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-16T14:28:43.238Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 48876,
  'ArrivalTimeStamp': 1678976923.526,
  'data': {'user_id': 'd2f483672c0239f6d7dd3c9ecee6deacbcd59185855625902a8b1c1a3bd67440',
   'record_id': 48876,
   'activity': 'cart',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-16T14:28:43.526Z',
   'detail': {'message': 'POST cart', 'levelname': 'INFO'}}},
 {'recordId': 48877,
  'ArrivalTimeStamp': 1678976923.813,
  'data': {'user_id': '84bdcd156603935b711cccd5a7c44b59a475e44429ab0f8a27ec18e175817436',
   'record_id': 48877,
   'activity': 'view',
   'url': '/api/products/product/',
   'met

In [30]:
# 2. b64uuid 모듈을 사용하여 'user_id' 64(32 / 32) 자 -> 44(22 /22) 자 압축
from b64uuid import B64UUID

def to_b64uuid(log):
    """Shorten ``record['data']['user_id']`` of every record in ``log`` in place.

    The id is split after its first 32 characters; each part is passed to
    ``B64UUID`` and the two resulting ``.string`` values are concatenated.

    Args:
        log: List of records whose ``'data'`` value is a dict with a
            ``'user_id'`` string.

    Returns:
        None. ``log`` is modified in place.
    """
    for record in log:
        payload = record['data']
        raw_id = payload['user_id']
        head, tail = raw_id[:32], raw_id[32:]
        payload['user_id'] = B64UUID(head).string + B64UUID(tail).string
    return

to_b64uuid(json_log)


In [31]:
json_log

[{'recordId': 48875,
  'ArrivalTimeStamp': 1678976923.238,
  'data': {'user_id': 'twC9br89ratdP3kgwnMFaQ4ORbL205oRsYUF4OdpOk7g',
   'record_id': 48875,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-16T14:28:43.238Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 48876,
  'ArrivalTimeStamp': 1678976923.526,
  'data': {'user_id': '0vSDZywCOfbX3TyezuberAvNWRhYVWJZAqixwaO9Z0QA',
   'record_id': 48876,
   'activity': 'cart',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-16T14:28:43.526Z',
   'detail': {'message': 'POST cart', 'levelname': 'INFO'}}},
 {'recordId': 48877,
  'ArrivalTimeStamp': 1678976923.813,
  'data': {'user_id': 'hL3NFWYDk1txHMzVp8RLWQpHXkRCmrD4on7BjhdYF0Ng',
   'record_id': 48877,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-0

In [318]:
li_method = {}
method_num = 0

def trans_method(data, li_method, method_num):
    """Replace ``data['data']['method']`` with a small integer code, in place.

    Codes are kept in ``li_method`` (method value -> code). A method that has
    not been seen before receives the next free code, i.e. one more than the
    largest of ``method_num`` and the codes already handed out.

    Args:
        data: Record whose ``'data'`` dict holds a ``'method'`` value.
        li_method: Mapping of method value to code; updated in place.
        method_num: Base value for numbering. Because ints are passed by
            value, the caller's counter never changes, so the next code is
            derived from ``li_method`` itself rather than from an increment of
            this argument (which gave every new method the same code).

    Returns:
        None. ``data`` and ``li_method`` are modified in place.
    """
    payload = data['data']
    method = payload['method']
    if method not in li_method:
        li_method[method] = max([method_num, *li_method.values()]) + 1
    payload['method'] = li_method[method]
    return
        
# Encode the HTTP method of every record in place.
# NOTE(review): `log` is not assigned in any of the cells shown here (earlier cells
# build `json_log`); confirm where it comes from so the notebook survives Restart & Run All.
for data in log:
    trans_method(data, li_method, method_num)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.456Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.768Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': '/api/products/product/',
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:53.082

In [319]:
li_method

{'POST': 1}

In [320]:
li_url = {}

def trans_url(data, li_url):
    """Replace ``data['data']['url']`` with an integer code, in place.

    The code of a URL is the integer value of everything from character 23
    onwards, or 0 when nothing follows. Codes are cached in ``li_url``
    (url -> code) so each distinct URL is converted only once.

    NOTE(review): '/api/products/product/' is 22 characters long, so slicing
    at 23 skips the first character after that prefix -- confirm the offset
    against the real URL formats.

    Args:
        data: Record whose ``'data'`` dict holds a ``'url'`` string.
        li_url: Mapping of url to code; updated in place.

    Returns:
        None. ``data`` and ``li_url`` are modified in place.

    Raises:
        ValueError: If the part after character 23 is not an integer literal.
    """
    payload = data['data']
    raw_url = payload['url']
    if raw_url not in li_url:
        suffix = raw_url[23:]
        li_url[raw_url] = int(suffix) if suffix else 0
    payload['url'] = li_url[raw_url]
    return
        
for data in log:
    trans_url(data, li_url)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.456Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.768Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:53.082Z',
   'detail': {'message': 'POST purchase', 'levelname': 'INFO'}}},

In [321]:
li_url

{'/api/products/product/': 0}

In [322]:
from datetime import datetime

def trans_indate(data):
    """Rewrite ``record['data']['inDate']`` of every record in a compact form, in place.

    The last character of the stored value is dropped (the sample data ends
    in 'Z'), the remainder is parsed as an ISO-8601 timestamp and re-emitted
    as ``%y%m%d%H%M%S%f`` (two-digit year through microseconds, no
    separators).

    Args:
        data: List of records whose ``'data'`` dict holds an ``'inDate'`` string.

    Returns:
        None. ``data`` is modified in place.
    """
    for record in data:
        payload = record['data']
        iso_text = payload['inDate']
        parsed = datetime.fromisoformat(iso_text[:-1])
        payload['inDate'] = f'{parsed:%y%m%d%H%M%S%f}'

trans_indate(log)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '230315144552456000',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '230315144552768000',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '230315144553082000',
   'detail': {'message': 'POST purchase', 'levelname': 'INFO'}}},
 {'recordId': 949

In [323]:
def encrypt_data(log):
    """Encrypt the ``'data'`` field of every record in ``log`` in place.

    Each ``record['data']`` is converted to its string form, ASCII-encoded,
    encrypted with Fernet using the module-level ``key``, and stored back as
    a text token.

    Args:
        log: List of records (dicts) with a ``'data'`` entry.

    Returns:
        None. ``log`` is modified in place.

    Raises:
        UnicodeEncodeError: If the string form of a payload contains
            non-ASCII characters.
    """
    # Build the cipher once; it does not depend on the individual record.
    fernet = Fernet(key)
    for record in log:
        plaintext = str(record['data'])
        token = fernet.encrypt(plaintext.encode('ascii'))
        record['data'] = token.decode()

encrypt_data(log)
log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': 'gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK'},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': 'gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W'},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'd

In [324]:
import gzip

# Serialise the whole log to a single JSON string.
# NOTE(review): this rebinds `json_log`, which earlier cells use for the list of records.
json_log = json.dumps(log)

# Write the UTF-8 encoded JSON text to a gzip-compressed file.
with gzip.open('gzip_log.gz', 'wb') as f:
    f.write(json_log.encode('utf-8'))

# Read the compressed file back and decode it to text.
with gzip.open('gzip_log.gz', 'rb') as f:
    read_log = f.read().decode('utf-8')

# Parse the decompressed text back into Python objects.
decomp_log = json.loads(read_log)

# Round-trip check: the data read back must equal the data that was written.
assert log == decomp_log

In [325]:
read_log

'[{"recordId": 9494, "ArrivalTimeStamp": 1678891552.456, "data": "gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK"}, {"recordId": 9495, "ArrivalTimeStamp": 1678891552.768, "data": "gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W"}, {"recordId": 9496, "ArrivalTimeStamp": 1678891553.082, "data": "gAAAAA

In [326]:
log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': 'gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK'},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': 'gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W'},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'd

In [327]:
decomp_log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': 'gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK'},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': 'gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W'},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'd