In [10]:
# api 요청으로 log 데이터 추출(json)
# 추출된 log 데이터 저장
# requests 모듈 사용

import requests
import os
import json

url = 'http://ec2-3-37-12-122.ap-northeast-2.compute.amazonaws.com/api/data/log'
JSON_LOG_PATH_NOTE = './json_log_practice.json'

# api 요청으로 json_log 받기
def get_json_log(url):
    json_log = requests.get(url).json()
    return json_log

json_log = get_json_log(url)

# 받은 json_log 저장
def save_json_log(path, log):
    # path 를 가진 파일(로그가 저장된 파일)이 없을 경우, log 저장할 파일 생성
    if not os.path.isfile(path):
        with open(path, 'w') as f:
            json.dump(log, f, indent = 2)
    # path 를 가진 파일 있을 경우, 해당 파일에 이어서 log 저장
    else:
        with open(path, 'r') as f:
            log = json.load(f)
        with open(path, 'w') as f:
            json.dump(log, f, indent = 2)

save_json_log(JSON_LOG_PATH_NOTE, json_log)
    

In [11]:
# 받은 json_log 확인
json_log

[{'recordId': 48859,
  'ArrivalTimeStamp': 1678976918.66,
  'data': 'gAAAAABkErBIe3ywyhlTQf4aGNf6toxzfUmR7kIB9wShr0nfh0RfAdFW9sf-BzE2CKl8KpDzok6seo_D8fkwfoWIbOWOkudDGkpZMjlQm0MAOW5zQTLwu38FXP82yAMJfISfFrD5zTy-iT-b-e1aERxN6n6OcltWE-IMqYzGqqHSX_n3PpQt1fA3tCp_XQizjdULsDXfNbODliBWLF4IweFQ2InZ_P1080YCfzXl8-lM1Ou9iE0tyF-8C1-QNXZKfeym2JbKcwknBu9iA_7BWnOv7Q4mqsc2YAsoOVGEKI8HKt4Jts0kr3aHp1gTqjb7AokawyrrL4_MQIV7SCGxc7LfHhDGViTAT8EUGszKnwK5hQTPuJTd9EaaiFOPfd1xwl6fSmuFzoWyMIoxluSXW2-e6_hEmzhKxXgUNlxdCCWHqbVKnUoh46lqwxBH5Aw7JjnoB540XsBkHBiNdU6e6G4GxTxDkL_Zow=='},
 {'recordId': 48860,
  'ArrivalTimeStamp': 1678976918.945,
  'data': 'gAAAAABkErBIyWG-vyYIg5ofZ9HKlGgPbVQZCn-OJfLETWZla2-h0xCk-UjiHj6TxFdr5oF2hFY-PebrK44Us7Nnavf4FRJ3k9icBLEKd5tpFoKtzx7Fm89lzpiATmOpKQNa8ZTaz3_SmaSt102gmPgdh-ZoIEetiBwE6gpBZ7OeM_pDvp4eAG1_v25vrHHqup7fK-e2RrwLVnFvKiedVsuBUPX29AP7F7yWmmyEbAZ-KiOl0kotR_wkZzPAo-dqyOssPZyJwctf5LrA04a07Kbts_chNVLH1mpUqIK8eNp_ZljTldjPMJT2bE32KtVT9DxzjdD49oOLcDMzB2HzGo7U7Eyko-ra_mBYXQ3peQWJ5CKHenDWj

In [4]:
# 받은 log의 암호화된 데이터 복호화, 문자열 압축 알고리즘 적용 후 다시 암호화 및 저장
# 1. 암호화된 부분 복호화
# 2. b64uuid 모듈을 사용하여 'user_id' 64 자 -> 44 자 압축
# 3. 'method' 를 value 별 숫자로 압축
# 4. 'url' 을 value 별 숫자로 압축
# 5. 'inDate' 다른 표기방식으로 압축
# 6. 암호화
# 7. 파일 저장(.json)

In [12]:
# 1. 암호화된 부분 복호화

from cryptography.fernet import Fernet

key = b't-jdqnDewRx9kWithdsTMS21eLrri70TpkMq2A59jX8='

def decrypt_data(log, key):
    fernet = Fernet(key)
    for i in log:
        decrypted_data = fernet.decrypt(i['data']).decode('ascii')
        i['data'] = decrypted_data
        return

decrypt_data(json_log, key)

In [13]:
json_log

[{'recordId': 48859,
  'ArrivalTimeStamp': 1678976918.66,
  'data': "{'user_id': 'f3750fa96c10c43f6a5f6a6a45e151b84da314a1df4aec3f304895f450e4710a', 'record_id': 48859, 'activity': 'view', 'url': '/api/products/product/', 'method': 'POST', 'name': 'json_logger', 'inDate': '2023-03-16T14:28:38.660Z', 'detail': {'message': 'POST view', 'levelname': 'INFO'}}"},
 {'recordId': 48860,
  'ArrivalTimeStamp': 1678976918.945,
  'data': 'gAAAAABkErBIyWG-vyYIg5ofZ9HKlGgPbVQZCn-OJfLETWZla2-h0xCk-UjiHj6TxFdr5oF2hFY-PebrK44Us7Nnavf4FRJ3k9icBLEKd5tpFoKtzx7Fm89lzpiATmOpKQNa8ZTaz3_SmaSt102gmPgdh-ZoIEetiBwE6gpBZ7OeM_pDvp4eAG1_v25vrHHqup7fK-e2RrwLVnFvKiedVsuBUPX29AP7F7yWmmyEbAZ-KiOl0kotR_wkZzPAo-dqyOssPZyJwctf5LrA04a07Kbts_chNVLH1mpUqIK8eNp_ZljTldjPMJT2bE32KtVT9DxzjdD49oOLcDMzB2HzGo7U7Eyko-ra_mBYXQ3peQWJ5CKHenDWjLMFXWpgtTQEAs7WVmT_YUoEgaK5YlRw_joUyaJS7KNICnbTynp5Mef6CyoMh13QD_JDRxl9spRPONVAi0wc0LtMiN3KgAORg_2CgAOHjOQ1vA=='},
 {'recordId': 48861,
  'ArrivalTimeStamp': 1678976919.23,
  'data': 'gAAAAABkErBI

In [316]:
# b64uuid 모듈을 사용하여 'user_id' 데이터 44자로 변경

# json 모듈의 loads() 내장함수를 이용하여 log 의 'data' 부분을 dict 형태로 변환
import json

for d in log:
    d['data'] = json.loads(d['data'].replace("'", "\""))
    # replace 로 ' -> " 를 변환해주는 이유는 json 모듈이 key 나 value 를 문자열로 인식하기 때문
    # json 모듈의 인식가능한 문자열은 반드시 ""로 감싸여져 있어야 한다(JavaScript 에서 유래된 모듈이다)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'c9bf2018442a90ad2b0cd88c916f6f5e39e8ce4dd31970bab5738a5e0ca8e6c7',
   'record_id': 9494,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.456Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': '72b31cf00f8ab3967588fad4a32f61622cb162f9b7bc2cfbefba2084557370fa',
   'record_id': 9495,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.768Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': '7cabc7e02461a788af6c61e24d09df8790d9002f6bf614c6c88e5237ccd75011',
   'record_id': 9496,
   'activity': 'purchase',
   'url': '/api/products/product/',
   'metho

In [317]:
# b64uuid 모듈을 이용한 user_id 64(32 / 32) -> 44(22 / 22) 자 변환
import uuid
import base64

for i in log:
    user_id_1 = i['data']['user_id'][:32]
    user_id_2 = i['data']['user_id'][32:]
    uuid_1 = uuid.UUID(user_id_1)
    uuid_2 = uuid.UUID(user_id_2)
    b64uuid_1 = base64.urlsafe_b64encode(uuid_1.bytes).decode('utf-8').rstrip('=')
    b64uuid_2 = base64.urlsafe_b64encode(uuid_2.bytes).decode('utf-8').rstrip('=')
    b64uuid_id = b64uuid_1 + b64uuid_2
    i['data']['user_id'] = b64uuid_id

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.456Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.768Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': '/api/products/product/',
   'method': 'POST',
   'name': 'json_logger',
   'inDate': '2023-03-

In [318]:
li_method = {}
method_num = 0

def trans_method(data, li_method, method_num):
    if data['data']['method'] not in li_method:
        item = data['data']['method']
        method_num += 1
        li_method[item] = method_num
        trans_method(data, li_method, method_num)

    else:
        data['data']['method'] = li_method[data['data']['method']]
        return
        
for data in log:
    trans_method(data, li_method, method_num)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.456Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': '/api/products/product/',
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.768Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': '/api/products/product/',
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:53.082

In [319]:
li_method

{'POST': 1}

In [320]:
li_url = {}

def trans_url(data, li_url):
    if data['data']['url'] not in li_url:
        item = data['data']['url']
        if item[23:]:
            li_url[item] = int(item[23:])
        else:
            li_url[item] = 0
        trans_url(data, li_url)

    else:
        data['data']['url'] = li_url[data['data']['url']]
        return
        
for data in log:
    trans_url(data, li_url)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.456Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:52.768Z',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '2023-03-15T14:45:53.082Z',
   'detail': {'message': 'POST purchase', 'levelname': 'INFO'}}},

In [321]:
li_url

{'/api/products/product/': 0}

In [322]:
from datetime import datetime

def trans_indate(data):
    for i in data:
        inDate = i['data']['inDate']
        date = datetime.fromisoformat(inDate[:-1])
        outDate = date.strftime('%y%m%d%H%M%S%f')
        i['data']['inDate'] = outDate

trans_indate(log)

log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': {'user_id': 'yb8gGEQqkK0rDNiMkW9vXgOejOTdMZcLq1c4peDKjmxw',
   'record_id': 9494,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '230315144552456000',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': {'user_id': 'crMc8A-Ks5Z1iPrUoy9hYgLLFi-be8LPvvuiCEVXNw-g',
   'record_id': 9495,
   'activity': 'view',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '230315144552768000',
   'detail': {'message': 'POST view', 'levelname': 'INFO'}}},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'data': {'user_id': 'fKvH4CRhp4ivbGHiTQnfhwkNkAL2v2FMbIjlI3zNdQEQ',
   'record_id': 9496,
   'activity': 'purchase',
   'url': 0,
   'method': 1,
   'name': 'json_logger',
   'inDate': '230315144553082000',
   'detail': {'message': 'POST purchase', 'levelname': 'INFO'}}},
 {'recordId': 949

In [323]:
def encrypt_data(log):
    for i in log:
        json_log = i['data']
        fernet = Fernet(key)
        encrypt_str = fernet.encrypt(f'{json_log}'.encode('ascii'))
        i['data'] = encrypt_str.decode()

encrypt_data(log)
log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': 'gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK'},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': 'gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W'},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'd

In [324]:
import gzip

json_log = json.dumps(log)

with gzip.open('gzip_log.gz', 'wb') as f:
    f.write(json_log.encode('utf-8'))

with gzip.open('gzip_log.gz', 'rb') as f:
    read_log = f.read().decode('utf-8')

decomp_log = json.loads(read_log)

assert log == decomp_log

In [325]:
read_log

'[{"recordId": 9494, "ArrivalTimeStamp": 1678891552.456, "data": "gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK"}, {"recordId": 9495, "ArrivalTimeStamp": 1678891552.768, "data": "gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W"}, {"recordId": 9496, "ArrivalTimeStamp": 1678891553.082, "data": "gAAAAA

In [326]:
log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': 'gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK'},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': 'gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W'},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'd

In [327]:
decomp_log

[{'recordId': 9494,
  'ArrivalTimeStamp': 1678891552.456,
  'data': 'gAAAAABkEaDMZu20rNid_Oao_r0g2fFe-rWWfRMpuOhpNRkEZqdnbCRfJu51aMW3rIS9EVcadr9GMX8yTzupWTBvZMhLZKeFXdQ1BvhilD_JpdfyKIZ4E-tCYfij4UETgrL6BXUKB6SYXCen_wRxrliSBaVkUgRxFQmXPFhk5N_QuH0WrD0Vm0p5_bbOKMuAq-uzrAfgK9l1NQzw7bqUH6NxTgyvvrOUWXVU2keCSvGOm0W62zXRQCfmOYdqjLoyQWwqLrOel17fm80VD-a5n-HhK0sIGMzGl8CCpwTkbLSo5RuVy4aOBn8aEMBx0mOMIR4hQLZzjcNckfq3iwZv5EhBiznLmxrEo-DCZLq8WqnHtV3JGeg1xHijjosKihuIpBpxMNpU0oeK'},
 {'recordId': 9495,
  'ArrivalTimeStamp': 1678891552.768,
  'data': 'gAAAAABkEaDMqAVWAn8GL4SqyxavE4A_PIEBXgZ_-7mgoRw6mtcFgeYR1LtwlYDZkhbHcPsMxf6j8Zo5poh9MYtaAx311p5ymLA-48wrAC0V-_lMi0VdHClhaDFHerODZQFHRhHiSTercOGYfuWjY3vrof12wdW3-Gtx7N7vJ5VSrQ8xhZVkqqO5AfLKBsShf6jEP0RCiqnqhY5DGlKKzPmyS_-Aw8q9_HypbqzEWlJPj2_wa8FwUzZx1uk6jIIT6YrZCBakDlxb9BXWZtBaJeV7JonL6Jx76JQ-61L0pAxp6a7Y2xJBcmV8bXLW2V99CkC9KpBPtpjgI1yQZCoAZgQmZzhs4B11zv1E2gMwR--DbhRTDGp2engHAFv_YEjN--X6C36-3z7W'},
 {'recordId': 9496,
  'ArrivalTimeStamp': 1678891553.082,
  'd