diff --git a/app/api/challenge.py b/app/api/challenge.py
index b46a4c1..2d7613f 100644
--- a/app/api/challenge.py
+++ b/app/api/challenge.py
@@ -1,11 +1,13 @@
 from json import JSONDecodeError
-from logging import log
+from logging import getLogger
 
 from flask import Blueprint, jsonify, request
 from app.exceptions.api import InvalidRequest
 from app.exceptions.userchallenge import UserChallengeCreationError, UserChallengeDeletionError, UserChallengeNotFoundError
 from app.extensions.db.repository import UserChallengesRepository
 from app.extensions.k8s.client import K8sClient
+from app.monitoring.ctf_metrics_collector import challenge_metrics_collector
 
 challenge_bp = Blueprint('challenge', __name__)
+log = getLogger(__name__)  # logging.log is a function and has no .error(); use a real logger
 
@@ -15,21 +17,33 @@ def create_challenge():
     # Challenge 관련 정보 가져오기
     res = request.get_json()
     if not res:
-        raise InvalidRequest(error_msg="Request body is empty or not valid JSON")
+        challenge_metrics_collector.challenge_operations.labels(operation='create',result='error').inc()
+        raise InvalidRequest(error_msg="Request body is empty or not valid JSON")
 
     if 'challenge_id' not in res:
+        challenge_metrics_collector.challenge_operations.labels(operation='create',result='error').inc()
         raise InvalidRequest(error_msg="Required field 'challenge_id' is missing in request")
     challenge_id = res['challenge_id']
 
     if 'username' not in res:
+        challenge_metrics_collector.challenge_operations.labels(operation='create',result='error').inc()
         raise InvalidRequest(error_msg="Required field 'username' is missing in request")
     username = res['username']
 
     # 챌린지 생성
     client = K8sClient()
     endpoint = client.create_challenge_resource(challenge_id, username)
     if not endpoint:
+        challenge_metrics_collector.challenge_operations.labels(operation='create',result='error').inc()
-        raise UserChallengeCreationError(error_msg=f"Faile to create challenge {challenge_id} for user {username}")
+        raise UserChallengeCreationError(error_msg=f"Failed to create challenge {challenge_id} for user {username}")
+
+    challenge_metrics_collector.challenge_state.labels(
+        challenge_id=challenge_id,
+        username=username,
+        state='active'
+    ).set(1)
+    challenge_metrics_collector.challenge_operations.labels(operation='create',result='success').inc()
+
     return jsonify({'data' : {'port': endpoint}}), 200
 
 @challenge_bp.route('/delete', methods=['POST'])
@@ -42,15 +56,18 @@ def delete_userchallenges():
         res = request.get_json()
         if not res:
             log.error("No data provided")
+            challenge_metrics_collector.challenge_operations.labels(operation='delete',result='error').inc()
             raise UserChallengeDeletionError(error_msg="Request body is empty or not valid JSON")
 
         if 'challenge_id' not in res:
             log.error("No challenge_id provided")
+            challenge_metrics_collector.challenge_operations.labels(operation='delete',result='error').inc()
             raise InvalidRequest(error_msg="Required field 'challenge_id' is missing in request")
         challenge_id = res['challenge_id']
 
         if 'username' not in res:
             log.error("No username provided")
+            challenge_metrics_collector.challenge_operations.labels(operation='delete',result='error').inc()
             raise InvalidRequest(error_msg="Required field 'username' is missing in request")
         username = res['username']
 
@@ -58,9 +75,20 @@ def delete_userchallenges():
         client = K8sClient()
         client.delete_userchallenge(username, challenge_id)
 
+        # Metrics: write 0 to the same series create_challenge() set to 1, so it reads inactive after deletion
+        challenge_metrics_collector.challenge_state.labels(
+            challenge_id=challenge_id,
+            username=username,
+            state='active'
+        ).set(0)
+        challenge_metrics_collector.challenge_operations.labels(
+            operation='delete',
+            result='success'
+        ).inc()
         return jsonify({'message' : '챌린지가 정상적으로 삭제되었습니다.'}), 200
     except JSONDecodeError as e:
         log.error("Invalid request format")
+        challenge_metrics_collector.challenge_operations.labels(operation='delete',result='error').inc()
         raise InvalidRequest(error_msg=str(e)) from e
 
 @challenge_bp.route('/status', methods=['POST'])
diff --git a/app/factory.py b/app/factory.py
index fa76e6e..bc19ae5 100644
--- a/app/factory.py
+++ b/app/factory.py
@@ -1,9 +1,14 @@
 import sys
+
+from requests import Response
+from app.monitoring.ctf_metrics_collector import ChallengeMetricsCollector
 from app.monitoring.loki_logger import FlaskLokiLogger
+from app.monitoring.system_metrics_collector import SystemMetricsCollector
 from flask import Flask, g, request
 import threading
 from datetime import datetime
 from typing import Any, Dict, Type
+from prometheus_client import REGISTRY, generate_latest, CONTENT_TYPE_LATEST
 
 from app.api.challenge import challenge_bp
 from app.config import Config
@@ -22,12 +27,13 @@ def __init__(self, config_class: Type[Config] = Config):
         self.app = Flask(__name__)
         self.app.config.from_object(config_class)
         self.logger = FlaskLokiLogger(app_name="challenge-api", loki_url=self.app.config['LOKI_URL']).logger
-        
+
         # 초기 설정
         self._init_extensions()
         self._setup_middleware()
         self._register_error_handlers()
         self._setup_blueprints()
+        self._init_metrics_collector()
 
     def _init_extensions(self):
         """Extensions 초기화"""
@@ -40,6 +46,13 @@ def _init_extensions(self):
         with self.app.app_context():
             db.create_all()
 
+    def _init_metrics_collector(self):
+        """Register /metrics on the app and start the background system-metrics thread."""
+        # Initialise the system metrics collector
+        system_collector = SystemMetricsCollector(self.app)
+        system_collector.start_collecting()
+
+
     def _setup_middleware(self):
         """미들웨어 설정"""
         @self.app.before_request
@@ -80,7 +93,6 @@ def _get_request_context(self) -> Dict[str, Any]:
                 "remote_addr": request.remote_addr,
                 "user_agent": request.user_agent.string,
                 "request_id": request.headers.get('X-Request-ID', 'unknown'),
-                "timestamp": datetime.utcnow().isoformat()
             }
         except Exception as e:
             # 요청 컨텍스트 추출 실패 시 기본값
@@ -103,7 +115,6 @@ def _log_request(self, response, processing_time: float):
                 "request_id": context.get("request_id", "unknown"),
                 "status_code": str(getattr(response, 'status_code', 'unknown')),
                 "method": context.get("method", "UNKNOWN"),
-                "path": context.get("path", "/")
             }
 
             # Prepare log content
@@ -111,19 +122,10 @@ def _log_request(self, response, processing_time: float):
                 "processing_time_ms": round(processing_time * 1000, 2),
                 "remote_addr": context.get("remote_addr", ""),
                 "user_agent": context.get("user_agent", ""),
-                "method": context.get("method", ""),
                 "path": context.get("path", ""),
-                "status_code": str(getattr(response, 'status_code', 'unknown')),
-                "timestamp": context.get("timestamp", datetime.utcnow().isoformat())
             }
 
-            # 추가 정보 안전하게 포함
-            try:
-                if request.is_json:
-                    log_content["request_body"] = request.get_json()
-            except Exception as e:
-                log_content["request_body_error"] = str(e)
-            
+
             self.logger.info(
                 "HTTP Request",
                 extra={
@@ -139,30 +141,19 @@ def _log_request(self, response, processing_time: float):
     def _log_error(self, error: CustomBaseException):
         """에러 로깅"""
         try:
-            # 요청 컨텍스트 안전하게 추출
-            context = {
-                "method": getattr(request, 'method', 'UNKNOWN'),
-                "path": getattr(request, 'path', '/'),
-                "remote_addr": getattr(request, 'remote_addr', ''),
-                "user_agent": str(getattr(request, 'user_agent', '')),
-                "request_id": request.headers.get('X-Request-ID', 'unknown') if request else 'unknown'
-            }
-
             # 로깅
             self.logger.error(
                 "Application Error",
                 extra={
                     "labels": {
                         "error_type": str(error.error_type.value),
-                        "request_id": context.get('request_id', 'unknown')
+                        "request_id": request.headers.get('X-Request-ID', 'unknown') if request else 'unknown'
                     },
                     "content": {
-                        **context,
                         "error_type": str(error.error_type.value),
                         "error_message": str(error.message),
                         "error_msg": str(error.error_msg or ''),
                         "status_code": error.status_code,
-                        "timestamp": datetime.utcnow().isoformat()
                     }
                 }
             )
diff --git a/app/monitoring/ctf_metrics_collector.py b/app/monitoring/ctf_metrics_collector.py
new file mode 100644
index 0000000..b17b69a
--- /dev/null
+++ b/app/monitoring/ctf_metrics_collector.py
@@ -0,0 +1,71 @@
+from prometheus_client import REGISTRY, Gauge, Counter, CONTENT_TYPE_LATEST
+
+class ChallengeMetricsCollector:
+    """Prometheus metrics for challenge lifecycle operations."""
+
+    def __init__(self):
+        # Challenge state gauge. The value encodes the state
+        # (1: active, 0: inactive); every writer addresses the series with
+        # state='active' so one series per challenge/user flips 1 <-> 0.
+        self.challenge_state = Gauge(
+            'challenge_state',
+            'Current state of challenges',
+            ['challenge_id', 'username','state']
+        )
+
+        # Counter of API request outcomes.
+        # operation: create/delete/status, result: success/error
+        self.challenge_operations = Counter(
+            'challenge_operations_total',
+            'Challenge operation results',
+            ['operation', 'result']
+        )
+
+        # Gauge/Counter attach to the default registry when constructed,
+        # so the explicit registration below stays disabled.
+        # self.register_metrics()
+
+    def register_metrics(self):
+        """
+        Register this collector with the Prometheus registry.
+        NOTE(review): the metrics above are already in the default registry;
+        confirm this does not raise a duplicate-timeseries error before use.
+        """
+        REGISTRY.register(self)
+
+    def collect(self):
+        """
+        Prometheus Collector interface.
+        Yields the metric families of both metrics.
+        """
+        yield from self.challenge_state.collect()
+        yield from self.challenge_operations.collect()
+
+    def update_challenge_state(self, challenge_id: str, username: str, state: int):
+        """
+        Update the state of a challenge.
+        :param challenge_id: challenge ID
+        :param username: user name
+        :param state: state (1: active, 0: inactive)
+        """
+        # All three declared labels must be supplied, otherwise labels()
+        # raises ValueError.
+        self.challenge_state.labels(
+            challenge_id=challenge_id,
+            username=username,
+            state='active'
+        ).set(state)
+
+    def record_challenge_operation(self, operation: str, result: str):
+        """
+        Record the outcome of a challenge operation.
+        :param operation: operation type (create/delete/status)
+        :param result: result (success/error)
+        """
+        self.challenge_operations.labels(
+            operation=operation,
+            result=result
+        ).inc()
+
+
+challenge_metrics_collector = ChallengeMetricsCollector()
diff --git a/app/monitoring/system_metrics_collector.py b/app/monitoring/system_metrics_collector.py
new file mode 100644
index 0000000..03294c0
--- /dev/null
+++ b/app/monitoring/system_metrics_collector.py
@@ -0,0 +1,180 @@
+import socket
+from flask import Flask, Response
+import psutil
+from prometheus_client import Gauge, Counter, generate_latest, CONTENT_TYPE_LATEST
+import time
+from threading import Thread
+
+class SystemMetricsCollector:
+    """Publish host CPU, memory, disk and network statistics as Prometheus metrics."""
+
+    def __init__(self, app: Flask = None):
+        """
+        Create the metric objects.
+        :param app: optional Flask app on which to expose /metrics
+        """
+        # CPU metrics
+        self.cpu_usage = Gauge(
+            'system_cpu_usage_percent',
+            'CPU Usage in Percent',
+            ['cpu_type']  # user, system, idle
+        )
+
+        # Memory metrics
+        self.memory_usage = Gauge(
+            'system_memory_bytes',
+            'Memory Usage in Bytes',
+            ['type']  # used, free, cached, total
+        )
+
+        # Disk metrics
+        self.disk_usage = Gauge(
+            'system_disk_bytes',
+            'Disk Usage in Bytes',
+            ['mount_point', 'type']  # used, free, total
+        )
+
+        self.disk_io = Counter(
+            'system_disk_io_bytes',
+            'Disk I/O in Bytes',
+            ['operation']  # read, write
+        )
+
+        # Network metrics
+        self.network_traffic = Counter(
+            'system_network_traffic_bytes',
+            'Network Traffic in Bytes',
+            ['interface', 'direction']  # received, transmitted
+        )
+
+        self.network_connections = Gauge(
+            'system_network_connections',
+            'Number of Network Connections',
+            ['protocol', 'status']  # tcp/udp, ESTABLISHED/LISTEN/etc
+        )
+
+        # Previous cumulative psutil reading per counter series; psutil
+        # reports running totals, while Counter.inc() expects an increment.
+        self._last_totals = {}
+        # (protocol, status) pairs exported so far, so that pairs which
+        # disappear can be reset to 0 instead of keeping a stale count.
+        self._seen_connection_series = set()
+
+        # Register Flask app endpoint if provided
+        if app is not None:
+            self.init_app(app)
+
+    def init_app(self, app: Flask):
+        """Register the metrics endpoint with a Flask app"""
+        @app.route('/metrics')
+        def metrics():
+            return Response(generate_latest(), mimetype=CONTENT_TYPE_LATEST)
+
+    def _inc_by_delta(self, series, key, total):
+        """
+        Advance a Counter series by the growth of a cumulative total.
+        :param series: labelled Counter child to increment
+        :param key: hashable key identifying the series in self._last_totals
+        :param total: current cumulative value read from psutil
+        """
+        previous = self._last_totals.get(key)
+        self._last_totals[key] = total
+        if previous is None or total < previous:
+            # First sample, or the underlying total went backwards (reset):
+            # count the whole reading rather than a negative difference.
+            series.inc(total)
+        else:
+            series.inc(total - previous)
+
+    def collect_cpu_metrics(self):
+        """Collect CPU metrics"""
+        cpu_times = psutil.cpu_times_percent()
+        self.cpu_usage.labels(cpu_type='user').set(cpu_times.user)
+        self.cpu_usage.labels(cpu_type='system').set(cpu_times.system)
+        self.cpu_usage.labels(cpu_type='idle').set(cpu_times.idle)
+
+    def collect_memory_metrics(self):
+        """Collect memory metrics"""
+        mem = psutil.virtual_memory()
+        self.memory_usage.labels(type='total').set(mem.total)
+        self.memory_usage.labels(type='used').set(mem.used)
+        self.memory_usage.labels(type='free').set(mem.free)
+        # psutil only exposes 'cached' on some platforms.
+        cached = getattr(mem, 'cached', None)
+        if cached is not None:
+            self.memory_usage.labels(type='cached').set(cached)
+
+    def collect_disk_metrics(self):
+        """Collect disk metrics"""
+        # Disk usage
+        for partition in psutil.disk_partitions():
+            if partition.fstype:
+                usage = psutil.disk_usage(partition.mountpoint)
+                self.disk_usage.labels(
+                    mount_point=partition.mountpoint,
+                    type='total'
+                ).set(usage.total)
+                self.disk_usage.labels(
+                    mount_point=partition.mountpoint,
+                    type='used'
+                ).set(usage.used)
+                self.disk_usage.labels(
+                    mount_point=partition.mountpoint,
+                    type='free'
+                ).set(usage.free)
+
+        # Disk I/O (psutil returns None when there are no disks)
+        disk_io = psutil.disk_io_counters()
+        if disk_io is not None:
+            self._inc_by_delta(self.disk_io.labels(operation='read'), ('disk', 'read'), disk_io.read_bytes)
+            self._inc_by_delta(self.disk_io.labels(operation='write'), ('disk', 'write'), disk_io.write_bytes)
+
+    def collect_network_metrics(self):
+        """Collect network metrics"""
+        # Network traffic
+        net_io = psutil.net_io_counters(pernic=True)
+        for interface, counters in net_io.items():
+            self._inc_by_delta(
+                self.network_traffic.labels(interface=interface, direction='received'),
+                ('net', interface, 'received'),
+                counters.bytes_recv
+            )
+            self._inc_by_delta(
+                self.network_traffic.labels(interface=interface, direction='transmitted'),
+                ('net', interface, 'transmitted'),
+                counters.bytes_sent
+            )
+
+        # Network connections
+        connections = psutil.net_connections()
+        conn_count = {}
+        for conn in connections:
+            proto = 'tcp' if conn.type == socket.SOCK_STREAM else 'udp'
+            key = (proto, conn.status)
+            conn_count[key] = conn_count.get(key, 0) + 1
+
+        # Series seen earlier but absent now are written as 0.
+        self._seen_connection_series.update(conn_count)
+        for proto, status in self._seen_connection_series:
+            self.network_connections.labels(
+                protocol=proto,
+                status=status
+            ).set(conn_count.get((proto, status), 0))
+
+    def collect_metrics(self):
+        """Collect all metrics periodically"""
+        while True:
+            try:
+                self.collect_cpu_metrics()
+                self.collect_memory_metrics()
+                self.collect_disk_metrics()
+                self.collect_network_metrics()
+            except Exception as e:
+                print(f"Error collecting metrics: {e}")
+            time.sleep(15)  # Collect metrics every 15 seconds
+
+    def start_collecting(self):
+        """Start a thread to collect metrics"""
+        thread = Thread(target=self.collect_metrics)
+        thread.daemon = True
+        thread.start()
diff --git a/configs/prometheus/prometheus.yml b/configs/prometheus/prometheus.yml
index f3ec85c..77f697a 100644
--- a/configs/prometheus/prometheus.yml
+++ b/configs/prometheus/prometheus.yml
@@ -7,6 +7,15 @@ scrape_configs:
     static_configs:
       - targets: ['localhost:9090']
 
-  - job_name: 'flask-app'
+  - job_name: 'flask_system'
     static_configs:
-      - targets: ['localhost:8000']
\ No newline at end of file
+      - targets: ['192.168.67.2:5001']
+    metrics_path: '/metrics'
+    scrape_interval: 15s
+
+  # CTF metrics scrape job (/metrics/ctf)
+  # - job_name: 'flask_ctf'
+  #   static_configs:
+  #     - targets: ['192.168.67.2:5001']
+  #   metrics_path: '/metrics/ctf'
+  #   scrape_interval: 15s
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5e95feb..f9370c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ Flask-SQLAlchemy==3.0.2
 mariadb>=1.0.11
 prometheus-client==0.19.0
 python-logging-loki==0.3.1
-flask-prometheus-metrics==1.0.0
\ No newline at end of file
+flask-prometheus-metrics==1.0.0
+psutil==5.9.8
\ No newline at end of file