# 라이브러리 임포트

In [40]:
from torchvision.datasets import ImageFolder
from torchvision import transforms, models
from torch.utils.data import DataLoader, Subset, Dataset
from sklearn.model_selection import StratifiedShuffleSplit
from PIL import Image
import matplotlib.pyplot as plt
import koreanize_matplotlib
from pyngrok import ngrok
import torch
from torch import nn, optim
from torchvision import datasets, transforms
import mlflow
import mlflow.pytorch
import os
import optuna
from optuna.integration.mlflow import MLflowCallback
import numpy as np
import pandas as pd
from torchinfo import summary
from tqdm import tqdm
import time
import os
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
from torch.optim.lr_scheduler import StepLR
from dotenv import load_dotenv

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

load_dotenv()  # .env 파일 자동 로드

ngrok_token = os.getenv("NGROK_AUTH_TOKEN")
github_token = os.getenv("GITHUB_TOKEN")

# MLflow 환경설정

In [42]:
mlflow.set_tracking_uri("/Users/sindongjun/AIOpsCICD/mlflow_logs")  # Google Drive에 저장
mlflow.set_experiment("AIOPS_v1.2")  # 실험 이름 설정
RUN_NAME = 'EfficientNet-B0_base'

# 파라미터 정의

In [43]:
params = {'LR' : 0.1e-3,
          'BATCH_SIZE' : 32,
          'EPOCH' : 10,
          'TRAIN_RATIO': 0.8,
          'LR_STEP' : 3,
          'LR_GAMMA' : 0.9
          }

criterion = nn.CrossEntropyLoss()
model_type = 'efficientnetb0'
path = '/Users/sindongjun/AIOpsCICD'
save_model_path = os.path.join(path, f'results/{model_type}.pth')
save_history_path = os.path.join(path, f'results/{model_type}_history.pth')

# 커스텀 데이터셋 생성

In [44]:
# CustomOrderImageFolder: ImageFolder를 상속받아 custom_order를 적용하는 클래스
class CustomOrderImageFolder(ImageFolder):
    def __init__(self, root, transform=None, target_transform=None, custom_order=None):
        """
        Args:
            root (str): 이미지가 저장된 최상위 경로.
            transform (callable, optional): 이미지 변환.
            target_transform (callable, optional): 레이블 변환.
            custom_order (list, optional): 원하는 클래스 순서의 리스트.
        """
        self.custom_order = custom_order
        super().__init__(root, transform=transform, target_transform=target_transform)

    def find_classes(self, directory):
        # 폴더 내의 클래스(폴더) 이름을 추출
        if self.custom_order is not None:
            classes = self.custom_order
        else:
            classes = [d for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))]
            classes.sort()
        class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
        return classes, class_to_idx

# CustomSubset: 기본 ImageFolder (CustomOrderImageFolder)에서 인덱스별로 데이터를 로드하며, 개별 transform을 적용하는 Dataset 클래스
class CustomSubset(Dataset):
    def __init__(self, image_folder, indices, transform):
        """
        Args:
            image_folder (ImageFolder): 기본 이미지 데이터셋 (transform=None이어야 함)
            indices (list or array): 선택할 데이터 인덱스 목록
            transform (callable): 해당 데이터셋에 적용할 transform
        """
        self.image_folder = image_folder
        self.indices = indices
        self.transform = transform

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        # 실제 인덱스
        actual_idx = self.indices[idx]
        # image_folder.samples는 (path, label) 튜플의 리스트
        path, label = self.image_folder.samples[actual_idx]
        # image_folder.loader는 PIL.Image로 이미지를 로드하는 함수
        image = self.image_folder.loader(path)
        # 지정된 transform을 적용하여 Tensor로 변환 (또는 다른 변환)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

def get_dataloaders(train_dataset, val_dataset, BATCH_SIZE):
    train_DL = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_DL = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    return train_DL, val_DL

custom_order = ['양호', '경증', '중등도', '중증']

# 기본 dataset은 transform 없이 생성 (순서 유지를 위해)
base_dataset = CustomOrderImageFolder(root=os.path.join(path, 'data'),
                                        transform=None,
                                        custom_order=custom_order)

# 전체 데이터셋의 레이블 목록 생성
targets = np.array([label for _, label in base_dataset.samples])

# 전체 데이터를 train (80%)와 test+val (20%)로 stratified split
sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_idx, test_val_idx in sss1.split(np.zeros(len(targets)), targets):
    pass

# test+val 셋을 50:50 비율로 분할하여 validation (10%)과 test (10%) 데이터셋 생성
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
test_val_targets = targets[test_val_idx]
for val_rel_idx, test_rel_idx in sss2.split(np.zeros(len(test_val_targets)), test_val_targets):
    val_idx = test_val_idx[val_rel_idx]
    test_idx = test_val_idx[test_rel_idx]
    break

# 각 데이터셋마다 적용할 transform 정의
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# CustomSubset을 이용해 각 데이터셋 생성 (각각 다른 transform 적용)
train_dataset = CustomSubset(base_dataset, train_idx, transform=train_transform)
val_dataset   = CustomSubset(base_dataset, val_idx, transform=val_transform)
test_dataset  = CustomSubset(base_dataset, test_idx, transform=test_transform)

# DataLoader 생성 (num_workers=0으로 하여 멀티프로세싱 문제 배제)
train_DL, val_DL = get_dataloaders(train_dataset, val_dataset, params['BATCH_SIZE'])
test_DL  = DataLoader(test_dataset, batch_size=params['BATCH_SIZE'], shuffle=False, num_workers=0)

print(f"Train size: {len(train_dataset)}")
print(f"Validation size: {len(val_dataset)}")
print(f"Test size: {len(test_dataset)}")

# 한 배치를 불러와서 확인 (모든 이미지가 Tensor로 변환되었는지 확인)
for images, labels in train_DL:
    print(f"Batch image type: {type(images)}, shape: {images.shape}")
    break

Train size: 960
Validation size: 120
Test size: 120
Batch image type: <class 'torch.Tensor'>, shape: torch.Size([32, 3, 224, 224])


# 모델 불러오기

In [45]:
# 모델 불러오기
second_model_path = '/Users/sindongjun/AIOpsCICD/final_best_model.pth'

load_model = torch.load(second_model_path, map_location=DEVICE, weights_only=False)['model']

# MLflow 연동

In [47]:
from pyngrok import ngrok

# ngrok 인증 (변수 활용)
ngrok.set_auth_token(ngrok_token)

# # 기존 ngrok 세션 종료
ngrok.kill()

In [48]:
# 2. MLflow UI 실행 (백그라운드 실행)
get_ipython().system_raw("mlflow ui --backend-store-uri '//Users/sindongjun/AIOpsNLP/mlflow_logs' --port 5001 &")

# 3. ngrok을 이용해 터널링 (HTTP 방식으로 명시적으로 설정)
public_url = ngrok.connect(5001, "http")
print(f"MLflow UI에 접속하려면 다음 링크를 클릭하세요: {public_url}")

MLflow UI에 접속하려면 다음 링크를 클릭하세요: NgrokTunnel: "https://e89c-61-34-253-239.ngrok-free.app" -> "http://localhost:5001"


[2025-05-26 21:27:56 +0900] [21146] [INFO] Starting gunicorn 23.0.0
[2025-05-26 21:27:56 +0900] [21146] [ERROR] Connection in use: ('127.0.0.1', 5001)
[2025-05-26 21:27:56 +0900] [21146] [ERROR] connection to ('127.0.0.1', 5001) failed: [Errno 48] Address already in use
[2025-05-26 21:27:57 +0900] [21146] [ERROR] Connection in use: ('127.0.0.1', 5001)
[2025-05-26 21:27:57 +0900] [21146] [ERROR] connection to ('127.0.0.1', 5001) failed: [Errno 48] Address already in use
[2025-05-26 21:27:58 +0900] [21146] [ERROR] Connection in use: ('127.0.0.1', 5001)
[2025-05-26 21:27:58 +0900] [21146] [ERROR] connection to ('127.0.0.1', 5001) failed: [Errno 48] Address already in use
[2025-05-26 21:27:59 +0900] [21146] [ERROR] Connection in use: ('127.0.0.1', 5001)
[2025-05-26 21:27:59 +0900] [21146] [ERROR] connection to ('127.0.0.1', 5001) failed: [Errno 48] Address already in use
[2025-05-26 21:28:00 +0900] [21146] [ERROR] Connection in use: ('127.0.0.1', 5001)
[2025-05-26 21:28:00 +0900] [21146] [

# FAST API 정의

In [49]:
ngrok.kill()

In [50]:
# --- FastAPI 코드 저장 ---
code = """
from fastapi import FastAPI, Request, UploadFile, File, Form
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from fastapi.staticfiles import StaticFiles
from PIL import Image
import torch, io
from torchvision import transforms

TEMPLATE_DIR = '/Users/sindongjun/AIOpsCICD'
app = FastAPI()
templates = Jinja2Templates(directory=TEMPLATE_DIR)

# 구글 드라이브에 있는 templates 경로 지정
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ckpt = torch.load('/Users/sindongjun/AIOpsCICD/model.pkl',
                  map_location=DEVICE, weights_only=False)
model = ckpt['model'].to(DEVICE)
model.eval()

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
labels = ['양호', '경증', '중등도', '중증']


@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    return templates.TemplateResponse("index.html", {"request": request})

@app.post("/predict", response_class=HTMLResponse)
async def predict(request: Request, file: UploadFile = File(...)):
    # 이미지 파일 읽기
    contents = await file.read()
    img = Image.open(io.BytesIO(contents)).convert('RGB')
    x = preprocess(img).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        y = model(x).argmax(1).item()
    y = 1  # 예시 (실제로는 모델로 예측)
    pred_label = labels[y]
    return templates.TemplateResponse("result.html", {"request": request, "prediction": pred_label})
"""
with open('/Users/sindongjun/AIOpsCICD/Main.py', 'w') as f:
    f.write(code)

# --- 서버 & ngrok ---
!pkill -f ngrok        # 터널 초기화

import nest_asyncio, uvicorn, threading, time
from pyngrok import ngrok
nest_asyncio.apply()

def run():
    uvicorn.run('Main:app', host='0.0.0.0', port=8000, reload=False)
threading.Thread(target=run, daemon=True).start()
time.sleep(2)

public_url = ngrok.connect(8000)
print('✅ FastAPI URL:', public_url)

Exception in thread Thread-8 (run):
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/c5/3w490dqd0630nzl56hkxz6_r0000gn/T/ipykernel_19527/3306876248.py", line 56, in run
  File "/opt/anaconda3/lib/python3.12/site-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/opt/anaconda3/lib/python3.12/site-packages/uvicorn/server.py", line 66, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/nest_asyncio.py", line 98, in run_until_complete
    return f.result()
           ^^^^^^^^^^

✅ FastAPI URL: NgrokTunnel: "https://49c1-61-34-253-239.ngrok-free.app" -> "http://localhost:8000"


# Docker 연동

In [51]:
import torch
import pickle
from torchvision.models import efficientnet_b0

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ckpt_path = "/Users/sindongjun/AIOpsCICD/final_best_model.pth"

# 1) .pth로부터 전체 모델 객체 복원
ckpt = torch.load(ckpt_path, map_location=DEVICE, weights_only=False)
model = ckpt["model"].to("cpu")   # 컨테이너에서 GPU 없을 수도 있으니 CPU로 저장
model.eval()

# 2) pickle로 덤프
with open("/Users/sindongjun/AIOpsCICD/model.pkl", "wb") as f:
    pickle.dump(model, f)

print("✅ model.pkl 저장 완료:" )
!ls -lh model.pkl

✅ model.pkl 저장 완료:
-rw-r--r--  1 sindongjun  staff    16M May 26 21:28 model.pkl


In [52]:
%%writefile requirements.txt
fastapi
uvicorn[standard]
pillow
torch
torchvision
scikit-learn
python-multipart

Overwriting requirements.txt


In [53]:
%%writefile Dockerfile
# Dockerfile 예시
FROM python:3.10-slim
WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "Main:app", "--host", "0.0.0.0", "--port", "8000"]

Overwriting Dockerfile


In [54]:
# ml-api-app.zip 파일 생성
!zip -r ml-api-app.zip Main.py model.pkl requirements.txt Dockerfile

updating: Main.py (deflated 46%)
updating: model.pkl (deflated 8%)
updating: requirements.txt (deflated 14%)
updating: Dockerfile (deflated 18%)


## zipfile 해제

In [55]:
import zipfile
import os

# 압축 파일 경로 (예: 현재 폴더에 ml-api-app.zip)
zip_path = "/Users/sindongjun/AIOpsCICD/ml-api-app.zip"

# 압축 해제할 위치 (예: Desktop)
extract_dir = "/Users/sindongjun/AIOpsCICD/ml-api-app"

# 디렉토리 없으면 생성
os.makedirs(extract_dir, exist_ok=True)

# 압축 해제
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"압축이 {extract_dir} 폴더로 해제되었습니다.")

압축이 /Users/sindongjun/AIOpsCICD/ml-api-app 폴더로 해제되었습니다.


# Github Actions 설정

In [56]:
# 디렉토리 생성 (숨김폴더)
!mkdir -p .github/workflows

In [57]:
# 숨김 폴더 내용 확인
!ls -la .github/workflows

total 8
drwxr-xr-x  3 sindongjun  staff   96 May 26 20:43 [34m.[m[m
drwxr-xr-x  3 sindongjun  staff   96 May 26 20:43 [34m..[m[m
-rw-r--r--  1 sindongjun  staff  727 May 26 20:43 docker-build.yml


In [62]:
# GitHub Actions 워크플로우 파일 생성
workflow_code = """
name: Build and Push Docker Image

on:
  push:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout source code
      uses: actions/checkout@v2

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    - name: Log in to DockerHub
      run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin

    - name: Build Docker image
      run: docker build -t ${{ secrets.DOCKER_USERNAME }}/scalp:latest .

    - name: Push Docker image
      run: docker push ${{ secrets.DOCKER_USERNAME }}/scalp:latest

    - name: Trigger Render Deploy Hook
      run: |
        curl -X POST ${{ secrets.RENDER_DEPLOY_HOOK }}

"""
with open(".github/workflows/docker-build.yml", "w") as f:
    f.write(workflow_code)

In [63]:
!git add .

In [60]:
!git commit -m "Initial commit with CI/CD_v2"

[main 16ef7e1] Initial commit with CI/CD
 1218 files changed, 281 insertions(+), 81 deletions(-)
 create mode 100644 .github/workflows/docker-build.yml
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 Main.py
 create mode 100644 README.md
 create mode 100644 "data/\352\262\275\354\246\235/0013_A2LEBJJDE00060O_1604540048368_2_TH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0013_A2LEBJJDE00060O_1605534176626_2_TH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0013_A2LEBJJDE00060O_1605534189733_2_TH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0131_A2LEBJJDE00166C_1604627632520_5_RH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0131_A2LEBJJDE00166C_1604637226775_4_LH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0131_A2LEBJJDE00166C_1604904858222_6_BH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0131_A2LEBJJDE00166C_1604905984677_5_RH.jpg"
 create mode 100644 "data/\352\262\275\354\246\235/0335_