In [None]:
# 기본 패키지 설치 (추가: fastapi, uvicorn, nest_asynci)
!pip install --upgrade pip setuptools wheel
!pip install pandas scikit-learn joblib fastapi uvicorn nest-asyncio konlpy optuna

# JPype1 버전 고정 (konlpy 호환)
!pip uninstall JPype1 -y
!pip install JPype1==1.4.1

Found existing installation: JPype1 1.4.1
Uninstalling JPype1-1.4.1:
  Successfully uninstalled JPype1-1.4.1
Collecting JPype1==1.4.1
  Using cached JPype1-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Using cached JPype1-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (464 kB)
Installing collected packages: JPype1
Successfully installed JPype1-1.4.1


In [None]:
# Install clinfo and run it to report the OpenCL platforms visible to this runtime.
!apt-get install -y clinfo
!clinfo

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
clinfo is already the newest version (3.0.21.02.21-1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.
Number of platforms                               0


In [None]:
!pip install optuna



In [None]:
%%bash
# Abort on the first failing command so a failed clone or build cannot cascade
# into later steps that would then run in the wrong directory.
set -euo pipefail

# 1. Install system dependencies (CMake, Boost, ...)
apt-get update && apt-get install -y cmake libboost-dev libboost-system-dev libboost-filesystem-dev

# 2. Remove any previous LightGBM checkout, then clone from GitHub
rm -rf LightGBM
git clone --recursive https://github.com/microsoft/LightGBM.git

# 3. Check out the v3.3.2 tag (setup.py-based release) on a local branch
cd LightGBM
git checkout tags/v3.3.2 -b v3.3.2_gpu
# The recursive clone populated the submodules at the default branch's revisions;
# re-sync them to the revisions recorded by the tag before building.
git submodule update --init --recursive

# 4. Build with CUDA support
mkdir build && cd build
cmake -DUSE_CUDA=ON ..
make -j4

# 5. Install the Python bindings through setup.py, using the --precompile option
cd ../python-package
python setup.py install --precompile


Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [70.9 kB]
Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,381 kB]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:13 https://r2u.stat.illinois.edu/ubuntu 

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Cloning into 'LightGBM'...
Submodule 'include/boost/compute' (https://github.com/boostorg/compute) registered for path 'external_libs/compute'
Submodule 'eigen' (https://gitlab.com/libeigen/eigen.git) registered for path 'external_libs/eigen'
Submodule 'external_libs/fast_double_parser' (https://github.com/lemire/fast_double_parser.git) registered for path 'external_libs/fast_double_parser'
Submodule 'external_libs/fmt' (https://github.com/fmtlib/fmt.git) registered for path 'external_libs/fmt'
Cloning into '/content/LightGBM/external_libs/compute'...
Cloning into '/content/LightGBM/external_libs/eigen'...
Cloning into '/content/LightGBM/external_libs/fast_double_parser'...
Cloning into '/content/LightGBM/external_libs/fmt'...
Submodule 'benchmark/dependencies/abseil-cpp' (https://github.com/abseil/a

In [None]:
from google.colab import files
uploaded = files.upload()  # Upload the CSV file


Saving 소상공인시장진흥공단_상가(상권)정보_경북_202412.csv to 소상공인시장진흥공단_상가(상권)정보_경북_202412.csv


In [None]:
import pandas as pd
import re
from konlpy.tag import Okt
from joblib import dump
from tqdm import tqdm

# Enable tqdm's pandas extension (provides progress_apply)
tqdm.pandas()

# Load the CSV data
print("[1/4] 데이터 로드 중...")
df = pd.read_csv("소상공인시장진흥공단_상가(상권)정보_경북_202412.csv")
print(f"원본 데이터 크기: {df.shape}")

# Initialise the morphological analyser
okt = Okt()

# Preprocessing function: lowercase, strip special characters, extract nouns
def enhanced_preprocess(text):
    """Normalise a raw store name into a space-separated string of nouns.

    Args:
        text: Raw cell value from the store-name column. Usually a string, but
            a missing cell arrives as None or float NaN.

    Returns:
        The nouns returned by ``okt.nouns`` joined with single spaces, or ""
        when the input is missing.
    """
    # Without this guard a missing cell is stringified ("nan" / "none") and
    # handed to the tagger as if it were a real store name.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = str(text).lower()
    # Keep only Hangul syllables, ASCII letters, digits and whitespace.
    text = re.sub(r"[^가-힣a-zA-Z0-9\s]", "", text)
    nouns = okt.nouns(text)
    return ' '.join(nouns).strip()

# Preprocess the store names (tqdm shows progress)
print("[2/4] 상호명 전처리 중...")
df["상호명_전처리"] = df["상호명"].progress_apply(enhanced_preprocess)

# Mapping for the 8 main categories (adjust the codes as needed).
# Keys are the category labels; values are the industry codes assigned to each.
category_mapping = {
    "식비": ["I201", "I202", "I203", "I204", "I205", "I206", "I207",
           "I210", "I211", "I212", "G204", "G205", "G206"],
    "교통": ["G214", "N109", "S203"],
    "주거": ["L102", "D101", "D102", "D103"],
    "의료": ["Q101", "Q102", "Q104", "G215", "M111"],
    "문화": ["R102", "R103", "R104", "I101", "I102", "N110"],
    "쇼핑": ["G202", "G203", "G208", "G209", "G210", "G211", "G212",
             "G216", "G217", "G218", "G219", "G220", "G221", "G222"],
    "교육": ["P105", "P106", "P107", "G213"],
    "기타": ["M103", "M104", "M105", "M106", "M107", "M109", "M112",
           "M113", "M114", "M115", "N101", "N102", "N103", "N104",
           "N105", "N107", "N108", "N111", "S201", "S202", "S204",
           "S205", "S206", "S207", "S208", "S209", "S210", "S211", "G207"]
}

# Map each code to its category
print("[3/4] 카테고리 매핑 중...")
# Invert the mapping into a flat code -> category lookup.
code_to_category = {code: cat for cat, codes in category_mapping.items() for code in codes}
# Codes that are not listed in the mapping fall back to the "기타" category.
df["카테고리"] = df["상권업종중분류코드"].astype(str).map(lambda x: code_to_category.get(x, "기타"))

# Save the preprocessing results (used later for model training)
df.to_csv("preprocessed_data.csv", index=False)
dump(code_to_category, "category_mapping.joblib")
print("전처리 완료! (preprocessed_data.csv 생성)")


[1/4] 데이터 로드 중...
원본 데이터 크기: (147012, 39)
[2/4] 상호명 전처리 중...


100%|██████████| 147012/147012 [01:18<00:00, 1862.59it/s]


[3/4] 카테고리 매핑 중...
전처리 완료! (preprocessed_data.csv 생성)


In [None]:
##############################
# 0) 라이브러리 & 설치 (Colab 경우)
##############################
# 필요 시 아래 명령어 실행 (런타임 재시작 후 진행 권장)
# !pip install --upgrade pip setuptools wheel
# !pip install pandas scikit-learn joblib fastapi uvicorn nest-asyncio optuna konlpy imbalanced-learn
# !pip install JPype1==1.4.1  # konlpy 호환

import pandas as pd
import numpy as np
import re
import scipy.sparse
import joblib
import optuna
from tqdm import tqdm

##############################
# 1) konlpy, imblearn 및 기타 임포트
##############################
from konlpy.tag import Okt
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
import xgboost as xgb

##############################
# 2) 데이터 로드 및 기본 전처리
##############################
# "preprocessed_data.csv" 파일에 ["상호명_전처리", "카테고리", "상호명"] 컬럼이 있다고 가정
# Read only the three columns used below; missing preprocessed names become empty strings.
df = pd.read_csv("preprocessed_data.csv", usecols=["상호명_전처리", "카테고리", "상호명"])
df["상호명_전처리"] = df["상호명_전처리"].fillna("").astype(str)

##############################
# 3) 명사 추출 함수 (전처리)
##############################
okt = Okt()
def only_nouns(text: str) -> str:
    """Return the nouns of ``text`` as a single space-separated string.

    The text is lower-cased and everything except Hangul syllables, digits,
    lowercase ASCII letters and whitespace is removed before noun extraction.
    Stopword nouns and single-character nouns are dropped from the result.
    """
    cleaned = re.sub(r"[^가-힣0-9a-z\s]", "", text.lower())
    excluded = {"점", "센터", "코너", "플라자", "주식회사", "유한회사"}
    kept = []
    for noun in okt.nouns(cleaned):
        if len(noun) > 1 and noun not in excluded:
            kept.append(noun)
    return " ".join(kept).strip()

# The column is already partly preprocessed, but only_nouns is re-applied for consistency
# (this pass additionally removes stopwords and single-character nouns).
tqdm.pandas()
df["상호명_전처리"] = df["상호명_전처리"].progress_apply(only_nouns)

##############################
# 4) 쇼핑 키워드 피처 추가
##############################
# Representative shopping-related keywords (edit as needed)
shopping_keywords = ["마트", "편의점", "백화점", "쇼핑", "문구", "의류", "화장품", "가전", "가구", "서점"]

def has_shop_keyword(text: str) -> int:
    """Flag whether a store name contains any shopping-related keyword.

    Args:
        text: Store name; the notebook passes the raw (unprocessed) name.
            Non-string values such as None or NaN from an empty cell are
            treated as containing no keyword.

    Returns:
        1 if any entry of ``shopping_keywords`` occurs as a substring of the
        lower-cased name, otherwise 0.
    """
    # A missing name is a float NaN / None and has no .lower(); report "no keyword"
    # instead of raising AttributeError in the middle of DataFrame.apply.
    if not isinstance(text, str):
        return 0
    lowered = text.lower()
    return int(any(kw in lowered for kw in shopping_keywords))

# Extra feature: whether the raw store name contains a shopping-related keyword
df["has_shop_kw"] = df["상호명"].apply(has_shop_keyword)

##############################
# 5) Label encoding
##############################
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(df["카테고리"])

##############################
# 6) Train/test split
##############################
# Separate the text data (preprocessed store names) from the extra feature (has_shop_kw)
X_text_raw = df["상호명_전처리"].values  # array of strings
X_kw = df["has_shop_kw"].values.reshape(-1, 1)  # numeric array reshaped to a single column

# Split text, labels and keyword feature in one call so their rows stay aligned
X_text_train, X_text_test, y_train, y_test, X_kw_train, X_kw_test = train_test_split(
    X_text_raw, y, X_kw, test_size=0.2, random_state=42, stratify=y
)

##############################
# 7) Optuna 탐색 함수 정의 (키워드 피처 포함)
##############################
from sklearn.feature_extraction.text import TfidfVectorizer

def objective(trial):
    """Optuna objective: weighted F1 of an XGBoost classifier on a validation slice.

    Samples a TF-IDF configuration, an over-sampling strategy and XGBoost
    hyper-parameters from ``trial``, trains on part of the training split and
    scores on a validation slice carved out of that same training split.
    The held-out test split is not used here, so it is not leaked into
    hyper-parameter selection or early stopping.

    Args:
        trial: Optuna trial object used to sample the search space.

    Returns:
        Weighted F1 score on the validation slice (the study maximises it).
    """
    from scipy.sparse import csr_matrix, hstack

    # --- 7.0) Validation slice taken from the training split only ---
    # Early stopping and trial selection both look at the evaluation set; doing
    # that on the test split would make the final test report optimistic.
    X_text_fit, X_text_val, y_fit, y_val, X_kw_fit, X_kw_val = train_test_split(
        X_text_train, y_train, X_kw_train,
        test_size=0.2, random_state=42, stratify=y_train
    )

    # --- 7.1) TF-IDF configuration ---
    analyzer_choice = trial.suggest_categorical("analyzer", ["word", "char"])
    ngram_lower = 1
    ngram_upper = trial.suggest_int("ngram_upper", 2, 3)  # i.e. (1,2) or (1,3)

    if analyzer_choice == "word":
        # Word level: the text is already tokenised by only_nouns(), so a plain
        # whitespace split is enough.
        vectorizer = TfidfVectorizer(
            tokenizer=str.split,
            analyzer="word",
            ngram_range=(ngram_lower, ngram_upper),
            min_df=3, max_df=0.85,
            dtype=np.float32
        )
    else:
        # Character level: apply the preprocessing function, then build character
        # n-grams (note the "char" choice maps to sklearn's "char_wb" analyzer).
        vectorizer = TfidfVectorizer(
            preprocessor=only_nouns,
            analyzer="char_wb",
            ngram_range=(ngram_lower, ngram_upper),
            min_df=3, max_df=0.85,
            dtype=np.float32
        )

    X_fit_tfidf = vectorizer.fit_transform(X_text_fit)
    X_val_tfidf = vectorizer.transform(X_text_val)

    # --- 7.2) Append the keyword feature ---
    # The extra feature is dense, so it is converted to sparse before stacking.
    X_fit_all = hstack([X_fit_tfidf, csr_matrix(X_kw_fit)], format="csr")
    X_val_all = hstack([X_val_tfidf, csr_matrix(X_kw_val)], format="csr")

    # --- 7.3) Over-sampling strategy (applied to the fitting part only) ---
    sampler_choice = trial.suggest_categorical("sampler", ["none", "smote", "adasyn"])
    if sampler_choice == "smote":
        X_res, y_res = SMOTE(random_state=42).fit_resample(X_fit_all, y_fit)
    elif sampler_choice == "adasyn":
        X_res, y_res = ADASYN(random_state=42).fit_resample(X_fit_all, y_fit)
    else:
        X_res, y_res = X_fit_all, y_fit

    # --- 7.4) XGBoost hyper-parameters ---
    param = {
        "objective": "multi:softmax",
        "num_class": len(le.classes_),
        "tree_method": "hist",   # recommended for XGBoost 2.0+ (hist + device)
        "device": "cuda",
        "eval_metric": "mlogloss",  # metric used for early stopping
        "eta": trial.suggest_float("eta", 1e-3, 1e-1, log=True),
        "max_depth": trial.suggest_int("max_depth", 6, 16),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 8),
        "subsample": trial.suggest_float("subsample", 0.7, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
    }
    num_boost_round = trial.suggest_int("num_boost_round", 100, 500)

    # --- 7.5) Train XGBoost ---
    dtrain = xgb.DMatrix(X_res, label=y_res)
    dval = xgb.DMatrix(X_val_all, label=y_val)
    # The validation set is listed last so that it drives early stopping.
    watchlist = [(dtrain, "train"), (dval, "eval")]
    booster = xgb.train(
        params=param,
        dtrain=dtrain,
        num_boost_round=num_boost_round,
        evals=watchlist,
        early_stopping_rounds=10,
        verbose_eval=False
    )

    # Predict with the trees up to and including the best iteration.
    best_iter = booster.best_iteration
    y_pred = booster.predict(dval, iteration_range=(0, best_iter + 1))

    return f1_score(y_val, y_pred, average="weighted")

##############################
# 8) Optuna 탐색 실행
##############################
# Seed the sampler so the sequence of proposed trials is repeatable, in line with
# the fixed random_state=42 used for the split and the over-samplers.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)  # adjust n_trials to the time available

print("[Optuna] Best score:", study.best_trial.value)
print("[Optuna] Best params:", study.best_trial.params)

##############################
# 9) 최적 파라미터로 최종 재학습
##############################
best_params = study.best_params

# --- 9.1) Rebuild the vectorizer from the best trial ---
if best_params["analyzer"] == "word":
    final_vectorizer = TfidfVectorizer(
        # str.split rather than a lambda: a lambda cannot be pickled, so the
        # joblib.dump of this vectorizer further down would fail.
        tokenizer=str.split,
        analyzer="word",
        ngram_range=(1, best_params["ngram_upper"]),
        min_df=3, max_df=0.85, dtype=np.float32
    )
else:
    final_vectorizer = TfidfVectorizer(
        preprocessor=only_nouns,
        analyzer="char_wb",
        ngram_range=(1, best_params["ngram_upper"]),
        min_df=3, max_df=0.85, dtype=np.float32
    )

# Hold out a slice of the training split for early stopping, so the test split
# is used for the final report only and the reported scores are not tuned on it.
X_text_fit, X_text_val, y_fit, y_val, X_kw_fit, X_kw_val = train_test_split(
    X_text_train, y_train, X_kw_train,
    test_size=0.1, random_state=42, stratify=y_train
)
X_train_tfidf = final_vectorizer.fit_transform(X_text_fit)
X_val_tfidf   = final_vectorizer.transform(X_text_val)
X_test_tfidf  = final_vectorizer.transform(X_text_test)

# --- 9.2) Append the keyword feature ---
from scipy.sparse import csr_matrix, hstack
X_train_all = hstack([X_train_tfidf, csr_matrix(X_kw_fit)], format="csr")
X_val_all   = hstack([X_val_tfidf, csr_matrix(X_kw_val)], format="csr")
X_test_all  = hstack([X_test_tfidf, csr_matrix(X_kw_test)], format="csr")

# --- 9.3) Apply the selected over-sampling strategy (fitting part only) ---
sampler_choice = best_params["sampler"]
if sampler_choice == "smote":
    sampler = SMOTE(random_state=42)
    X_res, y_res = sampler.fit_resample(X_train_all, y_fit)
elif sampler_choice == "adasyn":
    sampler = ADASYN(random_state=42)
    X_res, y_res = sampler.fit_resample(X_train_all, y_fit)
else:
    X_res, y_res = X_train_all, y_fit

# --- 9.4) Apply the best XGBoost parameters ---
param_final = {
    "objective": "multi:softmax",
    "num_class": len(le.classes_),
    "tree_method": "hist",
    "device": "cuda",
    "eval_metric": "mlogloss",
    "eta": best_params["eta"],
    "max_depth": best_params["max_depth"],
    "gamma": best_params["gamma"],
    "min_child_weight": best_params["min_child_weight"],
    "subsample": best_params["subsample"],
    "colsample_bytree": best_params["colsample_bytree"],
    "reg_alpha": best_params["reg_alpha"],
    "reg_lambda": best_params["reg_lambda"],
}
final_num_boost_round = best_params["num_boost_round"]

dtrain_final = xgb.DMatrix(X_res, label=y_res)
dval_final   = xgb.DMatrix(X_val_all, label=y_val)
dtest_final  = xgb.DMatrix(X_test_all, label=y_test)
# The validation set is listed last so that it drives early stopping.
watchlist_final = [(dtrain_final, "train"), (dval_final, "eval")]

final_booster = xgb.train(
    params=param_final,
    dtrain=dtrain_final,
    num_boost_round=final_num_boost_round,
    evals=watchlist_final,
    early_stopping_rounds=10,
    verbose_eval=False
)

# Predict on the untouched test split with the trees up to the best iteration.
best_iter = final_booster.best_iteration
y_pred = final_booster.predict(dtest_final, iteration_range=(0, best_iter+1))

##############################
# 10) Final evaluation and saving
##############################
from sklearn.metrics import classification_report
print("\n[최종 성능 평가]")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Persist the booster, the fitted vectorizer and the label encoder.
final_booster.save_model("best_booster.model")
joblib.dump(final_vectorizer, "final_vectorizer.joblib")
joblib.dump(le, "label_encoder.joblib")
print("최적 모델 및 벡터라이저, 라벨 인코더 저장 완료!")


100%|██████████| 147012/147012 [00:31<00:00, 4726.85it/s]
[I 2025-04-04 04:40:16,882] A new study created in memory with name: no-name-04d621c9-91d8-4b01-8d16-f622585391a2
[I 2025-04-04 04:42:00,959] Trial 0 finished with value: 0.7527170367949736 and parameters: {'analyzer': 'char', 'ngram_upper': 2, 'sampler': 'none', 'eta': 0.04777742998984467, 'max_depth': 7, 'gamma': 2.8223085590251045, 'min_child_weight': 4, 'subsample': 0.8723606974982616, 'colsample_bytree': 0.9511946509113967, 'reg_alpha': 1.3372191429854585e-05, 'reg_lambda': 1.6433391341350338e-07, 'num_boost_round': 419}. Best is trial 0 with value: 0.7527170367949736.
[I 2025-04-04 04:42:56,438] Trial 1 finished with value: 0.6660056019566613 and parameters: {'analyzer': 'char', 'ngram_upper': 2, 'sampler': 'none', 'eta': 0.025996792571438866, 'max_depth': 7, 'gamma': 4.922976123066266, 'min_child_weight': 6, 'subsample': 0.9622247115889808, 'colsample_bytree': 0.7704380064279576, 'reg_alpha': 0.0004704806415725675, 'reg_l

[Optuna] Best score: 0.7805327439544726
[Optuna] Best params: {'analyzer': 'char', 'ngram_upper': 3, 'sampler': 'none', 'eta': 0.0822075335012693, 'max_depth': 15, 'gamma': 0.39553296799113347, 'min_child_weight': 3, 'subsample': 0.7613705543277253, 'colsample_bytree': 0.7084088713502827, 'reg_alpha': 1.4675569643363483e-05, 'reg_lambda': 0.03529364799649397, 'num_boost_round': 278}

[최종 성능 평가]
              precision    recall  f1-score   support

          교육       0.86      0.63      0.73      1554
          교통       0.89      0.77      0.82      1249
          기타       0.85      0.73      0.78      5423
          문화       0.88      0.62      0.73      2474
          쇼핑       0.73      0.42      0.54      3552
          식비       0.75      0.97      0.85     13473
          의료       0.92      0.66      0.77       869
          주거       0.98      0.91      0.94       809

    accuracy                           0.79     29403
   macro avg       0.86      0.71      0.77     29403
weight



최적 모델 및 벡터라이저, 라벨 인코더 저장 완료!


In [None]:
from joblib import dump, load
import dill
import re
from konlpy.tag import Okt
import xgboost as xgb

okt = Okt()

# Defined identically to the preprocessing function used during training
def only_nouns(text: str) -> str:
    """Return the nouns of ``text`` as a single space-separated string.

    The text is lower-cased and everything except Hangul syllables, digits,
    lowercase ASCII letters and whitespace is removed before noun extraction.
    Stopword nouns and single-character nouns are dropped from the result.
    """
    cleaned = re.sub(r"[^가-힣0-9a-z\s]", "", text.lower())
    excluded = {"점", "센터", "코너", "플라자", "주식회사", "유한회사"}
    kept = []
    for noun in okt.nouns(cleaned):
        if len(noun) > 1 and noun not in excluded:
            kept.append(noun)
    return " ".join(kept).strip()

# Save the model (expects the trained Booster object to be available as final_booster)
final_booster.save_model("best_booster.model")
# The vectorizer and label encoder are written with dill (the serving cell reads
# them back with dill.load). Context managers ensure each file is flushed and
# closed instead of leaving the handle open.
with open("final_vectorizer.joblib", "wb") as f:
    dill.dump(final_vectorizer, f)
with open("label_encoder.joblib", "wb") as f:
    dill.dump(le, f)
print("모델, 벡터라이저, 라벨 인코더 저장 완료!")

# Single-item prediction: preprocess with only_nouns, then rebuild the training feature layout
def predict_category(store_name):
    """Predict the category label for one store name.

    The booster was trained on the TF-IDF columns followed by one
    ``has_shop_kw`` column, so the same column is appended here; otherwise the
    feature matrix has one column fewer than the model was trained on.

    Args:
        store_name: Raw store name.

    Returns:
        The category label decoded through the fitted label encoder ``le``.
    """
    from scipy.sparse import csr_matrix, hstack

    store_name_processed = only_nouns(store_name)
    X_text = final_vectorizer.transform([store_name_processed])
    # The keyword flag is computed on the raw name, as it was for the training data.
    X_kw = csr_matrix([[has_shop_keyword(store_name)]])
    X_new = hstack([X_text, X_kw], format="csr")
    dmatrix = xgb.DMatrix(X_new)
    pred = final_booster.predict(dmatrix)
    return le.inverse_transform([int(pred[0])])[0]

# Prediction smoke test on a single store name
test_input = "하이내과"
print("상호명:", test_input, "-> 예측 카테고리:", predict_category(test_input))




모델, 벡터라이저, 라벨 인코더 저장 완료!
상호명: 하이내과 -> 예측 카테고리: 의료


In [None]:
# 필요한 라이브러리 설치 (Colab 런타임 재시작 후 실행)
!pip install dill nest-asyncio pyngrok fastapi uvicorn

import dill
import nest_asyncio
from pyngrok import ngrok
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import re
from konlpy.tag import Okt
import xgboost as xgb

# 1. Load the saved vectorizer and label encoder (using dill)
# NOTE(review): like pickle, dill.load can execute code stored in the file — only
# load artefacts that were produced by this notebook.
with open('final_vectorizer.joblib', 'rb') as f:
    final_vectorizer = dill.load(f)
with open('label_encoder.joblib', 'rb') as f:
    le = dill.load(f)

# Restore the trained XGBoost booster from its saved model file.
booster = xgb.Booster()
booster.load_model('best_booster.model')

# 2. Preprocessing function (same as at training time)
okt = Okt()
def only_nouns(text: str) -> str:
    """Return the nouns of ``text`` as a single space-separated string.

    The text is lower-cased and everything except Hangul syllables, digits,
    lowercase ASCII letters and whitespace is removed before noun extraction.
    Stopword nouns and single-character nouns are dropped from the result.
    """
    cleaned = re.sub(r"[^가-힣0-9a-z\s]", "", text.lower())
    excluded = {"점", "센터", "코너", "플라자", "주식회사", "유한회사"}
    kept = []
    for noun in okt.nouns(cleaned):
        if len(noun) > 1 and noun not in excluded:
            kept.append(noun)
    return " ".join(kept).strip()

# 3. FastAPI application setup
app = FastAPI()

# Request body of the /predict endpoint: a single field holding the store name.
class StoreRequest(BaseModel):
    store_name: str

# Keywords behind the has_shop_kw feature. They must stay identical to the list
# used when the training data was built, otherwise the feature changes meaning.
SHOPPING_KEYWORDS = ("마트", "편의점", "백화점", "쇼핑", "문구", "의류", "화장품", "가전", "가구", "서점")

def _build_features(store_name: str):
    """Build the one-row feature matrix in the training layout: TF-IDF columns, then has_shop_kw."""
    from scipy.sparse import csr_matrix, hstack

    X_text = final_vectorizer.transform([only_nouns(store_name)])
    # The keyword flag is computed on the raw, lower-cased name, as in training.
    lowered = store_name.lower()
    has_kw = int(any(kw in lowered for kw in SHOPPING_KEYWORDS))
    return hstack([X_text, csr_matrix([[has_kw]])], format="csr")

@app.post("/predict")
async def predict(request: StoreRequest):
    """Predict the category for the store name in the request body.

    The booster was trained on TF-IDF columns plus one keyword column, so the
    request is converted to that same layout before prediction; otherwise the
    feature matrix has one column fewer than the model was trained on.

    Returns:
        A dict with the submitted ``store_name`` and the decoded ``category``.
    """
    dmatrix = xgb.DMatrix(_build_features(request.store_name))
    pred = booster.predict(dmatrix)
    return {
        "store_name": request.store_name,
        "category": le.inverse_transform([int(pred[0])])[0]
    }

# 4. Run the server inside Colab
# Presumably required because the notebook kernel already runs an event loop
# that uvicorn.run would otherwise conflict with — TODO confirm.
nest_asyncio.apply()
# Open an ngrok tunnel to local port 8001 and print its public URL.
# NOTE(review): the endpoint has no authentication, so anyone holding the URL can call it — confirm this is acceptable.
public_url = ngrok.connect(8001).public_url
print(f"🔥 서버 실행 중! 접속 URL: {public_url}")

# Blocks this cell until the server is shut down.
uvicorn.run(app, host="0.0.0.0", port=8001)

🔥 서버 실행 중! 접속 URL: https://ab6b-35-240-156-58.ngrok-free.app


INFO:     Started server process [3359]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)


INFO:     14.46.141.248:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     14.46.141.248:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     14.46.141.248:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     14.46.141.248:0 - "PO

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [3359]


In [None]:
# 1. Compress the model file and all .joblib files into a single zip archive
# NOTE(review): the *.joblib glob also picks up any unrelated .joblib file that
# happens to be in the working directory — list the files explicitly if that matters.
!zip model_files.zip best_booster.model *.joblib

# 2. Download the archive
from google.colab import files
files.download('model_files.zip')

  adding: best_booster.model (deflated 57%)
  adding: category_mapping.joblib (deflated 59%)
  adding: final_vectorizer.joblib (deflated 55%)
  adding: label_encoder.joblib (deflated 15%)
  adding: vectorizer.joblib (stored 0%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>