In [None]:
# 1. Authentication: sign the Colab user in before any Google Cloud client is created
from google.colab import auth
auth.authenticate_user()

In [None]:
# 2. 라이브러리 임포트
import io
import os

import pandas as pd
import requests
import torch
from google.cloud import storage
from PIL import Image, ImageOps
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
from transformers import SwinForImageClassification, AutoImageProcessor

In [None]:
# 3. GCS settings: the project and bucket that every download/upload in this notebook uses
project_id = "valid-might-460212-k2"
bucket_name = "de-project2-bucket-1"

In [None]:
# Path settings
# Object path (inside the bucket) of the input CSV
csv_gcs_path = "kyobo/csv/combined/after_preprocessing_novel.csv"
# Object-name prefix under which the checkpoint files are stored
model_gcs_dir = "models/swin-tiny-novel/checkpoint-72"
# Local file the predictions are written to, and the object path it is uploaded to
output_csv_local = "/content/novel_image_classification.csv"
output_csv_gcs_path = "kyobo/csv/combined/novel-image_classification.csv"

# Storage client and bucket handle shared by the download/upload cells
client = storage.Client(project=project_id)
bucket = client.bucket(bucket_name)

In [None]:
# 4. Download the input CSV from the bucket onto the Colab VM
local_csv_path = "/content/after_preprocessing_novel.csv"
csv_blob = bucket.blob(csv_gcs_path)
csv_blob.download_to_filename(local_csv_path)
print("✅ CSV 다운로드 완료")

✅ CSV 다운로드 완료


In [None]:
# 5. Model download and loading: local directory the checkpoint files are copied into
local_model_path = "/content/swin-tiny-output/checkpoint-72"
os.makedirs(local_model_path, exist_ok=True)

In [None]:
# Download every file stored under the checkpoint "directory" in the bucket.
# The prefix must end with "/": a bare "…/checkpoint-72" prefix would also match
# sibling objects such as "…/checkpoint-720/…", whose relative path would start
# with "../" and be written outside local_model_path.
model_prefix = model_gcs_dir.rstrip("/") + "/"
for blob in bucket.list_blobs(prefix=model_prefix):
    if blob.name.endswith("/"):
        continue  # "folder" placeholder object, nothing to download
    # Path of the object relative to the checkpoint directory
    rel_path = blob.name[len(model_prefix):]
    dest_path = os.path.join(local_model_path, rel_path)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    blob.download_to_filename(dest_path)
    print(f"📦 모델 파일 다운로드: {rel_path}")

📦 모델 파일 다운로드: config.json
📦 모델 파일 다운로드: model.safetensors
📦 모델 파일 다운로드: optimizer.pt
📦 모델 파일 다운로드: rng_state.pth
📦 모델 파일 다운로드: scaler.pt
📦 모델 파일 다운로드: scheduler.pt
📦 모델 파일 다운로드: trainer_state.json
📦 모델 파일 다운로드: training_args.bin


In [None]:
# AutoImageProcessor is already imported in the import cell at the top, so it is
# not re-imported here.
# The processor is loaded from the base Swin checkpoint on the Hub rather than
# from local_model_path. NOTE(review): the download log for the fine-tuned
# checkpoint lists no preprocessor_config.json, which is presumably why — confirm.
# In the cells visible here only processor.image_mean / image_std are used.
processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [None]:
# Load the fine-tuned classifier from the downloaded checkpoint, place it on the
# GPU when one is available (CPU otherwise) and switch it to evaluation mode.
model = SwinForImageClassification.from_pretrained(local_model_path)
model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
# Mapping from class index to label name, taken from the checkpoint config
id2label = model.config.id2label

In [None]:
# 6. Preprocessing and dataset definitions
class PadToSquare:
    """Pad a PIL image with a solid colour so that its width equals its height.

    The image content is not scaled; a border is added on the shorter axis.

    Args:
        fill_color: colour passed to ``ImageOps.expand`` for the added border
            (white by default).
    """

    def __init__(self, fill_color=(255, 255, 255)):
        self.fill_color = fill_color

    def __call__(self, image):
        """Return ``image`` expanded to a square of side ``max(width, height)``."""
        width, height = image.size
        side = max(width, height)
        extra_w, extra_h = side - width, side - height
        left, top = extra_w // 2, extra_h // 2
        # Border order is (left, top, right, bottom); when the missing amount is
        # odd, the extra pixel goes to the right / bottom edge.
        border = (left, top, extra_w - left, extra_h - top)
        return ImageOps.expand(image, border, fill=self.fill_color)

In [None]:
# Preprocessing applied to every image, in this order: pad to a square, resize to
# 224x224, convert to a tensor, then normalise with the processor's mean / std.
preprocess_steps = [
    PadToSquare(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean, std=processor.image_std),
]
transform = transforms.Compose(preprocess_steps)

In [None]:
class InferenceDataset(Dataset):
    """Dataset that downloads one image per row of a DataFrame.

    Each item is a dict with the transformed image under ``'pixel_values'`` and
    the row's ``'product_id'``.

    Args:
        dataframe: frame with ``image_url`` and ``product_id`` columns; its index
            is reset so that rows can be addressed by position.
        transform: callable applied to the RGB PIL image.
        timeout: seconds to wait for the HTTP request before treating the image
            as failed, so one unresponsive server cannot stall the whole run.
    """

    def __init__(self, dataframe, transform, timeout=10):
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform
        self.timeout = timeout

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        url = self.df.loc[idx, 'image_url']
        product_id = self.df.loc[idx, 'product_id']
        try:
            response = requests.get(url, timeout=self.timeout)
            # Treat HTTP errors (404, 5xx, ...) as failures instead of trying to
            # decode an error page as an image.
            response.raise_for_status()
            # response.content is fully read and content-decoded, unlike .raw
            with Image.open(io.BytesIO(response.content)) as raw_image:
                image = self.transform(raw_image.convert("RGB"))
        except Exception as exc:
            # Best effort: a broken image must not abort the run. Only Exception
            # is caught so that KeyboardInterrupt still stops the loop.
            print(f"이미지 불러오기 실패: {product_id} ({type(exc).__name__}: {exc})")
            # NOTE(review): this all-zero placeholder keeps batch shapes uniform,
            # but a model fed with it will still output a label for this row.
            image = torch.zeros(3, 224, 224)
        return {'pixel_values': image, 'product_id': product_id}

In [None]:
# 7. Run inference
# Image downloads dominate the runtime, so images are fetched by several worker
# processes and classified in batches instead of one at a time in the main process.
BATCH_SIZE = 32
NUM_WORKERS = min(4, os.cpu_count() or 1)

df = pd.read_csv(local_csv_path)
dataset = InferenceDataset(df, transform)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

device = "cuda" if torch.cuda.is_available() else "cpu"
results = []  # (product_id, predicted label) pairs, in dataset order

# NOTE(review): rows whose image failed to load arrive here as an all-zero tensor
# and still receive a predicted label.
with torch.no_grad():
    for batch in tqdm(dataloader):
        images = batch['pixel_values'].to(device)
        outputs = model(pixel_values=images)
        # .tolist() yields plain Python ints for the id2label lookup
        preds = outputs.logits.argmax(dim=1).tolist()
        labels = [id2label[p] for p in preds]
        product_ids = batch['product_id']
        # Numeric ids are collated into a tensor; convert them back to plain
        # values so the result rows (and the CSV) do not contain tensor objects.
        if torch.is_tensor(product_ids):
            product_ids = product_ids.tolist()
        results.extend(zip(product_ids, labels))

100%|██████████| 63326/63326 [2:47:19<00:00,  6.31it/s]


In [None]:
# 8. Save the predictions: one row per product with its predicted label
result_columns = ["product_id", "design_label"]
output_df = pd.DataFrame(results, columns=result_columns)
output_df.to_csv(output_csv_local, index=False)
print("✅ 추론 결과 저장 완료:", output_csv_local)

✅ 추론 결과 저장 완료: /content/image_classification1.csv


In [None]:
# 9. Upload the prediction CSV to the bucket
result_blob = bucket.blob(output_csv_gcs_path)
result_blob.upload_from_filename(output_csv_local)
print(f"✅ 결과 CSV 업로드 완료 → gs://{bucket_name}/{output_csv_gcs_path}")

✅ 결과 CSV 업로드 완료 → gs://de-project2-bucket-1/kyobo/csv/combined/image_classification1.csv
