In [None]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub. No token is passed
# here, so no credential is hardcoded in the notebook.
login()

In [None]:
import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Hub identifier of the checkpoint whose size is being inspected.
MODEL_NAME = "vikhyatk/moondream2"

# Load the checkpoint with float16 weights. trust_remote_code=True is passed
# so that model code shipped with the repository may be used.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

# Total number of scalar elements across every parameter tensor.
total_params = sum(weight.numel() for weight in model.parameters())

print(f"모델: {MODEL_NAME}")
print(f"총 파라미터 수: {total_params:,}")

# Human-readable size: millions below one billion parameters, billions otherwise.
if total_params < 1_000_000_000:
    print(f"{total_params / 1_000_000:.2f}M (백만) 개의 파라미터")
else:
    print(f"{total_params / 1_000_000_000:.2f}B (십억) 개의 파라미터")



---



In [None]:
import os
import re
import torch
import pandas as pd
from transformers import AutoProcessor
import zipfile
from google.colab import drive
from tqdm.auto import tqdm

# Mount Google Drive at /content/drive so the competition archive is reachable.
drive.mount('/content/drive')
base_path = "/content/drive/MyDrive/Colab Notebooks/Dacon/SCPC/"

# Source archive on Drive and the directory it is unpacked into.
extract_path = '/content/data'
zip_path = os.path.join(base_path, 'data/open.zip')
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive into extract_path.
print(f"Extracting {zip_path} to {extract_path}...")
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)
print("Extraction complete.")

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_PATH = "vikhyatk/moondream2"

print(f"Using device: {device}")
print("Loading model and processor...")

# Load the Moondream2 model with float16 weights, placed entirely on `device`.
# NOTE(review): no `revision` is pinned, so the remote code that runs may
# change between sessions — consider pinning one for reproducibility.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # or "auto"
    device_map={"": device}
)

# Load the processor (tokenizer included). trust_remote_code=True mirrors the
# model load above; without it, resolving this custom-code repository can stop
# to ask for confirmation (or fail), which breaks an unattended "Run All".
processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)

# Dataset locations inside the directory the archive was extracted to.
extract_path = '/content/data'
TEST_CSV_PATH = os.path.join(extract_path, 'test.csv')
TEST_IMG_DIR = os.path.join(extract_path, 'test_input_images')
# The submission is written next to the extracted data (not under the Drive mount).
SUBMISSION_PATH = os.path.join(extract_path, 'submission2.csv')
test_df = pd.read_csv(TEST_CSV_PATH)

In [None]:
def _extract_choice(generated_answer, options, default='A'):
    """Map the model's free-text reply to one of the option letters.

    Signals are tried from most to least specific:
      1. the reply opens with a bare option letter ("B", "(B)", "B: ...", "B. ...");
      2. the longest option text contained in the reply (case-insensitive),
         so "Dark red" wins over "Red" when both are substrings;
      3. the first standalone letter A-D anywhere in the reply;
      4. ``default``.

    Args:
        generated_answer: Raw reply produced by the model (coerced to ``str``).
        options: Mapping of option letter -> option text. Values may be
            non-strings (e.g. numbers or NaN read from the CSV).
        default: Letter returned when nothing in the reply can be matched.

    Returns:
        The selected option letter.
    """
    processed_text = str(generated_answer).strip().upper()

    # 1) A leading letter followed by punctuation or end-of-text. A bare
    #    "A DOG" is deliberately NOT matched here: the "A" may be an article.
    leading = re.match(r'\(?([A-D])\)?(?=\s*[:.),]|\s*$)', processed_text)
    if leading and leading.group(1) in options:
        return leading.group(1)

    # 2) Longest option text found verbatim in the reply.
    best_choice, best_len = '', 0
    for choice, text in options.items():
        # Skip missing values (None / NaN); NaN is the only value != itself.
        if text is None or text != text:
            continue
        needle = str(text).strip().upper()
        # len(needle) > best_len also rejects empty option text, which would
        # otherwise be a substring of every reply.
        if len(needle) > best_len and needle in processed_text:
            best_choice, best_len = choice, len(needle)
    if best_choice:
        return best_choice

    # 3) Any standalone option letter in the reply.
    standalone = re.search(r'\b[A-D]\b', processed_text)
    if standalone:
        return standalone.group(0)

    # 4) Nothing matched.
    return default


results = []

for index, row in tqdm(test_df.iterrows(), total=test_df.shape[0], desc="Predicting"):
    # Only the file name from the CSV path is used; images are read from TEST_IMG_DIR.
    img_path = os.path.join(TEST_IMG_DIR, row['img_path'].split('/')[-1])
    question = row['Question']
    options = {'A': row['A'], 'B': row['B'], 'C': row['C'], 'D': row['D']}

    try:
        image = Image.open(img_path)
    except FileNotFoundError:
        # Image is missing: record the default answer 'A' and move on.
        results.append({'ID': row['ID'], 'answer': 'A'})
        print(f"Warning: Image not found at {img_path}. Defaulting to 'A'.")
        continue

    # Build the prompt.
    prompt = f"Question: {question}\nOptions:\nA: {options['A']}\nB: {options['B']}\nC: {options['C']}\nD: {options['D']}\nBased on the image, which option is correct? Please provide only the letter of the correct option."

    # Generate the answer.
    generated_answer = model.answer_question(
        image,
        prompt,
        tokenizer=processor
    )

    # Post-process the reply into a single option letter (falls back to 'A').
    final_choice = _extract_choice(generated_answer, options)

    results.append({'ID': row['ID'], 'answer': final_choice})

print("\nPrediction complete.")

In [None]:
# Turn the collected per-row predictions into a DataFrame.
submission_df = pd.DataFrame(results)

# Show the first rows as a quick sanity check.
print("\n--- Generated Submission File (Top 5) ---")
print(submission_df.head())

# Write the predictions to CSV without the index column.
submission_df.to_csv(path_or_buf=SUBMISSION_PATH, index=False)

print(f"Submission file saved successfully at '{SUBMISSION_PATH}'")