In [None]:
# import requests
# from PIL import Image
# from transformers import BlipProcessor, BlipForConditionalGeneration

# processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")

# img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
# raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# # conditional image captioning
# text = "a photography of"
# inputs = processor(raw_image, text, return_tensors="pt").to("cuda")

# out = model.generate(**inputs)
# print(processor.decode(out[0], skip_special_tokens=True))

In [1]:
import os
import time
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

In [2]:
# 모델 & 프로세서 로드
model_name = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name).to("cuda")

preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("사용 중인 디바이스:", device)

사용 중인 디바이스: cuda


In [4]:
import os
import pickle
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

class KTSDatasetTorch(Dataset):
    def __init__(self, data_address, mode="train", transform=None):
        self.data_address = data_address
        self.image_dir = os.path.join(self.data_address, "images")
        self.transform = transform if transform else self.default_transform()

        # pickle 파일 로드
        pickle_file = os.path.join(self.data_address, f"{mode}.pickle")
        with open(pickle_file, "rb") as fr:
            self.dataset = pickle.load(fr)

        # 이미지 경로, 텍스트, 해시태그 추출
        self.image_paths = []
        self.texts = []
        self.tags = []

        for data in self.dataset:
            if "img_name" in data and "hashtag" in data and data["hashtag"] and "text" in data:
                img_path = os.path.join(self.image_dir, data["img_name"].replace("\\", "/").split("images/")[-1])
                if os.path.exists(img_path):
                    self.image_paths.append(img_path)
                    self.texts.append(data["text"])
                    self.tags.append(data["hashtag"])  

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        text = self.texts[idx]
        tag = self.tags[idx]

        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        return image, text, tag


    def default_transform(self):
        return transforms.Compose([
            transforms.Resize((84, 84)),
            transforms.ToTensor(),
        ])


In [5]:
# 데이터셋 생성
train_dataset = KTSDatasetTorch(data_address="/kaggle/input/korean-tourist-spot-dataset/Korean-Tourist-Spot-Dataset-master/kts/", mode="train", transform=None )
valid_dataset = KTSDatasetTorch(data_address="/kaggle/input/korean-tourist-spot-dataset/Korean-Tourist-Spot-Dataset-master/kts/", mode="val", transform=None )
test_dataset = KTSDatasetTorch(data_address="/kaggle/input/korean-tourist-spot-dataset/Korean-Tourist-Spot-Dataset-master/kts/", mode="test", transform=None )


# train + valid 합치기
train_valid_dataset = ConcatDataset([train_dataset, valid_dataset])

In [6]:
from torch.utils.data import DataLoader

#  collate_fn 정의: 이미지, 텍스트, 해시태그 리스트를 그대로 묶어줌
def custom_collate_fn(batch):
    images, texts, tags = zip(*batch)
    return list(images), list(texts), list(tags)

#  DataLoader 구성
train_valid_loader = DataLoader(
    train_valid_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=custom_collate_fn
)
test_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=custom_collate_fn
)

In [7]:
sample = train_valid_dataset[-1]
print("타입:", type(sample))
print("길이:", len(sample))

for i, item in enumerate(sample):
    print(f"\n--- sample[{i}] ---")
    print("타입:", type(item))
    print(item)

타입: <class 'tuple'>
길이: 3

--- sample[0] ---
타입: <class 'torch.Tensor'>
tensor([[[0.0745, 0.0745, 0.0745,  ..., 0.0627, 0.0627, 0.0627],
         [0.0745, 0.0745, 0.0745,  ..., 0.0627, 0.0627, 0.0627],
         [0.0745, 0.0745, 0.0745,  ..., 0.0627, 0.0627, 0.0627],
         ...,
         [0.4078, 0.3569, 0.3216,  ..., 0.2118, 0.2235, 0.2039],
         [0.3647, 0.3333, 0.3059,  ..., 0.2157, 0.2784, 0.2941],
         [0.3373, 0.3216, 0.2980,  ..., 0.2196, 0.2627, 0.2392]],

        [[0.0706, 0.0706, 0.0706,  ..., 0.0627, 0.0627, 0.0627],
         [0.0706, 0.0706, 0.0706,  ..., 0.0627, 0.0627, 0.0627],
         [0.0745, 0.0745, 0.0745,  ..., 0.0627, 0.0627, 0.0627],
         ...,
         [0.4157, 0.3569, 0.3176,  ..., 0.2118, 0.2196, 0.2039],
         [0.3725, 0.3333, 0.3020,  ..., 0.2196, 0.2745, 0.2902],
         [0.3412, 0.3216, 0.2980,  ..., 0.2157, 0.2549, 0.2353]],

        [[0.0627, 0.0627, 0.0667,  ..., 0.0588, 0.0549, 0.0549],
         [0.0627, 0.0627, 0.0667,  ..., 0.0549, 0.0

In [8]:
from torch.optim import AdamW
optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)

def train_step(batch):
    model.train()
    images, texts, tags = batch

    # processor로 이미지 + 텍스트 인코딩
    inputs = processor(
        images=images,
        text=texts,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=35,
        do_rescale=False
    ).to(device)

    # 정답 해시태그들을 문자열로 변환
    tags_text = [" ".join(tag_list) for tag_list in tags]
    tokenized = processor.tokenizer(
        tags_text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=35
    )
    labels = tokenized.input_ids.to(device)
    labels[labels == processor.tokenizer.pad_token_id] = -100

    # 모델 학습
    outputs = model(
        input_ids=inputs["input_ids"], 
        attention_mask=inputs["attention_mask"],
        pixel_values=inputs["pixel_values"],
        labels=labels
    )

    loss = outputs.loss
    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

    return loss.item()


In [9]:
import torch

# 저장 경로
model_save_path = "/kaggle/working/best_model_epoch.pth"

# 학습 파라미터 설정
num_epochs = 10
log_interval = 100
patience_limit = 2
best_loss = float('inf')
patience_counter = 0

for epoch in range(num_epochs):
    total_loss = 0
    model.train()

    for step, batch in enumerate(train_valid_loader):
        loss = train_step(batch)
        total_loss += loss

        if (step + 1) % log_interval == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{step+1}], Loss: {loss:.4f}")

    avg_loss = total_loss / len(train_valid_loader)
    print(f"Epoch {epoch+1} 완료! 평균 손실: {avg_loss:.4f}")

    # Early Stopping 로직
    if avg_loss < best_loss:
        best_loss = avg_loss
        patience_counter = 0
        print(f"손실 개선! best_loss 갱신: {best_loss:.4f}")
        
        # 모델 저장
        torch.save(model.state_dict(), model_save_path)
        print(f"모델 저장됨: {model_save_path}")
    else:
        patience_counter += 1
        print(f"손실 개선 없음. patience: {patience_counter}/{patience_limit}")
        if patience_counter >= patience_limit:
            print("Early stopping triggered!")
            break

Epoch [1/10], Step [100], Loss: 3.4983
Epoch [1/10], Step [200], Loss: 3.7203
Epoch [1/10], Step [300], Loss: 3.2626
Epoch [1/10], Step [400], Loss: 3.2672
Epoch [1/10], Step [500], Loss: 3.3531
Epoch [1/10], Step [600], Loss: 3.4306
Epoch [1/10], Step [700], Loss: 3.8861
Epoch [1/10], Step [800], Loss: 3.2717
Epoch [1/10], Step [900], Loss: 3.5606
Epoch [1/10], Step [1000], Loss: 4.3065
Epoch 1 완료! 평균 손실: 3.5819
손실 개선! best_loss 갱신: 3.5819
모델 저장됨: /kaggle/working/best_model_epoch.pth
Epoch [2/10], Step [100], Loss: 3.0876
Epoch [2/10], Step [200], Loss: 3.0852
Epoch [2/10], Step [300], Loss: 3.0569
Epoch [2/10], Step [400], Loss: 3.4193
Epoch [2/10], Step [500], Loss: 3.2077
Epoch [2/10], Step [600], Loss: 3.3603
Epoch [2/10], Step [700], Loss: 3.8099
Epoch [2/10], Step [800], Loss: 3.4035
Epoch [2/10], Step [900], Loss: 4.1371
Epoch [2/10], Step [1000], Loss: 3.5356
Epoch 2 완료! 평균 손실: 3.2355
손실 개선! best_loss 갱신: 3.2355
모델 저장됨: /kaggle/working/best_model_epoch.pth
Epoch [3/10], Step [

In [None]:
# # 모델 저장
# base_dir = "/kaggle/working/"
# model_save_path = os.path.join(base_dir, "blip_hashtag_model_epoch3.pth")
# torch.save(model.state_dict(), model_save_path)
# print(f"모델 저장 완료: {model_save_path}")

In [4]:
# 모델 불러오기
model.load_state_dict(torch.load("/kaggle/working/best_model_epoch.pth"))
model.eval()  # 평가 모드로 전환

# base_dir = "/kaggle/working/"
# model_save_path = os.path.join(base_dir, "blip_hashtag_model_epoch3.pth")
# model.load_state_dict(torch.load(model_save_path))
# model.to(device)
# print("모델 로드 완료!")

  model.load_state_dict(torch.load("/kaggle/working/best_model_epoch.pth"))


BlipForConditionalGeneration(
  (vision_model): BlipVisionModel(
    (embeddings): BlipVisionEmbeddings(
      (patch_embedding): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (encoder): BlipEncoder(
      (layers): ModuleList(
        (0-11): 12 x BlipEncoderLayer(
          (self_attn): BlipAttention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (projection): Linear(in_features=768, out_features=768, bias=True)
          )
          (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): BlipMLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
          )
          (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
    (post_layernorm): LayerNorm((768,), eps=1e-0

In [5]:
def generate_hashtags(images, texts, max_tags=5):
    inputs = processor(
        images=images,
        text=texts,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=128  # 길이 제한 추가
    ).to(device)

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=64,
            num_beams=5,
            repetition_penalty=1.2,
            no_repeat_ngram_size=2
        )

    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
    # 해시태그만 추출하도록 후처리
    return [text.strip().split()[:max_tags] for text in generated_text]



In [47]:
import os
from PIL import Image
from torch.utils.data import Dataset
import pandas as pd

class CSVImageTextTagDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        self.data = pd.read_csv(csv_path)
        self.transform = transform

        # CSV 경로의 중간 디렉토리명을 실제 폴더 이름으로 치환
        self.image_paths = [
            os.path.join(
                "/kaggle/input/korean-tourist-spot-dataset",
                rel_path.replace("Korean-Tourist-Spot-Dataset", "Korean-Tourist-Spot-Dataset-master")
            )
            for rel_path in self.data["Image Path"]
        ]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        row = self.data.iloc[idx]
        text = row["Text"]
        tags = row["Hashtag"].split()

        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, text, tags


In [57]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
    # Normalize는 제거!
])


csv_path = "/kaggle/input/dataset-eval/dataset_for_evaluate.csv"
test_dataset = CSVImageTextTagDataset(csv_path, transform=transform)
 

In [55]:
test_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=custom_collate_fn
)

In [58]:
import pandas as pd

results = []

# test_dataset을 직접 순회해서 image_path까지 접근
for idx in range(len(test_dataset)):
    image_tensor, text, true_tags = test_dataset[idx]
    image_path = test_dataset.image_paths[idx]  # 경로 가져오기

    # 배치 차원 추가 후 추론
    image_tensor = image_tensor.unsqueeze(0).to(device)
    pred_tags = generate_hashtags(image_tensor, [text])[0]

    results.append({
        "Image Path": image_path,
        "Text": text,
        "Hashtag": true_tags,
        
        "generated_tags": pred_tags
    })

# DataFrame으로 정리
df_results = pd.DataFrame(results)
df_results.head()

It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


Unnamed: 0,Image Path,Text,Hashtag,generated_tags
0,/kaggle/input/korean-tourist-spot-dataset/Kore...,사탕이 흩뿌려진다~,[83타워전망대],"[사탕이, ~ᆯᄉᆯ, #, #ᅡᅮᄎᄎᄉ, #ᅩᆫᅡᆨᅦᆼᅮᄑᄑᄌ지ᅵᅩᅮᄉ디..."
1,/kaggle/input/korean-tourist-spot-dataset/Kore...,오룩도 경치 오오오오오 뭐야뭐야 너무 끝내주자너,"[부산,, 오륙도]","[오룩도, 경치, 오오오오오, 너무, #]"
2,/kaggle/input/korean-tourist-spot-dataset/Kore...,올해 최고 한파가 있던 날 소백산 산행.... 몸을 제다로 가누지 못할 만큼의 칼바...,"[등산스타그램,, 산스타그램,, 단양,, 소백산]","[올해, 한파가, 날, 소백산, 산행....]"
3,/kaggle/input/korean-tourist-spot-dataset/Kore...,궁은 이쁜데 하늘이 너무 흐리다,"[서울,, 창경궁,, 나들이,, 눈,, 눈온날,, 설경,, 사진그램,, 사진스타그램]","[궁은, 하늘이, 너무, 흐리다, #]"
4,/kaggle/input/korean-tourist-spot-dataset/Kore...,빛가람전망대 모노레일 탑승시간 2분여 한번은 탈만 함 봄 이되면 또 와야겠다..,"[모노레일,, 빛가람전망대,, 봄,, 나주]","[모노레일, 탑승시간, 2분여, 한번은, 타..."


In [60]:
import unicodedata
import re

# 1. 정제 함수 정의
def clean_hashtags(raw_tags, min_length=2):
    import unicodedata
    import re

    clean_set = set()
    for tag in raw_tags:
        tag = unicodedata.normalize("NFC", tag)
        if not tag.startswith("#"):
            tag = "#" + tag
        tag = re.sub(r"[^\u3131-\u3163\uac00-\ud7a3a-zA-Z0-9_#]", "", tag)
        if len(tag) <= min_length or tag == "#":
            continue
        clean_set.add(tag)
    return list(clean_set)

# 2. 한 줄씩 정제 함수 적용
df_results['cleaned_tags'] = df_results['generated_tags'].apply(clean_hashtags)

# 3. 결과 확인
df_results.head()



Unnamed: 0,Image Path,Text,Hashtag,generated_tags,cleaned_tags
0,/kaggle/input/korean-tourist-spot-dataset/Kore...,사탕이 흩뿌려진다~,[83타워전망대],"[사탕이, ~ᆯᄉᆯ, #, #ᅡᅮᄎᄎᄉ, #ᅩᆫᅡᆨᅦᆼᅮᄑᄑᄌ지ᅵᅩᅮᄉ디...","[#사탕이, #지딜]"
1,/kaggle/input/korean-tourist-spot-dataset/Kore...,오룩도 경치 오오오오오 뭐야뭐야 너무 끝내주자너,"[부산,, 오륙도]","[오룩도, 경치, 오오오오오, 너무, #]","[#너무, #오오오오오, #경치, #오룩도]"
2,/kaggle/input/korean-tourist-spot-dataset/Kore...,올해 최고 한파가 있던 날 소백산 산행.... 몸을 제다로 가누지 못할 만큼의 칼바...,"[등산스타그램,, 산스타그램,, 단양,, 소백산]","[올해, 한파가, 날, 소백산, 산행....]","[#산행, #한파가, #소백산, #올해]"
3,/kaggle/input/korean-tourist-spot-dataset/Kore...,궁은 이쁜데 하늘이 너무 흐리다,"[서울,, 창경궁,, 나들이,, 눈,, 눈온날,, 설경,, 사진그램,, 사진스타그램]","[궁은, 하늘이, 너무, 흐리다, #]","[#너무, #하늘이, #흐리다, #궁은]"
4,/kaggle/input/korean-tourist-spot-dataset/Kore...,빛가람전망대 모노레일 탑승시간 2분여 한번은 탈만 함 봄 이되면 또 와야겠다..,"[모노레일,, 빛가람전망대,, 봄,, 나주]","[모노레일, 탑승시간, 2분여, 한번은, 타...","[#탈만, #한번은, #모노레일, #2분여, #탑승시간]"


In [62]:
# 결과 DataFrame 저장
df_results.to_csv("/kaggle/working/blip_test_results_epcoh10_25.csv", index=False)

In [33]:
df_results.head()

Unnamed: 0,image_path,text,true_tags,generated_tags,cleaned_tags
0,/kaggle/input/korean-tourist-spot-dataset/Kore...,후덜덜,[#경주월드],"[후덜덜ᅡᅡᆯᆯ, #, #ᅩᅩᅮᅮᄌ지ᅵ, #ᅢᅢᅮᅩᆨᆨᄒᄒ, #ᅮᅡ호ᅡᅮᄋ]",[#후덜덜]
1,/kaggle/input/korean-tourist-spot-dataset/Kore...,즐거웠어 오늘 하루도 잘있어,"[#24살, #썰매장, #알바생, #퇴사후, #일중독, #경주월드, #경주월드눈썰매...","[오늘, 하루도, #, #ᅩᆼᄎᄒ혹ᅮᆫᄋᆼᅥᆫᄎᄎᄉ, ᄀ]","[#오늘, #하루도]"
2,/kaggle/input/korean-tourist-spot-dataset/Kore...,롯데월드,[#롯데월드],"[##ᅡᆯ, #ᅡᅡᅵᅵᄉ, #ᅩᅩᆼᆼᅵᆨᄉ시ᄋᄋᆼᅮᄉᆯ]",[]
3,/kaggle/input/korean-tourist-spot-dataset/Kore...,첫 에버랜드!!!..,"[#에버랜드, #everland, #주말데이트, #미세먼지, #마스크필수, #김밥,...","[에버랜드!!!..ᅡᅡᆯᆫᆫᆼᆫ, #, #ᅩᅮᆫᄋᆫᄎ산ᅮᅥᆫᅥᅡᆨᄉᆨᆫ...","[#산구, #에버랜드]"
4,/kaggle/input/korean-tourist-spot-dataset/Kore...,1월 11일날갔었는데 이제 올려버리기.. ..우헤헤 드라켄도 완전 재밌고 물배같...,"[#경주월드, #한화콘도, #뽀로로아쿠아빌리지, #드라켄, #관장님, #재밌었당, ...","[1월, 이제, 올려버리기...., 우헤헤, 드라켄도]","[#올려버리기, #우헤헤, #1월, #드라켄도, #이제]"


In [30]:
import numpy as np
from typing import List

def single_precision_k(truth, predict, k):
    if not predict[:k]:  # 빈 리스트일 때
        return 0.0
    matching = 0
    for tag in predict[:k]:
        if tag in truth:
            matching += 1
    return matching / len(predict[:k])



def precision_k(truth, predict, k=8):

    precision = [single_precision_k(t, p, k) for t, p in zip(truth, predict)]
    #print(precision)

    return np.average(precision, axis=0)

def single_recall_8(truth, predict, k):
    if not truth:  # 예외 처리
        return 0.0
    matching = 0
    for tag in predict[:k]:
        if tag in truth:
            matching += 1
    return matching / len(truth)



def recall_k(truth, predict, k=8):
    recall = [single_recall_8(t, p, k) for t, p in zip(truth, predict)]
    return sum(recall) / len(recall)


def f1_k(truth, predict, k=8):
    precision = precision_k(truth, predict, k)
    recall = recall_k(truth, predict, k)
    #print(precision, recall)

    return (2 * precision * recall) / (precision + recall)

In [31]:
true_tags = df_results["true_tags"].tolist()
pred_tags = df_results["cleaned_tags"].tolist()

p_at_8 = precision_k(true_tags, pred_tags, k=8)
r_at_8 = recall_k(true_tags, pred_tags, k=8)
f1_at_8 = f1_k(true_tags, pred_tags, k=8)

print(f"Precision@8: {p_at_8:.4f}")
print(f"Recall@8: {r_at_8:.4f}")
print(f"F1@8: {f1_at_8:.4f}")

Precision@8: 0.1581
Recall@8: 0.1006
F1@8: 0.1230


In [70]:
import ast
import pandas as pd
import numpy as np
import fasttext
import torch
from sentence_transformers import util
from scipy.optimize import linear_sum_assignment

# 정답 CSV의 Hashtag 컬럼 처리: 콤마로 구분된 문자열을 리스트로 변환
def process_gt_hashtag(hashtag_str):
    if pd.isna(hashtag_str):
        return []
    return [x.strip() for x in hashtag_str.split(',') if x.strip()]

# 예측 CSV의 generated_tags 컬럼 처리: 문자열 형태의 리스트를 ast.literal_eval로 파싱
def process_predicted_tags(tag_str):
    try:
        if isinstance(tag_str, str) and tag_str.startswith('['):
            tags = ast.literal_eval(tag_str)
            if isinstance(tags, list):
                return [str(t).strip() for t in tags]
        return [x.strip() for x in tag_str.split(',') if x.strip()]
    except Exception as e:
        return []

# FastText 기반 평가 지표: 각 이미지에 대해, top k 예측 태그와 ground truth 태그 간
# 헝가안 알고리즘을 통해 최적 매칭 후 threshold 이상의 매칭을 올바른 것으로 간주하여 계산
def fasttext_single_metrics(truth, predict, model_ft, k=8, threshold=0.6):
    pred_tags = predict[:k]
    if not pred_tags or not truth:
        return 0, 0, 0

    # 각 태그의 임베딩 계산
    pred_embeddings = [model_ft.get_word_vector(tag) for tag in pred_tags]
    truth_embeddings = [model_ft.get_word_vector(tag) for tag in truth]

    pred_tensor = torch.tensor(pred_embeddings)
    truth_tensor = torch.tensor(truth_embeddings)
    
    # 코사인 유사도 매트릭스 계산 (행: truth, 열: pred)
    sim_matrix = util.pytorch_cos_sim(truth_tensor, pred_tensor)
    cost = -sim_matrix.cpu().numpy()  # 최대 유사도를 위해 음수로 변환
    
    # 헝가안 알고리즘을 이용한 최적 매칭
    row_idx, col_idx = linear_sum_assignment(cost)
    
    # 매칭된 태그 쌍 중 threshold 이상의 유사도를 올바른 매칭으로 간주
    match_count = 0
    for i, j in zip(row_idx, col_idx):
        if sim_matrix[i, j].item() >= threshold:
            match_count += 1

    precision = match_count / len(pred_tags)
    recall = match_count / len(truth)
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1

def main():
    truth_list = df_results["Hashtag"].tolist()
    predict_list = df_results["cleaned_tags"].tolist()
    
    # FastText 모델 로드
    model_ft = fasttext.load_model('/kaggle/input/fasttext/pytorch/default/1/cc.ko.300.bin')
    
    # 각 샘플별 FastText 기반 평가 지표 계산 (top 8 예측 태그 기준)
    precisions = []
    recalls = []
    f1s = []
    
    for truth, pred in zip(truth_list, predict_list):
        p, r, f = fasttext_single_metrics(truth, pred, model_ft, k=8, threshold=0.6)
        precisions.append(p)
        recalls.append(r)
        f1s.append(f)
    
    avg_precision = np.average(precisions)
    avg_recall = np.average(recalls)
    avg_f1 = np.average(f1s)
    
    print("FastText Precision:", avg_precision)
    print("FastText Recall:", avg_recall)
    print("FastText F1 Score:", avg_f1)
    
    # 평가 결과를 병합된 DataFrame에 추가
    df_results["fasttext_precision"] = precisions
    df_results["fasttext_recall"] = recalls
    df_results["fasttext_f1"] = f1s
    
    # 저장할 최종 컬럼만 선택
    cols_to_save = ["Image Path", "Text", "Hashtag", "cleaned_tags", 
                    "fasttext_precision", "fasttext_recall", "fasttext_f1"]
    df_final = df_results[cols_to_save]
    
    # CSV로 저장
    output_file = "/kaggle/working/evaluation_results_fasttext_epoch10.csv"
    df_final.to_csv(output_file, index=False, encoding="utf-8-sig")
    print("평가 결과 CSV가 저장되었습니다:", output_file)

if __name__ == "__main__":
    main()


FastText Precision: 0.02586206896551724
FastText Recall: 0.02586206896551724
FastText F1 Score: 0.02586206896551724
평가 결과 CSV가 저장되었습니다: /kaggle/working/evaluation_results_fasttext_epoch10.csv
