In [1]:
import os
# Expose only GPUs 0-3 to this process. The assignment sits above the
# `import torch` further down so the variable is already set before any
# CUDA initialization can read it.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
import time
import random
import glob
from PIL import Image
import warnings
# Suppress all warnings for the rest of the session (no category filter).
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist
from torch.utils.data import Dataset, DataLoader, DistributedSampler
from torch.multiprocessing.spawn import spawn
from torchvision.models.detection import ssd300_vgg16
from torchvision.datasets import VOCSegmentation
from torchvision.io import read_image
from torchvision.transforms import v2
from gensim.models import KeyedVectors

In [3]:
# Path to the pretrained word2vec vectors, stored in the binary word2vec format.
model_path = './GoogleNews-vectors-negative300.bin'
# Read the file into a gensim KeyedVectors object (binary=True selects the
# binary reader rather than the plain-text one).
wv = KeyedVectors.load_word2vec_format(fname=model_path, binary=True)

In [5]:
# Vocabulary lookups taken straight from the loaded KeyedVectors.
stoi = wv.key_to_index  # word -> row index
itos = wv.index_to_key  # row index -> word

# Tensor view of the raw vector matrix, plus a copy whose rows are scaled to
# unit L2 length so that a dot product between rows is a cosine similarity.
embeddings = torch.from_numpy(wv.vectors)
emb_norm = F.normalize(embeddings, p=2.0, dim=1)

def vec(word):
    """Return the word vector (torch.Tensor[300]) for ``word``.

    Args:
        word: Vocabulary key to look up in ``stoi``.

    Returns:
        The row of ``embeddings`` that corresponds to ``word``.

    Raises:
        KeyError: If ``word`` is not present in the vocabulary.
    """
    # Guard first so the failure carries the descriptive message below.
    if word not in stoi:
        raise KeyError(f"単語 '{word}' が語彙にありません")
    return embeddings[stoi[word]]

In [20]:
# Query direction: the 'hotdog' vector with the 'bread' vector subtracted.
target = vec('hotdog') - vec('bread')
# Add a leading axis and scale to unit length so it can be compared with emb_norm.
target_norm = F.normalize(target[None, :], p=2.0, dim=1)

# Cosine similarity between every vocabulary row and the query (one score per word).
sim = emb_norm.mm(target_norm.t()).squeeze(1)

# Pick the highest-scoring vocabulary entries.
# NOTE: the query words themselves are not filtered out of the ranking.
topk = 10
values, indices = sim.topk(k=topk)

print("----- hotdog - bread の類似語 -----")
for score, idx in zip(values, indices):
    print(f"{itos[idx]}: {score:.4f}")

----- hotdog - bread の類似語 -----
hotdog: 0.6288
hotdogs: 0.3477
Hard_Rock_Café: 0.3347
Bowzer: 0.3280
Universal_CityWalk: 0.3232
Zooperstars: 0.3146
Hooters: 0.3116
Rollergirl: 0.3109
beer_bong: 0.3107
Sneaky_Pete: 0.3078
