Get pokemon data from pokeAPI

In [1]:
import requests
import json
import os

# 定义保存图片的目录
save_directory = "data/pokemon_images"
os.makedirs(save_directory, exist_ok=True)

# 获取所有宝可梦的基本信息
url = "https://pokeapi.co/api/v2/pokemon?limit=10000"
response = requests.get(url)
data = response.json()

# 创建一个字典来存储宝可梦的名称和属性
pokemon_info = {}

# 遍历每个宝可梦
for pokemon in data['results']:
    pokemon_name = pokemon['name']
    pokemon_url = pokemon['url']
    response = requests.get(pokemon_url)
    pokemon_data = response.json()

    # 获取宝可梦的属性
    types = pokemon_data['types']
    type_names = [type_info['type']['name'] for type_info in types]

    # 获取宝可梦的图片链接
    pic1_url = pokemon_data['sprites']['other']['official-artwork']['front_default']
    pic2_url = pokemon_data['sprites']['other']['home']['front_default']
    pic3_url = pokemon_data['sprites']['front_default']

    # 下载并保存图片
    for i, pic_url in enumerate([pic1_url, pic2_url, pic3_url], start=1):
        if pic_url:
            img_response = requests.get(pic_url)
            if img_response.status_code == 200:
                img_name = f"{pokemon_name}_pic{i}.png"
                img_path = os.path.join(save_directory, img_name)
                with open(img_path, 'wb') as file:
                    file.write(img_response.content)

    # 将宝可梦的名称和属性存储到字典中
    pokemon_info[pokemon_name] = {
        "types": type_names,
        "images": [os.path.join(save_directory, f"{pokemon_name}_pic{i}.png") for i in range(1, 4) if eval(f"pic{i}_url")]
    }

# 将字典转换为 JSON 格式并保存到文件中
with open('data/pokemon_info.json', 'w') as json_file:
    json.dump(pokemon_info, json_file, indent=4)

print(f"所有宝可梦的名称和属性已保存到 pokemon_info.json 文件中，图片保存在 {save_directory} 目录中。")

所有宝可梦的名称和属性已保存到 pokemon_info.json 文件中，图片保存在 data/pokemon_images 目录中。


In [16]:
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image

# 定义自定义数据集类
class PokemonDataset(Dataset):
    def __init__(self, data_dict, transform=None):
        self.data_dict = data_dict
        self.transform = transform
        self.image_files = []
        self.labels = []
        
        # 遍历数据字典，获取所有图像文件路径和对应的标签（属性）
        for pokemon_name, info in data_dict.items():
            for img_path in info['images']:
                self.image_files.append(img_path)
                self.labels.append(info['types'])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        image = Image.open(img_path).convert("RGB")
        
        # 获取宝可梦的标签（属性）
        labels = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        # 将字符串标签转换为整数标签
        label_map = {'grass': 0, 'poison': 1, 'fire': 2, 'water': 3, 'electric': 4, 'ice': 5, 'fighting': 6,
                     'ground': 7, 'flying': 8, 'psychic': 9, 'bug': 10, 'rock': 11, 'ghost': 12, 'dark': 13,
                     'dragon': 14, 'steel': 15, 'fairy': 16, 'normal': 17}
        labels = [label_map[label] for label in labels]
        
        # 将标签转换为多标签二进制格式
        multi_label = torch.zeros(len(label_map), dtype=torch.float32)
        for label in labels:
            multi_label[label] = 1.0
        
        return image, multi_label

# 读取数据字典
with open('data/pokemon_info.json', 'r') as f:
    data_dict = json.load(f)

# 定义训练数据的转换
data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# 创建数据集
pokemon_dataset = PokemonDataset(data_dict=data_dict, transform=data_transforms)

# 将数据集分割为训练集、验证集和测试集（80%、10%、10%）
train_size = int(0.8 * len(pokemon_dataset))
val_size = int(0.1 * len(pokemon_dataset))
test_size = len(pokemon_dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(pokemon_dataset, [train_size, val_size, test_size])

# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)


In [4]:
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
import cv2
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 定义自定义数据集类
class PokemonDataset(Dataset):
    def __init__(self, data_dict, transform=None):
        self.data_dict = data_dict
        self.transform = transform
        self.image_files = []
        self.labels = []
        
        # 遍历数据字典，获取所有图像文件路径和对应的标签（属性）
        for pokemon_name, info in data_dict.items():
            for img_path in info['images']:
                self.image_files.append(img_path)
                self.labels.append(info['types'])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        
        # 使用OpenCV加载图像
        image = cv2.imread(img_path)
        if image is None:
            print(f"UnidentifiedImageError: cannot identify image file '{img_path}'")
            return self.__getitem__((idx + 1) % len(self.image_files))
        
        # 将图像从BGR转换为RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # 获取宝可梦的标签（属性）
        labels = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        # 将字符串标签转换为整数标签
        label_map = {'grass': 0, 'poison': 1, 'fire': 2, 'water': 3, 'electric': 4, 'ice': 5, 'fighting': 6,
                     'ground': 7, 'flying': 8, 'psychic': 9, 'bug': 10, 'rock': 11, 'ghost': 12, 'dark': 13,
                     'dragon': 14, 'steel': 15, 'fairy': 16, 'normal': 17}
        labels = [label_map[label] for label in labels]
        
        # 将标签转换为多标签二进制格式
        multi_label = torch.zeros(len(label_map), dtype=torch.float32)
        for label in labels:
            multi_label[label] = 1.0
        
        return image, multi_label

# 定义函数来提取图像特征
def extract_features(model, dataloader):
    model.eval()
    features = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs.cpu().numpy())
    return np.concatenate(features)


# 读取数据字典
with open('data/pokemon_info.json', 'r') as f:
    data_dict = json.load(f)

# 定义训练数据的转换
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# 创建数据集
pokemon_dataset = PokemonDataset(data_dict=data_dict, transform=data_transforms)

# 将数据集分割为训练集、验证集和测试集（80%、10%、10%）
train_size = int(0.8 * len(pokemon_dataset))
val_size = int(0.1 * len(pokemon_dataset))
test_size = len(pokemon_dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(pokemon_dataset, [train_size, val_size, test_size])

# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# 检查是否有可用的GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 加载预训练的ResNet18模型
model = models.resnet18(pretrained=True)

# 修改最后一层以匹配属性的数量（假设有18种可能的属性）
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 18)

# 将模型移动到GPU
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# 训练循环
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs_labels in train_loader:
        if inputs_labels is None:
            continue
        
        inputs, labels = inputs_labels
        
        # 将输入和标签移动到GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

    # 验证循环
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs_labels in val_loader:
            if inputs_labels is None:
                continue
            
            inputs, labels = inputs_labels
            
            # 将输入和标签移动到GPU
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    
    print(f"Validation Loss: {val_loss/len(val_loader)}")

print("Training complete.")

# 测试循环（可选）
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs_labels in test_loader:
        if inputs_labels is None:
            continue
        
        inputs, labels = inputs_labels
        
        # 将输入和标签移动到GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = model(inputs)
        
        loss = criterion(outputs, labels)
        test_loss += loss.item()

print(f"Test Loss: {test_loss/len(test_loader)}")



Using device: cuda:0
Epoch 1/10, Loss: 0.30217974563491157
Validation Loss: 0.28418717986529635
Epoch 2/10, Loss: 0.2739431640721973
Validation Loss: 0.2694471507650061
Epoch 3/10, Loss: 0.26146089968903696
Validation Loss: 0.2585177209573923
Epoch 4/10, Loss: 0.2513724207839509
Validation Loss: 0.24945543199470363
Epoch 5/10, Loss: 0.2423582907881916
Validation Loss: 0.24683019611024365
Epoch 6/10, Loss: 0.23483559973730941
Validation Loss: 0.23940254087300644
Epoch 7/10, Loss: 0.22669326286241798
Validation Loss: 0.2360087030941678
Epoch 8/10, Loss: 0.21843267285260204
Validation Loss: 0.22845934024176648
Epoch 9/10, Loss: 0.2098351487976745
Validation Loss: 0.22521687905813
Epoch 10/10, Loss: 0.20106783593218253
Validation Loss: 0.22107784250347884
Training complete.
Test Loss: 0.22897303227296809


In [9]:
# 示例：查找与查询图像最相似的三个宝可梦图像
# 定义函数来找到最相似的宝可梦图像
def find_similar_images(query_image_path, model, feature_extractor, dataset, dataloader, top_k=3):
    # 加载查询图像并进行预处理
    query_image = cv2.imread(query_image_path)
    if query_image is None:
        raise ValueError(f"Cannot identify image file '{query_image_path}'")
    
    query_image = cv2.cvtColor(query_image, cv2.COLOR_BGR2RGB)
    query_image = data_transforms(query_image).unsqueeze(0).to(device)
    
    # 提取查询图像的特征
    with torch.no_grad():
        query_features = feature_extractor(query_image).cpu().numpy()
    
    # 提取数据集中所有图像的特征
    dataset_features = extract_features(feature_extractor, dataloader)
    
    # 计算余弦相似度
    similarities = cosine_similarity(query_features, dataset_features)
    
    # 找到最相似的图像索引
    top_k_indices = np.argsort(similarities[0])[::-1][:top_k]
    
    # 返回最相似的图像路径和相似度分数
    similar_images = [(dataset.image_files[idx], similarities[0][idx]) for idx in top_k_indices]
    
    return similar_images

query_image_path = 'data/palworld_images/Anubis.png'
similar_images = find_similar_images(query_image_path, model, model, pokemon_dataset, train_loader, top_k=3)
print("Most similar images:")
for img_path, score in similar_images:
    print(f"Image: {img_path}, Similarity Score: {score}")

Most similar images:
Image: data/pokemon_images\passimian_pic3.png, Similarity Score: 0.9777513146400452
Image: data/pokemon_images\incineroar_pic1.png, Similarity Score: 0.9698570966720581
Image: data/pokemon_images\azurill_pic1.png, Similarity Score: 0.969271719455719


In [10]:
from sklearn.metrics.pairwise import euclidean_distances

def find_similar_images_euclidean(query_image_path, model, feature_extractor, dataset, dataloader, top_k=3):
    query_image = cv2.imread(query_image_path)
    if query_image is None:
        raise ValueError(f"Cannot identify image file '{query_image_path}'")

    query_image = cv2.cvtColor(query_image, cv2.COLOR_BGR2RGB)
    query_image = data_transforms(query_image).unsqueeze(0).to(device)

    with torch.no_grad():
        query_features = feature_extractor(query_image).cpu().numpy()

    dataset_features = extract_features(feature_extractor, dataloader)

    distances = euclidean_distances(query_features, dataset_features)

    top_k_indices = np.argsort(distances[0])[:top_k]

    similar_images = [(dataset.image_files[idx], distances[0][idx]) for idx in top_k_indices]

    return similar_images

query_image_path = 'data/palworld_images/Anubis.png'
similar_images = find_similar_images_euclidean(query_image_path, model, model, pokemon_dataset, train_loader, top_k=3)
print("Most similar images:")
for img_path, score in similar_images:
    print(f"Image: {img_path}, Distance: {score}")

Most similar images:
Image: data/pokemon_images\cetoddle_pic3.png, Distance: 3.8211874961853027
Image: data/pokemon_images\bastiodon_pic1.png, Distance: 3.8765923976898193
Image: data/pokemon_images\wyrdeer_pic1.png, Distance: 4.010998249053955


In [12]:
from torchvision.models import vgg16

# 加载预训练的VGG16模型
vgg_model = vgg16(pretrained=True)
vgg_model.classifier = nn.Sequential(*list(vgg_model.classifier.children())[:-1])  # 移除最后一层
vgg_model = vgg_model.to(device)

def extract_vgg_features(model, dataloader):
    model.eval()
    features = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs.cpu().numpy())
    return np.concatenate(features)

query_image_path = 'data/palworld_images/Anubis.png'
similar_images = find_similar_images(query_image_path, vgg_model, vgg_model, pokemon_dataset, train_loader, top_k=3)
print("Most similar images:")
for img_path, score in similar_images:
    print(f"Image: {img_path}, Similarity Score: {score}")

Most similar images:
Image: data/pokemon_images\cradily_pic3.png, Similarity Score: 0.514254093170166
Image: data/pokemon_images\absol_pic2.png, Similarity Score: 0.49518799781799316
Image: data/pokemon_images\drowzee_pic2.png, Similarity Score: 0.4846184253692627


In [14]:
import cv2
import imagehash
import numpy as np
 
def find_similar_images_hash(query_image_path, dataset, top_k=3):
    # 使用 OpenCV 读取图像
    query_image = cv2.imread(query_image_path)
    # OpenCV 读取的图像是 BGR 格式，需要转换为 RGB 格式
    query_image_rgb = cv2.cvtColor(query_image, cv2.COLOR_BGR2RGB)
    # 将图像转换为 Pillow 图像对象以使用 imagehash 库
    query_image_pil = Image.fromarray(query_image_rgb)
    query_hash = imagehash.phash(query_image_pil)
 
    image_hashes = []
    for img_path in dataset.image_files:
        # 使用 OpenCV 读取图像
        img = cv2.imread(img_path)
        # 转换为 RGB 格式
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # 转换为 Pillow 图像对象
        img_pil = Image.fromarray(img_rgb)
        # 计算图像哈希
        img_hash = imagehash.phash(img_pil)
        image_hashes.append((img_path, img_hash))
 
    similarities = [(img_path, 1 - (query_hash - img_hash) / len(query_hash.hash)**2) for img_path, img_hash in image_hashes]
    similarities.sort(key=lambda x: x[1], reverse=True)
 
    return similarities[:top_k]
 
query_image_path = 'data/palworld_images/Anubis.png'
# 假设 pokemon_dataset 是一个具有 image_files 属性的对象，该属性包含图像文件路径的列表
# 注意：你需要确保 pokemon_dataset 已经被正确定义并包含有效的图像文件路径
similar_images = find_similar_images_hash(query_image_path, pokemon_dataset, top_k=3)
print("Most similar images:")
for img_path, score in similar_images:
    print(f"Image: {img_path}, Similarity Score: {score}")

Most similar images:
Image: data/pokemon_images\pichu_pic3.png, Similarity Score: 0.75
Image: data/pokemon_images\chinchou_pic3.png, Similarity Score: 0.71875
Image: data/pokemon_images\porygon2_pic2.png, Similarity Score: 0.71875
