In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [2]:
# Read the Pokédex CSV and drop its 'Unnamed: 0' column before inspecting the schema.
csvPath = './PokemonDataCSV/'
csvName = 'pokedex_(Update_05.20).csv'
pokedex = pd.read_csv(csvPath + csvName).drop(columns=['Unnamed: 0'])
pokedex.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1028 entries, 0 to 1027
Data columns (total 50 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   pokedex_number    1028 non-null   int64  
 1   name              1028 non-null   object 
 2   german_name       938 non-null    object 
 3   japanese_name     938 non-null    object 
 4   generation        1028 non-null   int64  
 5   status            1028 non-null   object 
 6   species           1028 non-null   object 
 7   type_number       1028 non-null   int64  
 8   type_1            1028 non-null   object 
 9   type_2            542 non-null    object 
 10  height_m          1028 non-null   float64
 11  weight_kg         1027 non-null   float64
 12  abilities_number  1028 non-null   int64  
 13  ability_1         1025 non-null   object 
 14  ability_2         513 non-null    object 
 15  ability_hidden    810 non-null    object 
 16  total_points      1028 non-null   float64


In [3]:
# Build a table that holds only the columns used in the rest of the notebook.
keep_columns = [
    'pokedex_number', 'name', 'japanese_name',
    'type_number', 'type_1', 'type_2',
    'egg_type_number', 'egg_type_1', 'egg_type_2',
]
pokedex = pokedex[keep_columns]
pokedex.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1028 entries, 0 to 1027
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   pokedex_number   1028 non-null   int64 
 1   name             1028 non-null   object
 2   japanese_name    938 non-null    object
 3   type_number      1028 non-null   int64 
 4   type_1           1028 non-null   object
 5   type_2           542 non-null    object
 6   egg_type_number  1028 non-null   int64 
 7   egg_type_1       1025 non-null   object
 8   egg_type_2       282 non-null    object
dtypes: int64(3), object(6)
memory usage: 72.4+ KB


In [4]:
# Two clean-up steps, both of which mainly remove Mega Evolution entries:
#   1. drop rows whose egg_type_1 is missing
#   2. keep only the first row for each pokedex_number
pokedex = (
    pokedex
    .dropna(subset=['egg_type_1'])
    .drop_duplicates(subset=['pokedex_number'])
)

In [6]:
import os
from os import listdir
# Directory holding the image files.
imageDir = "./ValidationData/"
# Keep only the usable image files: skip hidden entries such as .DS_Store
# (names starting with '.') and names containing '-'.
# Both conditions are applied in a single pass over one directory listing;
# filtering a fresh listdir() call a second time would throw the first
# filter away and let .DS_Store back into the list.
fltr_list = [
    filename
    for filename in listdir(imageDir)
    if not filename.startswith('.') and '-' not in filename
]

In [7]:
# Pair every image file with the number encoded before the first '.' of its
# name (e.g. '348.png' -> 348). int() raises ValueError on a non-numeric prefix.
fileNameLists = list(fltr_list)
fileNumberLists = [int(filename.split('.')[0]) for filename in fltr_list]

# Build the frame column by column so that pokedex_number keeps an integer
# dtype; stacking the two lists as rows and transposing would leave both
# columns as generic object dtype.
pokedex2 = pd.DataFrame({
    'pokedex_number': fileNumberLists,
    'image_name': fileNameLists,
})

In [8]:
# Preview the image table built from the file names (pokedex_number, image_name).
pokedex2

Unnamed: 0,pokedex_number,image_name
0,348,348.png
1,412,412.png
2,374,374.png
3,360,360.png
4,406,406.png
...,...,...
716,379,379.png
717,423,423.png
718,345,345.png
719,351,351.png


In [9]:
# Inner join on pokedex_number: keep only Pokédex rows that have a matching image file.
pokedex3 = pokedex.merge(pokedex2, how='inner', on='pokedex_number')

In [10]:
# Inspect the first 30 rows of the merged Pokédex / image table.
pokedex3.head(30)

Unnamed: 0,pokedex_number,name,japanese_name,type_number,type_1,type_2,egg_type_number,egg_type_1,egg_type_2,image_name
0,1,Bulbasaur,フシギダネ (Fushigidane),2,Grass,Poison,2,Grass,Monster,1.png
1,2,Ivysaur,フシギソウ (Fushigisou),2,Grass,Poison,2,Grass,Monster,2.png
2,3,Venusaur,フシギバナ (Fushigibana),2,Grass,Poison,2,Grass,Monster,3.png
3,4,Charmander,ヒトカゲ (Hitokage),1,Fire,,2,Dragon,Monster,4.png
4,5,Charmeleon,リザード (Lizardo),1,Fire,,2,Dragon,Monster,5.png
5,6,Charizard,リザードン (Lizardon),2,Fire,Flying,2,Dragon,Monster,6.png
6,7,Squirtle,ゼニガメ (Zenigame),1,Water,,2,Monster,Water 1,7.png
7,8,Wartortle,カメール (Kameil),1,Water,,2,Monster,Water 1,8.png
8,9,Blastoise,カメックス (Kamex),1,Water,,2,Monster,Water 1,9.png
9,10,Caterpie,キャタピー (Caterpie),1,Bug,,1,Bug,,10.png


In [11]:
# Print how many rows fall into each egg_type_1 value.
# Series.items() is used because Series.iteritems() was deprecated and has
# been removed from current pandas releases.
for index, value in pokedex3['egg_type_1'].value_counts().items():
    print(index, ': ', value)

Field :  186
Undiscovered :  73
Bug :  68
Amorphous :  49
Dragon :  47
Fairy :  46
Mineral :  46
Flying :  42
Grass :  38
Human-Like :  36
Monster :  31
Water 1 :  31
Water 3 :  14
Water 2 :  13
Ditto :  1


In [12]:
def createLabel(pokedex3):
    """Attach an integer class label derived from the ``egg_type_1`` column.

    The label of a row is the position of the first entry of the egg-group
    list below that occurs as a substring of the row's ``egg_type_1`` value.
    Substring matching is what lets 'Water 1', 'Water 2' and 'Water 3' share
    the single 'Water' label.

    Parameters
    ----------
    pokedex3 : pandas.DataFrame
        Frame with an ``egg_type_1`` column of strings. It is modified in
        place: a ``label`` column is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same frame object, now carrying the ``label`` column.

    Raises
    ------
    ValueError
        If an ``egg_type_1`` value matches none of the known egg groups.
    """
    egg_type_list = ['Field', 'Undiscovered', 'Bug', 'Amorphous', 'Dragon', 'Fairy', 'Mineral',
                     'Flying', 'Grass', 'Human-Like', 'Monster', 'Water', 'Ditto']

    labels = []
    for eggType1 in pokedex3['egg_type_1']:
        for label, elm in enumerate(egg_type_list):
            if elm in eggType1:
                labels.append(label)
                # Stop at the first match so every row yields exactly one
                # label and the list stays aligned with the frame's rows.
                break
        else:
            # No group matched: fail with a clear message here instead of a
            # length-mismatch error at the column assignment below.
            raise ValueError('unknown egg_type_1 value: {!r}'.format(eggType1))

    pokedex3['label'] = labels
    return pokedex3

In [13]:
# Add the 'label' column. createLabel assigns the column on the frame it is
# given and returns that frame, so pokedex3 itself is updated and displayed.
createLabel(pokedex3)

Unnamed: 0,pokedex_number,name,japanese_name,type_number,type_1,type_2,egg_type_number,egg_type_1,egg_type_2,image_name,label
0,1,Bulbasaur,フシギダネ (Fushigidane),2,Grass,Poison,2,Grass,Monster,1.png,8
1,2,Ivysaur,フシギソウ (Fushigisou),2,Grass,Poison,2,Grass,Monster,2.png,8
2,3,Venusaur,フシギバナ (Fushigibana),2,Grass,Poison,2,Grass,Monster,3.png,8
3,4,Charmander,ヒトカゲ (Hitokage),1,Fire,,2,Dragon,Monster,4.png,4
4,5,Charmeleon,リザード (Lizardo),1,Fire,,2,Dragon,Monster,5.png,4
...,...,...,...,...,...,...,...,...,...,...,...
716,717,Yveltal,イベルタル (Yveltal),2,Dark,Flying,1,Undiscovered,,717.png,1
717,718,Zygarde 50% Forme,ジガルデ (Zygarde),2,Dragon,Ground,1,Undiscovered,,718.png,1
718,719,Diancie,ディアンシー (Diancie),2,Rock,Fairy,1,Undiscovered,,719.png,1
719,720,Hoopa Hoopa Confined,フーパ (Hoopa),2,Psychic,Ghost,1,Undiscovered,,720.png,1


In [16]:
import shutil
# Egg-group names, in the same order as the list inside createLabel; one
# directory named after each list position (0, 1, 2, ...) is created below.
egg_type_list = ['Field', 'Undiscovered', 'Bug', 'Amorphous', 'Dragon', 'Fairy', 'Mineral',
                 'Flying', 'Grass', 'Human-Like', 'Monster', 'Water', 'Ditto']
path = './AllDataSet/validation/'
# makedirs(exist_ok=True) keeps this cell re-runnable (os.mkdir raises when
# the directory already exists) and also creates missing parent directories.
for i in range(len(egg_type_list)):
    os.makedirs(path + str(i), exist_ok=True)

In [17]:
# Move every image into the sub-directory named after its label.
# NOTE(review): the image names were listed from imageDir ("./ValidationData/")
# but the files are moved from `path` -- confirm both hold the same files.
for imageName, label in zip(pokedex3['image_name'], pokedex3['label']):
    # Only labels that have a directory (0 .. len(egg_type_list) - 1) are moved.
    if not 0 <= label < len(egg_type_list):
        continue
    source = path + imageName
    # A file that is no longer at the source (e.g. moved by an earlier run of
    # this cell) is skipped so the cell can be executed again without failing.
    if not os.path.exists(source):
        continue
    shutil.move(source, path + str(label))