<a href="https://colab.research.google.com/github/Redwoods/Py/blob/master/pdm2020/my-note/py-tensorflow/tf2_1_datasets_cv_II.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CV (Computer vision) datasets-2 in AI
> https://www.tensorflow.org/api_docs/python/tf/keras/datasets  
- from keras
    - CIFAR-10: cifar10
    - CIFAR-100: cifar100
    -  ...



In [None]:
# Numerical, deep-learning, tabular and plotting libraries used by the cells below.
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline

# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

## CIFAR-10 Dataset (CIFAR: Canadian Institute For Advanced Research)
> https://www.cs.toronto.edu/~kriz/cifar.html  
- (50000, 32, 32, 3), train
- (10000, 32, 32, 3), test
- **color** photographs of objects from 10 classes, such as frogs, birds, cats, ships, etc

### class names
- class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

In [None]:
# Load CIFAR-10 through the `keras` module that the import cell already took
# from TensorFlow, instead of importing the standalone `keras` package here.
# This keeps every import in the first cell and matches the tf.keras.datasets
# API linked at the top of the notebook.
(trainX, trainy), (testX, testy) = keras.datasets.cifar10.load_data()

# Summarize the loaded arrays: image tensor shape and label array shape per split.
print(f'Train: X={trainX.shape}, y={trainy.shape}')
print(f'Test: X={testX.shape}, y={testy.shape}')


In [None]:
# Preview the first nine training images on a 3x3 grid.
for i in range(9):
    # Subplot positions are 1-based, hence i + 1.
    plt.subplot(3, 3, i + 1)
    # Draw the raw pixel data of image i.
    plt.imshow(trainX[i])
# No explicit plt.show(): the notebook renders the figure when the cell finishes.

## CIFAR-10 trainset

In [None]:
# Inspect the training-label container: its Python type and its array shape.
type(trainy), trainy.shape

In [None]:
# Peek at the first five training labels.
trainy[:5]

In [None]:
# Count how many training images carry each label value.
unique, counts = np.unique(trainy, return_counts=True)
# Map label value -> number of occurrences.
cifar10_train_dic = {label: count for label, count in zip(unique, counts)}
cifar10_train_dic

In [None]:
# Bar chart of the per-label image counts in the CIFAR-10 training set.
plt.bar(list(cifar10_train_dic), list(cifar10_train_dic.values()), color='g')

#### 트레인데이터에서 무작위로 한 종류를 선택해서 출력

In [None]:
# Display one randomly chosen image from the training set, titled with its class name.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Draw a random row index into the training images.
idx = np.random.randint(0, trainX.shape[0])
print(idx)
image = trainX[idx]
plt.imshow(image)
# The labels are stored as a 2-D column (other cells index them as trainy[:, 0]),
# so trainy[idx] is a one-element array and cannot index a Python list.
# Taking column 0 yields the integer class id that class_names expects.
plt.title(class_names[trainy[idx, 0]])
# Hide axis ticks; they carry no meaning for an image.
plt.xticks([])
plt.yticks([])

plt.show()

#### 클래스별 대표 그림 출력

In [None]:
# First five labels, plus the entry at index 5 on its own. Each entry is itself a
# small array rather than a plain int (calling .item() on it would unwrap the scalar).
trainy[:5],trainy[5] #,trainy[5].item()

In [None]:
# Select every training image that belongs to one class.
# Two fixes over the earlier version of this cell:
#  * the class id is set here instead of relying on a loop variable `i`
#    left behind by a previous cell, so the cell works on a fresh kernel;
#  * the labels are a 2-D column, so `trainy == class_id` is a 2-D boolean mask that
#    cannot index the image array along its first axis; comparing column 0
#    gives the 1-D mask that boolean indexing needs.
class_id = 1
num0_9 = trainX[trainy[:, 0] == class_id]

In [None]:
# Keep only the training images whose label (column 0 of trainy) equals 1.
is_class_1 = trainy[:, 0] == 1
num0_9 = trainX[is_class_1]
# Show the subset's shape, its first image's pixel values, and that image's shape.
first_image = num0_9[0]
(num0_9.shape, first_image, first_image.shape)

In [None]:
# One example image per label 0..9 from the training set, on a 2x5 grid.
fig = plt.figure(figsize=(10, 6))
train_labels = trainy[:, 0]  # 1-D view of the label column, usable as a row mask
for position, label in enumerate(range(10), start=1):
    plt.subplot(2, 5, position)
    class_images = trainX[train_labels == label]
    # Report how many images this class has (and their per-image shape).
    print(class_images.shape)
    plt.imshow(class_images[0])
    plt.title("Class %d" % label)
    plt.xticks([])
    plt.yticks([])



### [DIY] cifar-10 trainset에서 각 유형이 출현하는 첫 인덱스를 이용해서 대표유형들을 출력하시오.

In [None]:
## Your code here!
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# With return_index=True, np.unique also returns the index of the first
# occurrence of each distinct label; element [1] of the result is that index array.
num_10 = np.unique(trainy, return_index=True)[1]
images = trainX[num_10]
fig = plt.figure(figsize=(10, 6))
for k, picture in enumerate(images):
    # Place the k-th representative image in the next cell of the 2x5 grid.
    plt.subplot(2, 5, k + 1)
    plt.imshow(picture)
    plt.title(class_names[k])
    plt.xticks([])
    plt.yticks([])
plt.suptitle("10 different images", fontsize=18)
plt.show()

### CIFAR-10 test set

In [None]:
# Count how many test images carry each label value.
print(type(testy))
unique, counts = np.unique(testy, return_counts=True)
# Map label value -> number of occurrences.
cifar10_test_dic = {label: count for label, count in zip(unique, counts)}
cifar10_test_dic


In [None]:
# Bar chart of the per-label image counts in the CIFAR-10 test set.
plt.bar(list(cifar10_test_dic), list(cifar10_test_dic.values()), color='g')

In [None]:
# One example image per label 0..9 from the test set, on a compact 2x5 grid.
fig = plt.figure(figsize=(5, 3))
test_labels = testy[:, 0]  # 1-D view of the label column, usable as a row mask
for position, label in enumerate(range(10), start=1):
    plt.subplot(2, 5, position)
    class_images = testX[test_labels == label]
    plt.imshow(class_images[0])
    plt.title("Class %d" % label)
    plt.xticks([])
    plt.yticks([])



### [DIY] cifar-10 testset에서 각 유형이 출현하는 첫 인덱스를 이용해서 대표유형들을 출력하시오.

In [None]:
# Your code here!






---



---



## CIFAR-100 Dataset (CIFAR: Canadian Institute For Advanced Research)
> https://www.cs.toronto.edu/~kriz/cifar.html  
- (50000, 32, 32, 3), train
- (10000, 32, 32, 3), test
- color photographs of objects from 100 classes, such as fish, flowers, insects, and much more.  

> The 100 classes in the CIFAR-100 are grouped into 20 superclasses. Each image comes with a "fine" label (the class to which it belongs) and a "coarse" label (the superclass to which it belongs).

### class names
- class_names = ['beaver', 'dolphin', 'otter', 'seal', 'whale', 'aquarium fish', 'flatfish', 'ray', 'shark', 'trout', 
'orchids', 'poppies', 'roses', 'sunflowers', 'tulips', 
'bottles', 'bowls', 'cans', 'cups', 'plates', 
'apples', 'mushrooms', 'oranges', 'pears', 'sweet peppers', 
'clock', 'keyboard', 'lamp', 'telephone', 'television', 
'bed', 'chair', 'couch', 'table', 'wardrobe', 
'bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach', 
'bear', 'leopard', 'lion', 'tiger', 'wolf', 
'bridge', 'castle', 'house', 'road', 'skyscraper', 
'cloud', 'forest', 'mountain', 'plain', 'sea', 
'camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo', 
'fox', 'porcupine', 'possum', 'raccoon', 'skunk', 
'crab', 'lobster', 'snail', 'spider', 'worm', 
'baby', 'boy', 'girl', 'man', 'woman', 
'crocodile', 'dinosaur', 'lizard', 'snake', 'turtle', 
'hamster', 'mouse', 'rabbit', 'shrew', 'squirrel', 
'maple', 'oak', 'palm', 'pine', 'willow', 
'bicycle', 'bus', 'motorcycle', 'pickup truck', 'train', 
'lawn-mower', 'rocket', 'streetcar', 'tank', 'tractor']

### cifar-100: Superclass(20), Classes(100)
```
Superclass	Classes
aquatic mammals	beaver, dolphin, otter, seal, whale
fish	aquarium fish, flatfish, ray, shark, trout
flowers	orchids, poppies, roses, sunflowers, tulips
food containers	bottles, bowls, cans, cups, plates
fruit and vegetables	apples, mushrooms, oranges, pears, sweet peppers
household electrical devices	clock, keyboard, lamp, telephone, television
household furniture	bed, chair, couch, table, wardrobe
insects	bee, beetle, butterfly, caterpillar, cockroach
large carnivores	bear, leopard, lion, tiger, wolf
large man-made outdoor things	bridge, castle, house, road, skyscraper
large natural outdoor scenes	cloud, forest, mountain, plain, sea
large omnivores and herbivores	camel, cattle, chimpanzee, elephant, kangaroo
medium-sized mammals	fox, porcupine, possum, raccoon, skunk
non-insect invertebrates	crab, lobster, snail, spider, worm
people	baby, boy, girl, man, woman
reptiles	crocodile, dinosaur, lizard, snake, turtle
small mammals	hamster, mouse, rabbit, shrew, squirrel
trees	maple, oak, palm, pine, willow
vehicles 1	bicycle, bus, motorcycle, pickup truck, train
vehicles 2	lawn-mower, rocket, streetcar, tank, tractor
```

In [None]:
# Load CIFAR-100 through the `keras` module that the import cell already took
# from TensorFlow, instead of importing the standalone `keras` package here.
# NOTE: this rebinds trainX/trainy/testX/testy, so from this cell onward those
# names refer to CIFAR-100 rather than CIFAR-10.
(trainX, trainy), (testX, testy) = keras.datasets.cifar100.load_data()

# Summarize the loaded arrays: image tensor shape and label array shape per split.
print(f'Train: X={trainX.shape}, y={trainy.shape}')
print(f'Test: X={testX.shape}, y={testy.shape}')

In [None]:
# Peek at the first ten training labels of the dataset that has 100 classes.
trainy[:10]  # 100 labels

In [None]:
# Preview the first nine training images on a 3x3 grid.
for position, picture in enumerate(trainX[:9], start=1):
    # Subplot positions are 1-based.
    plt.subplot(3, 3, position)
    # Draw the raw pixel data.
    plt.imshow(picture)
# No explicit plt.show(): the notebook renders the figure when the cell finishes.

### CIFAR-100 trainset

In [None]:
# Count how many training images carry each label value.
print(type(trainy))
unique, counts = np.unique(trainy, return_counts=True)
# Map label value -> number of occurrences, then print the full mapping.
cifar100_train_dic = {label: count for label, count in zip(unique, counts)}
print(cifar100_train_dic)

In [None]:
# Bar chart of the per-label image counts in the CIFAR-100 training set
# (drawn at the default figure size).
plt.bar(list(cifar100_train_dic), list(cifar100_train_dic.values()), color='g')

In [None]:
# One example image per label 0..99 from the training set, on a 10x10 grid.
fig = plt.figure(figsize=(10, 13))
train_labels = trainy[:, 0]  # 1-D view of the label column, usable as a row mask
for position, label in enumerate(range(100), start=1):
    plt.subplot(10, 10, position)
    class_images = trainX[train_labels == label]
    plt.imshow(class_images[0])
    plt.title("Class %d" % label)
    plt.xticks([])
    plt.yticks([])



### [DIY] cifar-100 trainset에서 각 유형이 출현하는 첫 인덱스를 이용해서 대표유형들을 출력하시오.

In [None]:
# The names are written grouped by superclass (five per row) and then put into
# alphabetical order, so that position k in the list lines up with label k.
class_names = sorted([
    'beaver', 'dolphin', 'otter', 'seal', 'whale',
    'aquarium fish', 'flatfish', 'ray', 'shark', 'trout',
    'orchids', 'poppies', 'roses', 'sunflowers', 'tulips',
    'bottles', 'bowls', 'cans', 'cups', 'plates',
    'apples', 'mushrooms', 'oranges', 'pears', 'sweet peppers',
    'clock', 'keyboard', 'lamp', 'telephone', 'television',
    'bed', 'chair', 'couch', 'table', 'wardrobe',
    'bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach',
    'bear', 'leopard', 'lion', 'tiger', 'wolf',
    'bridge', 'castle', 'house', 'road', 'skyscraper',
    'cloud', 'forest', 'mountain', 'plain', 'sea',
    'camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo',
    'fox', 'porcupine', 'possum', 'raccoon', 'skunk',
    'crab', 'lobster', 'snail', 'spider', 'worm',
    'baby', 'boy', 'girl', 'man', 'woman',
    'crocodile', 'dinosaur', 'lizard', 'snake', 'turtle',
    'hamster', 'mouse', 'rabbit', 'shrew', 'squirrel',
    'maple', 'oak', 'palm', 'pine', 'willow',
    'bicycle', 'bus', 'motorcycle', 'pickup truck', 'train',
    'lawn-mower', 'rocket', 'streetcar', 'tank', 'tractor',
])
print(class_names)

In [None]:
# np.unique(trainy, return_index=True)[0], np.unique(trainy, return_index=True)[1], trainy[:10]

In [None]:
# Your code here!  The class is listed as 'keyboard', not 'computer keyboard'.
class_names = [
    'beaver', 'dolphin', 'otter', 'seal', 'whale',
    'aquarium fish', 'flatfish', 'ray', 'shark', 'trout',
    'orchids', 'poppies', 'roses', 'sunflowers', 'tulips',
    'bottles', 'bowls', 'cans', 'cups', 'plates',
    'apples', 'mushrooms', 'oranges', 'pears', 'sweet peppers',
    'clock', 'keyboard', 'lamp', 'telephone', 'television',
    'bed', 'chair', 'couch', 'table', 'wardrobe',
    'bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach',
    'bear', 'leopard', 'lion', 'tiger', 'wolf',
    'bridge', 'castle', 'house', 'road', 'skyscraper',
    'cloud', 'forest', 'mountain', 'plain', 'sea',
    'camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo',
    'fox', 'porcupine', 'possum', 'raccoon', 'skunk',
    'crab', 'lobster', 'snail', 'spider', 'worm',
    'baby', 'boy', 'girl', 'man', 'woman',
    'crocodile', 'dinosaur', 'lizard', 'snake', 'turtle',
    'hamster', 'mouse', 'rabbit', 'shrew', 'squirrel',
    'maple', 'oak', 'palm', 'pine', 'willow',
    'bicycle', 'bus', 'motorcycle', 'pickup truck', 'train',
    'lawn-mower', 'rocket', 'streetcar', 'tank', 'tractor',
]
# Important: sort alphabetically so that class_names[k] is the title used for
# the k-th representative image below.
class_names.sort()
# With return_index=True, np.unique also returns the index of the first
# occurrence of each distinct label; element [1] of the result is that index array.
num_100 = np.unique(trainy, return_index=True)[1]
images = trainX[num_100]
fig = plt.figure(figsize=(14, 15))
for k, picture in enumerate(images):
    # Place the k-th representative image in the next cell of the 10x10 grid.
    plt.subplot(10, 10, k + 1)
    plt.imshow(picture)
    plt.title(class_names[k])
    plt.xticks([])
    plt.yticks([])
plt.suptitle("100 different images", fontsize=18)
plt.show()

### CIFAR-100 test set

In [None]:
# Count how many test images carry each label value.
print(type(testy))
unique, counts = np.unique(testy, return_counts=True)
# Map label value -> number of occurrences.
cifar100_test_dic = {label: count for label, count in zip(unique, counts)}
cifar100_test_dic


In [None]:
# Bar chart of the per-label image counts in the CIFAR-100 test set.
plt.bar(list(cifar100_test_dic), list(cifar100_test_dic.values()), color='g')

In [None]:
# One example image per label 0..99 from the test set, on a 10x10 grid.
fig = plt.figure(figsize=(10, 13))
test_labels = testy[:, 0]  # 1-D view of the label column, usable as a row mask
for position, label in enumerate(range(100), start=1):
    plt.subplot(10, 10, position)
    class_images = testX[test_labels == label]
    # Unlike the other per-class grids, this one shows the image at position 33
    # within each class rather than the first one.
    plt.imshow(class_images[33])
    plt.title("Class %d" % label)
    plt.xticks([])
    plt.yticks([])



### [DIY] cifar-100 testset에서 각 유형이 출현하는 첫 인덱스를 이용해서 대표유형들을 출력하시오.

In [None]:
# Your code here!






---



---



### [DIY] cifar-100 testset에서 class=2,35의 그림들을 모두 출력하시오.

### class=2, 35: 사람 사진 모음
- class=2: baby
- class=35: girl

In [None]:
# Look up the names stored at positions 2 and 35 of class_names
# (uses the class_names list built in an earlier cell).
class_names[2],class_names[35]

In [None]:
# your code here!








---



---



## What is your dataset? Imagine your data!