<a href="https://colab.research.google.com/github/RyanTokManMokMTM/Tensorflow/blob/master/TF_DATASET_CREATING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CREATE DATASET

In [41]:
import tensorflow as tf
import numpy as np

In [42]:
# from_tensor_slices converts the input list into tensors and yields
# one dataset element per list entry.
values = [0.5, 99.2, 667, 963.2, 0.22166]
dataset = tf.data.Dataset.from_tensor_slices(values)

In [43]:
# Iterating a Dataset yields one tf.Tensor per element.
for tensor in dataset:
    print(tensor)

tf.Tensor(0.5, shape=(), dtype=float32)
tf.Tensor(99.2, shape=(), dtype=float32)
tf.Tensor(667.0, shape=(), dtype=float32)
tf.Tensor(963.2, shape=(), dtype=float32)
tf.Tensor(0.22166, shape=(), dtype=float32)


In [44]:
# .numpy() unwraps each tensor into its plain NumPy value.
for tensor in dataset:
    print(tensor.numpy())

0.5
99.2
667.0
963.2
0.22166


In [45]:
# take(4) limits the iteration to the first four elements.
first_four = dataset.take(4)
for tensor in first_four:
    print(tensor.numpy())

0.5
99.2
667.0
963.2


In [46]:
# A NumPy array works as a source too; the elements keep the array's dtype.
small_values = np.array([0.00000004, 0.004, 0.000354, 0.9999, 0.012345, 0.1, 0.00001])
dataset = tf.data.Dataset.from_tensor_slices(small_values)

In [47]:
# Show every element as a tf.Tensor (value, shape and dtype).
for tensor in dataset:
    print(tensor)

tf.Tensor(4e-08, shape=(), dtype=float64)
tf.Tensor(0.004, shape=(), dtype=float64)
tf.Tensor(0.000354, shape=(), dtype=float64)
tf.Tensor(0.9999, shape=(), dtype=float64)
tf.Tensor(0.012345, shape=(), dtype=float64)
tf.Tensor(0.1, shape=(), dtype=float64)
tf.Tensor(1e-05, shape=(), dtype=float64)


In [48]:
# Show only the underlying NumPy value of every element.
for tensor in dataset:
    print(tensor.numpy())

4e-08
0.004
0.000354
0.9999
0.012345
0.1
1e-05


In [59]:
# Small three-element dataset used for the shuffle / repeat / batch demos.
dataset = tf.data.Dataset.from_tensor_slices(list(range(1, 4)))

In [60]:
# shuffle(3): emit the elements in random order (buffer of 3 elements).
# repeat(3): run through the data three times, reshuffled on every pass.
dataset = dataset.shuffle(3).repeat(3)
for value in dataset:
    print(value.numpy())

1
2
3
2
3
1
3
1
2


In [61]:
# batch(n) groups consecutive elements into chunks of n.
# Example: nine elements with batch(3) would come out as
# [x,x,x],[x,x,x],[x,x,x].
# Here n is 9, so all nine elements are emitted as one single batch.
dataset = dataset.batch(9)
for batch in dataset:
    print(batch.numpy())

[1 3 2 2 1 3 1 2 3]


In [63]:
# Integers 1 through 9 as the source for the map() demo.
dataset = tf.data.Dataset.from_tensor_slices(np.arange(1, 10))

In [64]:
# map() applies a function to every element; tf.square raises each
# value to the power of two, so every printed value is a square.
squared = dataset.map(tf.square)
for value in squared:
    print(value.numpy())

1
4
9
16
25
36
49
64
81


In [97]:
# A 2-D source array is sliced along its first axis: each row becomes one element.
rows = np.array([[1, 2], [6, 7], [8, 9], [22, 33]])
dataset_2D = tf.data.Dataset.from_tensor_slices(rows)

In [98]:
# Each element is one row of the source array.
for row in dataset_2D:
    print(row.numpy())

[1 2]
[6 7]
[8 9]
[22 33]


In [99]:
# Shuffle the four rows, run through them three times, then square every value.
# Batching (batch(3)) is intentionally left disabled in this demo.
dataset_2D = dataset_2D.shuffle(4).repeat(3).map(tf.square)
for row in dataset_2D:
    print(row.numpy())

[ 484 1089]
[64 81]
[36 49]
[1 4]
[36 49]
[ 484 1089]
[64 81]
[1 4]
[1 4]
[64 81]
[ 484 1089]
[36 49]


In [105]:
# A dict of equally long lists is sliced key by key: every element is a
# dict holding one value per key.
columns = {
    "a": [1, 2, 3],
    "b": [4, 5, 6],
    "c": [8, 9, 10],
}
dataset_dict = tf.data.Dataset.from_tensor_slices(columns)

In [106]:
# Display the dataset repr to inspect the per-key shapes and dtypes.
dataset_dict

<TensorSliceDataset shapes: {a: (), b: (), c: ()}, types: {a: tf.int32, b: tf.int32, c: tf.int32}>

In [109]:
# Each element pairs up the i-th value of every key:
#   element 1 -> a: 1, b: 4, c: 8
#   element 2 -> a: 2, b: 5, c: 9
#   element 3 -> a: 3, b: 6, c: 10
for record in dataset_dict:
    print(record)

{'a': <tf.Tensor: shape=(), dtype=int32, numpy=1>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=4>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=8>}
{'a': <tf.Tensor: shape=(), dtype=int32, numpy=2>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=5>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=9>}
{'a': <tf.Tensor: shape=(), dtype=int32, numpy=3>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=6>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=10>}


# DATASET EXAMPLE

In [110]:
# Load Fashion-MNIST (downloaded on first use) as (train, test) pairs of
# (features, labels).
train_split, test_split = tf.keras.datasets.fashion_mnist.load_data()
train_feature, train_label = train_split
# NOTE(review): "test_featurn" looks like a typo for "test_feature"; the name
# is kept unchanged so any existing reference to it keeps working.
test_featurn, test_label = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [116]:
# Inspect the training array dimensions (number of samples, then per-sample shape).
train_feature.shape

(60000, 28, 28)

In [117]:
# Display the raw training array.
# NOTE(review): this renders the repr of the entire array; a slice such as
# train_feature[0] would give a much lighter preview.
train_feature

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [120]:
# Wrap the feature and label arrays in separate datasets, one element per sample.
trainning_feature_dataset, trainning_label_dataset = map(
    tf.data.Dataset.from_tensor_slices,
    (train_feature, train_label),
)

In [122]:
# Display the feature dataset repr; each element is one 28x28 array.
# (The name was previously misspelled as "trainning_feature_datase", which
# raises NameError on a fresh kernel.)
trainning_feature_dataset

<TensorSliceDataset shapes: (28, 28), types: tf.uint8>

In [123]:
# Display the label dataset repr; each element is a single scalar label.
trainning_label_dataset

<TensorSliceDataset shapes: (), types: tf.uint8>

# Merge these datasets together

In [125]:
# Zip the two datasets so every element is a (feature, label) tuple.
trainning_dataset = tf.data.Dataset.zip(
    (trainning_feature_dataset, trainning_label_dataset)
)

In [129]:
# Build the full training pipeline from the zipped source datasets in one
# expression, so re-running this cell never stacks a second shuffle/batch
# on top of an already-transformed dataset.
trainning_dataset = (
    tf.data.Dataset.zip((trainning_feature_dataset, trainning_label_dataset))
    .shuffle(10000)  # shuffle individual samples through a 10,000-element buffer
    .repeat()        # repeat indefinitely; model.fit bounds each epoch via steps_per_epoch
    .batch(64)       # Keras needs a leading batch axis; 64 matches the steps-per-epoch computation
)

In [133]:
# Create the model: flatten each 28x28 input, pass it through a 32-unit ReLU
# hidden layer, and finish with a 10-unit softmax output layer.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=32, activation="relu"),
    tf.keras.layers.Dense(units=10, activation="softmax"),
])

In [136]:
# Build (compile) the model: Adam optimizer, sparse categorical cross-entropy
# loss (labels are integer class ids), accuracy as the reported metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["acc"],
)

In [143]:
# Number of full 64-sample batches in one pass over the training set.
step = len(train_feature) // 64
print(step)

937


In [147]:
# Fit the model for five epochs; steps_per_epoch is the number of batches
# drawn from the dataset in each epoch.
model.fit(
    trainning_dataset,
    epochs=5,
    steps_per_epoch=step,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f6a18984400>