<a href="https://colab.research.google.com/github/Jihunni/ML-DL/blob/main/Resource_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# setup

In [3]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Is this notebook running on Colab or Kaggle?
# The check looks for the platform's module among the modules already loaded
# in this kernel (sys.modules); it does not try to import anything.
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules

# On hosted platforms, install a pinned TFX release quietly (-q), upgrading
# whatever is pre-installed (-U).
if IS_COLAB or IS_KAGGLE:
    !pip install -q -U tfx==0.21.2
    print("You can safely ignore the package incompatibility errors.")

# Colab only: extra packages, installed at their latest version (not pinned).
if IS_COLAB:
    !pip install -q -U tensorflow-addons
    !pip install -q -U transformers

import re

def _major_minor(version_string):
    """Return the leading (major, minor) numbers of a version string as a tuple of ints.

    Comparing raw version strings is lexicographic ("0.9" >= "0.20" is True and
    "10.0" >= "2.0" is False), so the minimum-version checks below compare
    integer tuples instead. Missing components are treated as 0.
    """
    numbers = [int(n) for n in re.findall(r"\d+", version_string)]
    return tuple((numbers + [0, 0])[:2])

# Scikit-Learn ≥0.20 is required
import sklearn
assert _major_minor(sklearn.__version__) >= (0, 20)

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert _major_minor(tf.__version__) >= (2, 0)

# Warn when TensorFlow reports no GPU device, and point to the platform
# setting that enables one.
gpu_devices = tf.config.list_physical_devices('GPU')
if len(gpu_devices) == 0:
    print("No GPU was detected. LSTMs and CNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")
    if IS_KAGGLE:
        print("Go to Settings > Accelerator and select GPU.")

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "data"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure into IMAGES_PATH.

    Args:
        fig_id: File name without extension; also echoed in the progress message.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as the format.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    target_path = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target_path, format=fig_extension, dpi=resolution)

[K     |████████████████████████████████| 1.1 MB 7.2 MB/s 
[K     |████████████████████████████████| 1.9 MB 65.4 MB/s 
[K     |████████████████████████████████| 1.5 MB 66.6 MB/s 
[K     |████████████████████████████████| 636 kB 56.6 MB/s 
[K     |████████████████████████████████| 103 kB 71.4 MB/s 
[K     |████████████████████████████████| 3.0 MB 48.3 MB/s 
[K     |████████████████████████████████| 59.2 MB 108 kB/s 
[K     |████████████████████████████████| 147 kB 67.5 MB/s 
[K     |████████████████████████████████| 4.9 MB 30.1 MB/s 
[K     |████████████████████████████████| 241 kB 21.9 MB/s 
[K     |████████████████████████████████| 2.4 MB 38.6 MB/s 
[K     |████████████████████████████████| 1.2 MB 36.5 MB/s 
[K     |████████████████████████████████| 151 kB 53.1 MB/s 
[K     |████████████████████████████████| 56 kB 5.9 MB/s 
[K     |████████████████████████████████| 218 kB 93.3 MB/s 
[K     |████████████████████████████████| 77 kB 7.6 MB/s 
[K     |████████████████████

# Data type

## tensors and operations (Hands-on, p. 379)
- NumPy uses 64-bit precision by default, while TensorFlow uses 32-bit.
- A `tf.Tensor` is immutable: it cannot be modified in place (use `tf.Variable` when you need mutable state).
- Type conversions are not performed automatically; cast explicitly with `tf.cast()`.

In [None]:
# 1-D tensor holding the integers 0 through 9
x = tf.range(10)

In [None]:
# A 2x3 integer matrix used by the examples below.
tensor = tf.constant([[1, 2, 3], [4, 5, 6]])

# Only the last bare expression of a cell is displayed, so print each result
# explicitly to make all of them visible.
print("shape:", tensor.shape)
print("dtype:", tensor.dtype)

# slicing
print(tensor[:, 1:])                # every row, columns 1 onward
print(tensor[..., 1, tf.newaxis])   # column 1, kept as a trailing axis of size 1

In [None]:
# variable
# tf.Variable takes the whole initial value as its first argument (a second
# positional argument would be read as `trainable`). A 2x3 float matrix is
# required here: the assignments below index two dimensions and write floats.
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])

v.assign(2 * v)            # => [[2., 4., 6.], [8., 10., 12.]]
v[0, 1].assign(42)         # => [[2., 42., 6.], [8., 10., 12.]]
v[:, 2].assign([0., 1.])   # => [[2., 42., 0.], [8., 10., 1.]]
# Update individual cells: (0, 0) <- 100 and (1, 2) <- 200
v.scatter_nd_update(indices=[[0, 0], [1, 2]], updates=[100., 200.])

In [None]:
# operation
# Every call below is given real arguments so the cell runs top to bottom;
# `tensor` is the 2x3 integer matrix defined in the earlier cell.

## addition (these three are equivalent; `+` is overloaded to call tf.add)
tensor + 10
tf.add(tensor, 10)
tf.math.add(tensor, 10)

# exp, sqrt and log are only defined for floating-point tensors, and the mean
# of an integer tensor is truncated, so work on a float32 copy for those.
tensor_float = tf.cast(tensor, tf.float32)

tf.multiply(tensor, 2)
tf.square(tensor)
tf.exp(tensor_float)
tf.sqrt(tensor_float)
tf.squeeze(tensor[tf.newaxis, ...])   # removes size-1 axes: (1, 2, 3) -> (2, 3)
tf.tile(tensor, [1, 2])               # repeats the columns twice: (2, 3) -> (2, 6)
tf.transpose(tensor)

tf.reduce_mean(tensor_float)
tf.reduce_sum(tensor)
tf.reduce_max(tensor)
tf.math.log(tensor_float)


# matrix multiplication (the `@` operator is equivalent to tf.matmul)
tensor @ tf.transpose(tensor)
tf.matmul(tensor, tf.transpose(tensor))

## Data API: dataset (Hands-on, p. 414)
A dataset is a sequence of data items.

In [None]:
# Create a dataset: from_tensor_slices() slices the tensor along its first
# axis, so each of the 10 values in `x` becomes one dataset item.
x = tf.range(10)
dataset = tf.data.Dataset.from_tensor_slices(x)

In [None]:
# iteration: a Dataset can be looped over directly, yielding one item at a time
for element in dataset:
    print(element)

In [None]:
# repeat(3) chains three passes over the source dataset; batch(7) then groups
# consecutive items into batches of 7, leaving a shorter final batch when the
# item count is not a multiple of 7.
dataset_2 = dataset.repeat(3).batch(7)
# drop_remainder=True discards that shorter final batch so every batch has exactly 7 items.
dataset_3 = dataset.repeat(3).batch(7, drop_remainder=True)

In [None]:
# transform each item individually by calling map()
dataset_4 = dataset.map(lambda x: x * 2)

# transform the dataset as a whole by calling apply(): the function given to
# apply() receives the entire Dataset and must return a Dataset.
# Unbatching needs batched elements, so the items are batched first and then
# split back into individual items.
dataset_5 = dataset.batch(7).apply(lambda ds: ds.unbatch())

In [None]:
# filter: keep only the items for which the predicate returns True
dataset_6 = dataset.filter(lambda item: item < 10)

In [None]:
# shuffle: items are drawn at random from a buffer of `buffer_size` items, so
# the buffer must be large enough for the result to be well mixed.
# The fixed seed makes the shuffled order reproducible.
dataset_7 = dataset.shuffle(buffer_size=5, seed=42).batch(7)

#### preprocessing (p419)

### Prefetching (p421)


In [None]:
# Prepare one element ahead of time while the current one is being consumed.
# (The method is `prefetch`; the misspelled name raised AttributeError.)
dataset = dataset.prefetch(1)

### chopping the sequential Dataset into multiple windows (p528)

In [None]:
# create the windows
n_steps = 100
shift = 1  # target = input shifted 1 character ahead
window_length = n_steps + shift
# The keyword is `drop_remainder` (the misspelling raised TypeError): windows
# shorter than window_length at the end of the dataset are discarded.
# NOTE(review): this assumes `dataset` holds a long sequence (e.g. encoded
# text); with fewer than window_length items no window is produced.
dataset = dataset.window(window_length, shift=shift, drop_remainder=True)

# convert the nested dataset (a dataset of window datasets) into a flat
# dataset in which each window is a single tensor
dataset = dataset.flat_map(lambda window: window.batch(window_length))

# shuffle the windows, then batch them for training
batch_size = 32
dataset = dataset.shuffle(10000).batch(batch_size)
# split each window into (inputs, targets): all steps but the last / all but the first
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

# Loading the Data and Preparing the Dataset

In [4]:
# Download the Tiny Shakespeare corpus and read the whole file into one string.
shakespeare_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
filepath = keras.utils.get_file(fname="shakespeare.txt", origin=shakespeare_url)
with open(filepath) as corpus_file:
    shakespeare_text = corpus_file.read()

Downloading data from https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt


# Preprocessing the Input Features

### create one-hot vector

In [None]:
# create one-hot vectors with NumPy
#ref: https://m.blog.naver.com/PostView.naver?isHttpsRedirect=true&blogId=qbxlvnf11&logNo=221528102803

import numpy as np

flower_list = ['Cherry Blossom', 'Dandelion', 'korean rosebay']
print(flower_list)

# Integer class index for each flower: 0 .. len(flower_list) - 1.
# Built without a module-level loop variable, so the notebook-level `x`
# defined in earlier cells is not overwritten by this cell.
values = list(range(len(flower_list)))
print(values)

values_len = len(values)
print(values_len)

# Row i of the identity matrix is the one-hot vector of class i, so indexing
# the identity matrix with the class indices yields one row per class.
encoding = np.eye(values_len)[values]
print(encoding)

In [7]:
# create a lookup table
# ref: Hands-on p431
category_list = ["one", "two", "three", "four"]
indices = tf.range(len(category_list), dtype=tf.int64)
# initializer mapping each category string to its integer index
table_init = tf.lookup.KeyValueTensorInitializer(category_list, indices)
# number of out-of-vocabulary (oov) buckets: unknown categories are hashed
# into one of these extra indices
num_oov_buckets = 2
# `num_oov-buckets` parsed as a subtraction of undefined names (NameError);
# the variable defined above is `num_oov_buckets`.
table = tf.lookup.StaticVocabularyTable(table_init, num_oov_buckets)

array([0., 1., 0., 0.])

In [None]:
# Look up a few categories; "extra" is not in category_list.
# NOTE(review): unknown keys are presumably mapped to one of the oov bucket
# indices that follow the known categories — confirm against the TF docs.
ex_input = tf.constant(["three", "two", "extra"])
ex_indices = table.lookup(ex_input)
# One-hot depth = known categories + oov buckets, so every possible index has a slot.
ex_one_hot = tf.one_hot(ex_indices, depth=len(category_list) + num_oov_buckets)

# Building and Training the model

In [None]:
# LSTM layers
# Fixes: `tf.kears` -> `tf.keras`, `TimeDistriuted` -> `TimeDistributed`, and
# the second layer (previously an unbalanced call wrapping an LSTMCell) is now
# a regular 20-unit LSTM.
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(20, return_sequences=True, input_shape=[None, 1]),
    # return_sequences=True keeps one output per time step, which the
    # TimeDistributed layer below requires
    tf.keras.layers.LSTM(20, return_sequences=True),
    # apply the same Dense(10) layer independently at every time step
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(10))
])

In [None]:
# the general-purpose keras.layers.RNN layer wraps a cell object
# Fixes: LSTMCell lives in `keras.layers` (there is no `tf.keras.LSTMCell`),
# and `TimeDistriuted` -> `TimeDistributed`.
model = keras.models.Sequential([
    keras.layers.RNN(keras.layers.LSTMCell(20), return_sequences=True, input_shape=[None, 1]),
    keras.layers.RNN(keras.layers.LSTMCell(20), return_sequences=True),
    # apply the same Dense(10) layer independently at every time step
    keras.layers.TimeDistributed(keras.layers.Dense(10))
])

In [None]:
# `loss=` had no value (SyntaxError). Mean squared error is used here.
# NOTE(review): "mse" assumes real-valued targets at each time step — swap in
# the loss that matches your targets (e.g. "sparse_categorical_crossentropy"
# for integer class labels).
model.compile(loss="mse", optimizer='adam')
# The keyword is `epochs`; the misspelling `epcohs` raised TypeError.
history = model.fit(dataset, epochs=20)

In [12]:
# Scratch check: wrapping a string in brackets builds a one-element list
abc = 'qwer'
type([abc])

list

In [14]:
# Element 0 of the one-element list is the original string
[abc][0]

'qwer'

In [13]:
# The unwrapped value is a str
type(abc)

str