In [2]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    IS_COLAB = True
except Exception:
    IS_COLAB = False

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
assert tf.__version__ >= "2.0"

if not tf.test.is_gpu_available():
    print("No GPU was detected. CNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "cnn"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
    

def plot_image(image):
    plt.imshow(image, cmap="gray", interpolation="nearest")
    plt.axis("off")

def plot_color_image(image):
    plt.imshow(image, interpolation="nearest")
    plt.axis("off")

No GPU was detected. CNNs can be very slow without a GPU.


In [3]:
import unicodedata
import re
import numpy as np
import os
import io
import time

In [29]:
vec1 = tf.random.normal((1, 3,5))
vec2 = tf.random.normal((1, 1,5))
vec12 = vec1*vec2
vec12_summed = tf.reduce_sum(vec12, axis=1)
print("vec1:", vec1)
print("vec2:", vec2)
print("vec1*vec2:", vec1*vec2)
print("vec12_summed:", vec12_summed)

print("vec1[:,0]", vec1[:,2:3])

vec1: tf.Tensor(
[[[ 0.36693168 -0.03142939 -0.05270752  0.28852358  1.4586115 ]
  [ 0.7110663   0.9906853  -0.20805798 -1.6630249  -0.22793514]
  [-0.67079324 -0.16073409 -1.4264216   0.30283737  0.94773036]]], shape=(1, 3, 5), dtype=float32)
vec2: tf.Tensor([[[-0.5523242  -0.00488456 -1.8659807   0.3550772  -0.37767205]]], shape=(1, 1, 5), dtype=float32)
vec1*vec2: tf.Tensor(
[[[-2.02665240e-01  1.53518602e-04  9.83512253e-02  1.02448151e-01
   -5.50876796e-01]
  [-3.92739117e-01 -4.83905757e-03  3.88232201e-01 -5.90502262e-01
    8.60847309e-02]
  [ 3.70495319e-01  7.85114651e-04  2.66167521e+00  1.07530646e-01
   -3.57931256e-01]]], shape=(1, 3, 5), dtype=float32)
vec12_summed: tf.Tensor([[-0.22490907 -0.00390042  3.1482587  -0.38052344 -0.8227233 ]], shape=(1, 5), dtype=float32)
vec1[:,0] tf.Tensor([[[-0.67079324 -0.16073409 -1.4264216   0.30283737  0.94773036]]], shape=(1, 1, 5), dtype=float32)


In [43]:
path_to_file = "c:\\Users\\ikuchin\\.keras\\datasets\\rus-eng\\rus.txt"
# path_to_zip = tf.keras.utils.get_file("rus-eng.zip", origin="http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip", extract=True)
# path_to_file = os.path.dirname(path_to_zip) + "/rus-eng/rus.txt"

In [52]:
def unicode_to_ascii(s):
    return ''.join(_ for _ in unicodedata.normalize('NFD', s)
        if unicodedata.category(_) != 'Mn')

def preprocess_sentense(sentence):
    w = unicode_to_ascii(sentence.lower().strip())
    return w

def create_dataset(path, num_examples):
    lines = io.open(path, encoding="UTF-8").read().strip().split("\n")
    word_pairs=[[preprocess_sentense(words) for words in line.split("\t")] for line in lines[:num_examples]]
#     word_pairs=[[words for words in line.split("\t")] for line in lines[:num_examples]]
    
    return zip(*word_pairs)

en, ru, _ = create_dataset(path_to_file, 10)

print(en)
print(ru)

('go.', 'go.', 'go.', 'hi.', 'hi.', 'hi.', 'hi.', 'hi.', 'run!', 'run!')
('марш!', 'иди.', 'идите.', 'здравствуите.', 'привет!', 'хаи.', 'здрасте.', 'здорово!', 'беги!', 'бегите!')


In [41]:
path_to_file

'C:\\Users\\ikuchin\\.keras\\datasets/rus-eng/rus.txt'