In [1]:
# 라이브러리
import os
import re
import json
import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt
import IPython.display as ipd
import requests
from bs4 import BeautifulSoup

import tensorflow as tf
from transformers import AutoTokenizer, TFBartForConditionalGeneration, pipeline, AutoModelForSeq2SeqLM

  from .autonotebook import tqdm as notebook_tqdm


## Eng Summarization

In [2]:
def abs_crawling(name):
    base_url = 'https://paperswithcode.com/paper/'
    paper_name = name.replace(' ', '-')
    resp = requests.get(base_url+paper_name)
    soup = BeautifulSoup(resp.content, 'lxml')
    p_tags = soup.select('div.paper-abstract div.col-md-12 p')
    paper_abstract = p_tags[0].text.strip().replace('\n', ' ')
    return paper_abstract

In [3]:
texts = abs_crawling('attention is all you need')
texts

'The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transform

### 모델 불러오기

### TF model

In [4]:
Check_Point = 'facebook/bart-large-cnn'

tokenizer_bart = AutoTokenizer.from_pretrained(Check_Point)
model_bart = TFBartForConditionalGeneration.from_pretrained(Check_Point)

Downloading (…)"tf_model.h5";: 100%|██████████| 1.63G/1.63G [02:28<00:00, 10.9MB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
All model checkpoint layers were used when initializing TFBartForConditionalGeneration.

All the layers of TFBartForConditionalGeneration were initialized from the model checkpoint at facebook/bart-large-cnn.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBartForConditionalGeneration for predictions without further training.
Downloading (…)neration_config.json: 100%|██████████| 363/363 [00:00<00:00, 72.9kB/s]


In [5]:
def bart_sum(paper):
    input_token = tokenizer_bart(paper, max_length= 1024, return_tensors='tf', truncation=True)
    encoded_ids = model_bart.generate(input_token['input_ids'], num_beams=2)
    return tokenizer_bart.decode(tf.squeeze(encoded_ids), skip_special_tokens=True) #clean_up_tokenization_spaces=False    

In [6]:
sum_text = bart_sum(texts)
sum_text



'The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train.'

## Eng to Kor Translation

In [14]:
translator = pipeline('translation', model = 'facebook/nllb-200-distilled-600M')

ValueError: Could not load model facebook/nllb-200-distilled-600M with any of the following classes: (<class 'transformers.models.auto.modeling_tf_auto.TFAutoModelForSeq2SeqLM'>,).

In [None]:
def t5_trans(text):
    return translator(text, src_lang='en', tgt_lang='kor_Hang')

In [None]:
trans_text = t5_trans(sum_text)
trans_text

# TensorflowTTS real time E2E-TTS demonstration(Kor TTS Model)

This notebook provides a demonstration of the realtime E2E-TTS using TensorflowTTS for Korea (Using KSS dataset)

- Github: https://github.com/TensorSpeech/TensorflowTTS
- Colab for Eng: https://colab.research.google.com/drive/1akxtrLZHKuMiQup00tzO2olCaN-y3KiD?usp=sharing

## Load Model

In [15]:
import os
!git clone https://github.com/TensorSpeech/TensorFlowTTS.git
os.chdir("TensorFlowTTS")
!pip install .
os.chdir("..")
import sys
sys.path.append("TensorFlowTTS/")

fatal: destination path 'TensorFlowTTS' already exists and is not an empty directory.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing /content/drive/MyDrive/Paper_summary/TensorFlowTTS
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorflow-gpu==2.7.0
  Downloading tensorflow_gpu-2.7.0-cp38-cp38-manylinux2010_x86_64.whl (489.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m489.6/489.6 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-addons>=0.10.0
  Downloading tensorflow_addons-0.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m59.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface_hub==0.0.8
  Downloading huggingface_hub-0.0.8-py3-none-any.whl (34 kB)
Collecting unidecode>=1.1.1
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[2K     [90m━━━━━

In [16]:
!pip install git+https://github.com/repodiac/german_transliterate.git#egg=german_transliterate
!pip install h5py==2.10.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting german_transliterate
  Cloning https://github.com/repodiac/german_transliterate.git to /tmp/pip-install-6t644l86/german-transliterate_bd34beb37dbc41eba16ca147e2e433ed
  Running command git clone --filter=blob:none --quiet https://github.com/repodiac/german_transliterate.git /tmp/pip-install-6t644l86/german-transliterate_bd34beb37dbc41eba16ca147e2e433ed
  Resolved https://github.com/repodiac/german_transliterate.git to commit 4e01beba5f19adb9ccd72d0607a81f4ed7f6ee87
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting num2words
  Downloading num2words-0.5.12-py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.2/125.2 KB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docopt>=0.6.2
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ge

### Tacotron 2

In [17]:
!pip install keras==2.11

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras==2.11
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 2.7.0
    Uninstalling keras-2.7.0:
      Successfully uninstalled keras-2.7.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.11.0 requires tensorflow-estimator<2.12,>=2.11.0, but you have tensorflow-estimator 2.7.0 which is incompatible.
tensorflow-gpu 2.7.0 requires keras<2.8,>=2.7.0rc0, but you have keras 2.11.0 which is incompatible.[0m[31m
[0mSuccessfully installed keras-2.11.0


In [18]:
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

tacotron2 = TFAutoModel.from_pretrained("tensorspeech/tts-tacotron2-kss-ko", name="tacotron2")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Unzipping corpora/cmudict.zip.


Downloading (…)"model.h5";:   0%|          | 0.00/128M [00:00<?, ?B/s]

Downloading (…)olve/main/config.yml:   0%|          | 0.00/4.01k [00:00<?, ?B/s]

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


### Multi-band MelGAN

In [19]:
mb_melgan = TFAutoModel.from_pretrained("tensorspeech/tts-mb_melgan-kss-ko", name="mb_melgan")

Downloading (…)"model.h5";:   0%|          | 0.00/10.2M [00:00<?, ?B/s]

Downloading (…)olve/main/config.yml:   0%|          | 0.00/5.82k [00:00<?, ?B/s]

## Inference
- The first time model run inference will very slow cause by @tf.function.

In [20]:
processor = AutoProcessor.from_pretrained("tensorspeech/tts-tacotron2-kss-ko")

Downloading (…)/main/processor.json:   0%|          | 0.00/2.37k [00:00<?, ?B/s]

In [21]:
def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name, vocoder_name):
  input_ids = processor.text_to_sequence(input_text)

  # text2mel part
  if text2mel_name == "TACOTRON":
    _, mel_outputs, stop_token_prediction, alignment_history = text2mel_model.inference(
        tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        tf.convert_to_tensor([len(input_ids)], tf.int32),
        tf.convert_to_tensor([0], dtype=tf.int32)
    )
  else:
    raise ValueError("Only TACOTRON is supported on text2mel_name")

  # vocoder part
  if vocoder_name == "MB-MELGAN":
    audio = vocoder_model.inference(mel_outputs)[0, :, 0]
  else:
    raise ValueError("Only MB_MELGAN are supported on vocoder_name")

  if text2mel_name == "TACOTRON":
    return mel_outputs.numpy(), alignment_history.numpy(), audio.numpy()
  else:
    return mel_outputs.numpy(), audio.numpy()

def visualize_attention(alignment_history):
  import matplotlib.pyplot as plt

  fig = plt.figure(figsize=(8, 6))
  ax = fig.add_subplot(111)
  ax.set_title(f'Alignment steps')
  im = ax.imshow(
      alignment_history,
      aspect='auto',
      origin='lower',
      interpolation='none')
  fig.colorbar(im, ax=ax)
  xlabel = 'Decoder timestep'
  plt.xlabel(xlabel)
  plt.ylabel('Encoder timestep')
  plt.tight_layout()
  plt.show()
  plt.close()

def visualize_mel_spectrogram(mels):
  mels = tf.reshape(mels, [-1, 80]).numpy()
  fig = plt.figure(figsize=(10, 8))
  ax1 = fig.add_subplot(311)
  ax1.set_title(f'Predicted Mel-after-Spectrogram')
  im = ax1.imshow(np.rot90(mels), aspect='auto', interpolation='none')
  fig.colorbar(mappable=im, shrink=0.65, orientation='horizontal', ax=ax1)
  plt.show()
  plt.close()

In [22]:
input_text = trans_text

In [23]:
# setup window for tacotron2 if you want to try
tacotron2.setup_window(win_front=10, win_back=10)

### Tacotron2 + MB-MELGAN

In [25]:
mels, alignment_history, audios = do_synthesis(input_text, tacotron2, mb_melgan, "TACOTRON", "MB-MELGAN")
visualize_attention(alignment_history[0])
visualize_mel_spectrogram(mels[0])
ipd.Audio(audios, rate=22050)

TypeError: ignored