In [None]:
!pip uninstall -y tensorflow --quiet
!pip install ludwig
!pip install ludwig[llm]

In [None]:
!pip install datasets transformers

In [3]:
from datasets import load_dataset

# Load the "en-es" configuration of the "opus_books" dataset.
books = load_dataset(path="opus_books", name="en-es")

Downloading builder script:   0%|          | 0.00/6.08k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/161k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/9.26M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/93470 [00:00<?, ? examples/s]

In [4]:
# Display the loaded dataset object (splits, feature names and row counts).
books

DatasetDict({
    train: Dataset({
        features: ['id', 'translation'],
        num_rows: 93470
    })
})

In [5]:
# Take a random 5,000-row subset of the train split. The fixed seed makes the
# shuffle repeatable, so the same rows are selected on every run.
# NOTE: this rebinds `books` from the DatasetDict to a single Dataset, so the
# dataset must be reloaded before this cell is run a second time.
books = books['train'].shuffle(seed=42).select(range(5000))

In [6]:
import torch
import pandas as pd
import numpy as np

In [9]:
from IPython.display import HTML, display

def set_css(info=None):
  """Emit a <style> element that makes <pre> output wrap long lines.

  Args:
    info: Optional argument supplied by the caller when this function is used
      as an IPython 'pre_run_cell' callback (IPython passes an execution-info
      object to such callbacks). It is accepted so the function can be called
      with or without an argument, and is otherwise ignored.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

# Hook set_css into the 'pre_run_cell' event of the running IPython shell.
_ipython_events = get_ipython().events
_ipython_events.register('pre_run_cell', set_css)

def clear_cache():
  """Call torch.cuda.empty_cache() when CUDA is available; otherwise do nothing."""
  if not torch.cuda.is_available():
    return
  torch.cuda.empty_cache()

In [10]:
# Build the training frame from the 'translation' column, labelled by row id, and
# rename the 'en'/'es' columns to 'english'/'spanish'.
# The ids are passed as one flat list: wrapping them in an extra list
# (index=[...]) makes pandas build a one-level MultiIndex instead of a plain Index.
df = (
    pd.DataFrame(books['translation'], index=list(books['id']))
    .rename(columns={'en': 'english', 'es': 'spanish'})
)

In [11]:
# Preview the first five rows of the english/spanish frame.
df.head(5)

Unnamed: 0,english,spanish
55113,In this way they finished the swath.,Así concluyeron la primera hilera.
2091,This weather will keep many sportsmen in the c...,Este clima hará que muchos deportistas se qued...
32970,"« Ils étaient riches et moi j’étais pauvre, au...","»Al contrario que yo, eran ellos ricos, así qu..."
71739,"I spotted the rounded domes of its mosques, th...","Vi las cúpulas redondeadas de sus mezquitas, l..."
92262,Just as the poor little wretch was going to ta...,"En el momento en que el rapazuelo emprendió, a..."


In [None]:
import getpass
import locale; locale.getpreferredencoding = lambda: "UTF-8"
import logging
import os
import torch
import yaml

from ludwig.api import LudwigModel

In [None]:
# Ludwig configuration, parsed from inline YAML into a Python object.
# It declares an `llm` model on the `gpt2` base model with a LoRA adapter and
# 4-bit quantization, one text input feature (`english`) and one text output
# feature (`spanish`). The prompt template inserts each row's `english` value
# through the {english} placeholder.
# The data is split randomly with probabilities 0.8 / 0.1 / 0.1
# (presumably train / validation / test, in that order — confirm against the Ludwig docs).
# NOTE(review): the training log reports the HuggingFace gpt2 tokenizer being loaded
# for the features, so the `english_tokenize` / `spanish_tokenize` settings below
# may not take effect for this model type — confirm.
qlora_fine_tuning_config = yaml.safe_load(
"""
model_type: llm
base_model: gpt2

input_features:
  - name: english
    type: text
    preprocessing:
          tokenizer: english_tokenize

output_features:
  - name: spanish
    type: text
    preprocessing:
          tokenizer: spanish_tokenize

prompt:
  template: >-
    you are an expert in translating english to spanish. below is an english sentence convert into to spanish sentence,as a Response that appropriately completes the request.
    ### English: {english}

    ### Response:

generation:
  temperature: 0.5
  max_new_tokens: 512

adapter:
  type: lora

quantization:
  bits: 4

preprocessing:
  global_max_sequence_length: 512
  split:
    type: random
    probabilities:
    - 0.8
    - 0.1
    - 0.1

trainer:
  type: finetune
  epochs: 5
  batch_size: 5
  eval_batch_size: 2
  gradient_accumulation_steps: 16
  learning_rate: 0.0004
  learning_rate_scheduler:
    warmup_fraction: 0.03
"""
)

In [15]:
# Create the Ludwig model from the parsed configuration, logging at INFO level.
model = LudwigModel(
    config=qlora_fine_tuning_config,
    logging_level=logging.INFO,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [16]:
# Train on the in-memory DataFrame; the value returned by train() is kept in `results`.
results = model.train(dataset=df)

INFO:ludwig.utils.print_utils:
INFO:ludwig.utils.print_utils:╒════════════════════════╕
INFO:ludwig.utils.print_utils:│ EXPERIMENT DESCRIPTION │
INFO:ludwig.utils.print_utils:╘════════════════════════╛
INFO:ludwig.utils.print_utils:
INFO:ludwig.api:╒══════════════════╤═════════════════════════════════════════════════════════════════════════════════════════╕
│ Experiment name  │ api_experiment                                                                          │
├──────────────────┼─────────────────────────────────────────────────────────────────────────────────────────┤
│ Model name       │ run                                                                                     │
├──────────────────┼─────────────────────────────────────────────────────────────────────────────────────────┤
│ Output directory │ /content/results/api_experiment_run                                                     │
├──────────────────┼─────────────────────────────────────────────────────────────────

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of gpt2 tokenizer
INFO:ludwig.features.text_feature:Max length of feature 'None': 920 (without start and stop symbols)
INFO:ludwig.features.text_feature:Setting max length using dataset: 922 (including start and stop symbols)
INFO:ludwig.features.text_feature:max sequence length is 922 for feature 'None'
INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of gpt2 tokenizer
INFO:ludwig.features.text_feature:Max length of feature 'spanish': 1024 (without start and stop symbols)
INFO:ludwig.features.text_feature:Setting max length using dataset: 1026 (including start and stop symbols)
INFO:ludwig.features.text_feature:max sequence length is 1026 for feature 'spanish'
INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of gpt2 tokenizer
INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of gpt2 tokenizer
INFO:ludwig.data.preprocessing:Building dataset: DONE
INFO:ludwig.data.cache.manager:Writin

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

INFO:ludwig.models.llm:Done.
INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of gpt2 tokenizer
INFO:ludwig.models.llm:Trainable Parameter Summary For Fine-Tuning
INFO:ludwig.models.llm:Fine-tuning with adapter: lora
INFO:ludwig.utils.print_utils:
INFO:ludwig.utils.print_utils:╒══════════╕
INFO:ludwig.utils.print_utils:│ TRAINING │
INFO:ludwig.utils.print_utils:╘══════════╛
INFO:ludwig.utils.print_utils:


trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.23643136409814364


INFO:ludwig.trainers.trainer:Creating fresh model training run.
INFO:ludwig.trainers.trainer:Training for 4000 step(s), approximately 5 epoch(s).
INFO:ludwig.trainers.trainer:Early stopping policy: 5 round(s) of evaluation, or 4000 step(s), approximately 5 epoch(s).

INFO:ludwig.trainers.trainer:Starting with step 0, epoch: 0


Training:  20%|██        | 800/4000 [02:39<13:46,  3.87it/s, loss=0.311]

INFO:ludwig.trainers.trainer:
Running evaluation for step: 800, epoch: 0


Evaluation valid: 100%|██████████| 100/100 [00:39<00:00,  2.56it/s]
Evaluation test : 100%|██████████| 100/100 [00:39<00:00,  2.54it/s]

INFO:ludwig.trainers.trainer:Evaluation took 1m 18.6826s

INFO:ludwig.utils.metrics_printed_table:╒═══════════════════════╤════════════╤══════════════╤════════════╕
│                       │      train │   validation │       test │
╞═══════════════════════╪════════════╪══════════════╪════════════╡
│ bleu                  │     0.0000 │       0.0000 │     0.0000 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ char_error_rate       │     2.5104 │       2.5604 │     2.5545 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ loss                  │     5.1026 │       4.5056 │     4.5289 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ next_token_perplexity │ 46809.2812 │   44859.8594 │ 44868.2031 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ perplexity            │ 50145.3672 │   50179.2344 │ 50178.1836 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ rouge1_fmeasure       │     0


Training:  40%|████      | 1600/4000 [06:42<09:23,  4.26it/s, loss=0.266]

INFO:ludwig.trainers.trainer:
Running evaluation for step: 1600, epoch: 1


Evaluation valid: 100%|██████████| 100/100 [00:34<00:00,  2.93it/s]
Evaluation test : 100%|██████████| 100/100 [00:30<00:00,  3.26it/s]


INFO:ludwig.trainers.trainer:Evaluation took 1m 5.8746s

INFO:ludwig.utils.metrics_printed_table:╒═══════════════════════╤════════════╤══════════════╤════════════╕
│                       │      train │   validation │       test │
╞═══════════════════════╪════════════╪══════════════╪════════════╡
│ bleu                  │     0.0000 │       0.0000 │     0.0000 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ char_error_rate       │     2.3457 │       2.4200 │     2.4033 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ loss                  │     4.6680 │       4.3671 │     4.3911 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ next_token_perplexity │ 45705.5195 │   44320.7188 │ 44310.4961 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ perplexity            │ 50160.1016 │   50173.7852 │ 50173.3984 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ rouge1_fmeasure       │     0.

Training:  60%|██████    | 2400/4000 [10:24<04:32,  5.86it/s, loss=0.275]

INFO:ludwig.trainers.trainer:
Running evaluation for step: 2400, epoch: 2


Evaluation valid: 100%|██████████| 100/100 [00:37<00:00,  2.70it/s]
Evaluation test : 100%|██████████| 100/100 [00:33<00:00,  2.96it/s]


INFO:ludwig.trainers.trainer:Evaluation took 1m 11.4849s

INFO:ludwig.utils.metrics_printed_table:╒═══════════════════════╤════════════╤══════════════╤════════════╕
│                       │      train │   validation │       test │
╞═══════════════════════╪════════════╪══════════════╪════════════╡
│ bleu                  │     0.0000 │       0.0000 │     0.0000 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ char_error_rate       │     2.1565 │       2.3848 │     2.3686 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ loss                  │     4.5631 │       4.2779 │     4.2993 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ next_token_perplexity │ 45132.0781 │   44018.7500 │ 43973.9688 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ perplexity            │ 50161.2930 │   50168.8555 │ 50167.2266 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ rouge1_fmeasure       │     0

Training:  80%|████████  | 3200/4000 [14:11<02:37,  5.08it/s, loss=0.267]

INFO:ludwig.trainers.trainer:
Running evaluation for step: 3200, epoch: 3


Evaluation valid: 100%|██████████| 100/100 [00:36<00:00,  2.71it/s]
Evaluation test : 100%|██████████| 100/100 [00:34<00:00,  2.92it/s]

INFO:ludwig.trainers.trainer:Evaluation took 1m 11.3304s

INFO:ludwig.utils.metrics_printed_table:╒═══════════════════════╤════════════╤══════════════╤════════════╕
│                       │      train │   validation │       test │
╞═══════════════════════╪════════════╪══════════════╪════════════╡
│ bleu                  │     0.0000 │       0.0000 │     0.0000 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ char_error_rate       │     2.1639 │       2.3535 │     2.3364 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ loss                  │     4.4141 │       4.2128 │     4.2356 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ next_token_perplexity │ 44521.4805 │   43685.9141 │ 43663.2305 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ perplexity            │ 50170.6250 │   50172.3008 │ 50171.9648 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ rouge1_fmeasure       │     0


Training: 100%|██████████| 4000/4000 [18:03<00:00,  3.44it/s, loss=0.277]

INFO:ludwig.trainers.trainer:
Running evaluation for step: 4000, epoch: 4


Evaluation valid: 100%|██████████| 100/100 [00:38<00:00,  2.61it/s]
Evaluation test : 100%|██████████| 100/100 [00:33<00:00,  3.00it/s]


INFO:ludwig.trainers.trainer:Evaluation took 1m 11.9455s

INFO:ludwig.utils.metrics_printed_table:╒═══════════════════════╤════════════╤══════════════╤════════════╕
│                       │      train │   validation │       test │
╞═══════════════════════╪════════════╪══════════════╪════════════╡
│ bleu                  │     0.0000 │       0.0000 │     0.0000 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ char_error_rate       │     2.1369 │       2.3177 │     2.3020 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ loss                  │     4.3963 │       4.1661 │     4.1863 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ next_token_perplexity │ 44328.1641 │   43449.2383 │ 43394.9844 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ perplexity            │ 50162.6328 │   50176.5586 │ 50176.0820 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ rouge1_fmeasure       │     0

Training: 100%|██████████| 4000/4000 [19:21<00:00,  3.44it/s, loss=0.277]

INFO:ludwig.utils.print_utils:
INFO:ludwig.utils.print_utils:╒═════════════════╕
INFO:ludwig.utils.print_utils:│ TRAINING REPORT │
INFO:ludwig.utils.print_utils:╘═════════════════╛
INFO:ludwig.utils.print_utils:
INFO:ludwig.api:╒══════════════════════════════╤═══════════════════╕
│ Validation feature           │ spanish           │
├──────────────────────────────┼───────────────────┤
│ Validation metric            │ loss              │
├──────────────────────────────┼───────────────────┤
│ Best model step              │ 4000              │
├──────────────────────────────┼───────────────────┤
│ Best model epoch             │ 5                 │
├──────────────────────────────┼───────────────────┤
│ Best model's validation loss │ 4.166132926940918 │
├──────────────────────────────┼───────────────────┤
│ Best model's test loss       │ 4.186305999755859 │
╘══════════════════════════════╧═══════════════════╛
INFO:ludwig.api:
Finished: api_experiment_run
INFO:ludwig.api:Saved to: /content/re




In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from within the notebook (needed before the upload step).
notebook_login()

In [18]:
!ludwig upload hf_hub --repo_id Naveengo/gpt2-fine-tuned-on-opus_books5000rows-en-to-es --model_path /content/results/api_experiment_run

adapter_model.bin: 100% 1.19M/1.19M [00:02<00:00, 421kB/s]
Model uploaded to `https://huggingface.co/Naveengo/gpt2-fine-tuned-on-opus_books5000rows-en-to-es/tree/main/` with repository name `Naveengo/gpt2-fine-tuned-on-opus_books5000rows-en-to-es`
