# Save Models

Please note that this notebook does not seem to work reliably on KMP. In practice, the models were downloaded via the command line or with a dedicated script.

In [1]:
# Set up the cache directory in which downloaded models are stored.
# https://stackoverflow.com/questions/63312859/how-to-change-huggingface-transformers-default-cache-directory
# MUST BE DONE BEFORE IMPORTING THE LIBRARY!
import os
os.environ['TRANSFORMERS_CACHE'] = '/mnt/efs/data/.cache'
# `TRANSFORMERS_CACHE` is deprecated in recent transformers releases. Point the
# Hugging Face Hub cache variable at the same directory so the cache location
# does not silently fall back to the default once the old variable is ignored.
os.environ['HF_HUB_CACHE'] = os.environ['TRANSFORMERS_CACHE']

In [2]:
# Imports
import configparser
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer



In [3]:
# 4-bit quantization settings (bitsandbytes) used when loading the saved models.
# Added as per https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf#scrollTo=WQ-BLtJG9b38
from torch import bfloat16
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,    # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",         # use the NF4 4-bit data type
    bnb_4bit_compute_dtype=bfloat16    # run computations in bfloat16
)

In [4]:
# Initialize config parser and read the project configuration file.
# ConfigParser.read() returns the list of files it managed to parse (shown as
# the cell output); a missing file is skipped silently rather than raising.
config = configparser.ConfigParser()
config.read("/mnt/efs/data/AIEresearch/config.ini")

['/mnt/efs/data/AIEresearch/config.ini']

In [5]:
# Set the Hugging Face authorization token.
# Access to Llama2 from Hugging Face requires users to submit a request form.
# The token is read from the [huggingface] section of the config file rather
# than being hard-coded in the notebook.
hf_auth = config['huggingface']['auth_sergio']

In [6]:
# Set path for saved models.
# Root directory; each model gets its own sub-directory named after model_name.
path_saved_models = '/mnt/efs/data/saved_models'

In [7]:
# Helper function to save model
def save_model(model_name, pipe, path_saved_models=path_saved_models):
    '''
    Save the model and tokenizer of a Hugging Face pipeline to disk.

    The files are written to:
        <path_saved_models>/<model_name>/model
        <path_saved_models>/<model_name>/tokenizer

    Args:
        model_name (str): Name of model being saved. Stick to the
            Hugging Face names. May contain '/' (e.g. 'org/name'), in which
            case the intermediate directories are created as well.
        pipe: Hugging Face pipeline exposing `.model` and `.tokenizer`, both
            of which must provide `save_pretrained`.
        path_saved_models (str): Dir where models are saved. It is created
            if it does not exist yet.

    Returns:
        None

    Raises:
        ValueError: If the target directory already exists (nothing is
            ever overwritten), or if the pipeline has no model or tokenizer.
    '''
    import shutil  # local import: only needed to clean up after a failed save

    # Never save over an existing model directory
    path_model_save = os.path.join(path_saved_models, model_name)
    if os.path.exists(path_model_save):
        raise ValueError(f"Already exists and will not be saved over: {path_model_save}")

    # Get the model and tokenizer, and validate them before touching the disk
    model     = pipe.model
    tokenizer = pipe.tokenizer
    if model is None or tokenizer is None:
        raise ValueError(f"Pipeline for {model_name} has no model or tokenizer to save")

    # Set save paths
    path_model_save_model     = os.path.join(path_model_save, 'model')
    path_model_save_tokenizer = os.path.join(path_model_save, 'tokenizer')

    # makedirs (not mkdir) so a missing parent directory or an 'org/name'
    # style model_name does not make the save fail
    os.makedirs(path_model_save)

    # Save the model and tokenizer to disk. If anything goes wrong (including
    # an interrupt), remove the partial directory created above; otherwise the
    # "already exists" guard would block every later attempt.
    try:
        model.save_pretrained(path_model_save_model)
        tokenizer.save_pretrained(path_model_save_tokenizer)
    except BaseException:
        shutil.rmtree(path_model_save, ignore_errors=True)
        raise

    # Print message
    print(f"Successfully saved {model_name}")

# [Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)

* This is the Hugging Face variant of the model, which will need to be tested. 

In [8]:
# Repository name under the meta-llama organization; also used as the
# sub-directory name under path_saved_models.
model_name = 'Llama-2-7b-chat-hf'

## Save

In [9]:
# Build a text-generation pipeline for the meta-llama repository,
# authenticating with the Hugging Face token read from the config file.
pipe = pipeline("text-generation", model=f"meta-llama/{model_name}", token=hf_auth)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# Write the pipeline's model and tokenizer under path_saved_models/<model_name>.
# NOTE(review): no "Successfully saved" message was recorded for this cell —
# confirm the save actually completed.
save_model(model_name, pipe, path_saved_models=path_saved_models)

: 

: 

## Load

In [None]:
# Rebuild the directory layout used by save_model():
#   <path_saved_models>/<model_name>/model
#   <path_saved_models>/<model_name>/tokenizer
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model, path_model_save_tokenizer = (
    os.path.join(path_model_save, subdir) for subdir in ('model', 'tokenizer')
)

In [None]:
# 4-bit quantization settings (bitsandbytes) for loading the saved model.
# Same settings as the bnb_config cell near the top of the notebook.
# Added as per https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf#scrollTo=WQ-BLtJG9b38
from torch import bfloat16
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,    # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",         # use the NF4 4-bit data type
    bnb_4bit_compute_dtype=bfloat16    # run computations in bfloat16
)

In [None]:
# Garbage collection and memory management
import gc
import torch
def report_gpu():
    """Print the processes using the GPU, then free unused memory.

    Prints the summary returned by torch.cuda.list_gpu_processes(), runs the
    Python garbage collector, and finally asks PyTorch to release its cached
    CUDA memory. Returns None.
    """
    gpu_processes = torch.cuda.list_gpu_processes()
    print(gpu_processes)
    # Collect unreachable Python objects before emptying the CUDA cache
    gc.collect()
    torch.cuda.empty_cache()
report_gpu()

GPU:0
no processes are running


In [None]:
# Load the model and tokenizer from the local copies written by save_model().
# The 4-bit settings must be passed via `quantization_config`: `load_in_4bit`
# is a boolean flag, so handing it the config object does not apply the NF4 /
# double-quantization / bfloat16 options defined in bnb_config.
model     = AutoModelForCausalLM.from_pretrained(path_model_save_model, quantization_config=bnb_config, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(path_model_save_tokenizer)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Smoke test: run one prompt through the locally loaded model on the GPU.
prompt = "Tell me about gravity"

# Tokenize to PyTorch tensors and move them to the CUDA device.
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# No generation arguments are passed, so the model's default generation settings apply.
output = model.generate(**model_inputs)

# Decode the first returned sequence, dropping special tokens.
print(tokenizer.decode(output[0], skip_special_tokens=True))

# [Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf)

* This is the Hugging Face variant of the model, which will need to be tested. 

In [2]:
# Repository name under the meta-llama organization; also used as the
# sub-directory name under path_saved_models.
model_name = 'Llama-2-13b-chat-hf'

## Save

In [12]:
# Build a text-generation pipeline for the meta-llama repository,
# authenticating with the Hugging Face token read from the config file.
pipe = pipeline("text-generation", model=f"meta-llama/{model_name}", token=hf_auth)

Downloading config.json: 100%|██████████| 587/587 [00:00<00:00, 74.5kB/s]
Downloading (…)fetensors.index.json: 100%|██████████| 33.4k/33.4k [00:00<00:00, 5.51MB/s]
Downloading (…)of-00003.safetensors: 100%|██████████| 9.95G/9.95G [00:20<00:00, 479MB/s]
Downloading (…)of-00003.safetensors: 100%|██████████| 9.90G/9.90G [00:20<00:00, 482MB/s]
Downloading (…)of-00003.safetensors: 100%|██████████| 6.18G/6.18G [00:12<00:00, 475MB/s]
Downloading shards: 100%|██████████| 3/3 [00:54<00:00, 18.14s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.46it/s]
Downloading generation_config.json: 100%|██████████| 188/188 [00:00<00:00, 30.6kB/s]
Downloading tokenizer_config.json: 100%|██████████| 1.62k/1.62k [00:00<00:00, 1.54MB/s]
Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 268MB/s]
Downloading tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 31.2MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 414/414 [00:00<00:00, 356kB/s]


In [13]:
# Write the pipeline's model and tokenizer under path_saved_models/<model_name>.
# save_model() raises ValueError if that directory already exists.
save_model(model_name, pipe, path_saved_models=path_saved_models)

Successfully saved Llama-2-13b-chat-hf


## Load

In [8]:
# Rebuild the directory layout used by save_model():
#   <path_saved_models>/<model_name>/model
#   <path_saved_models>/<model_name>/tokenizer
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model, path_model_save_tokenizer = (
    os.path.join(path_model_save, subdir) for subdir in ('model', 'tokenizer')
)

In [6]:
# 4-bit quantization settings (bitsandbytes) for loading the saved model.
# Same settings as the bnb_config cell near the top of the notebook.
# Added as per https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf#scrollTo=WQ-BLtJG9b38
from torch import bfloat16
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,    # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",         # use the NF4 4-bit data type
    bnb_4bit_compute_dtype=bfloat16    # run computations in bfloat16
)

In [7]:
# Garbage collection and memory management
import gc
import torch
def report_gpu():
    """Print the processes using the GPU, then free unused memory.

    Prints the summary returned by torch.cuda.list_gpu_processes(), runs the
    Python garbage collector, and finally asks PyTorch to release its cached
    CUDA memory. Returns None.
    """
    gpu_processes = torch.cuda.list_gpu_processes()
    print(gpu_processes)
    # Collect unreachable Python objects before emptying the CUDA cache
    gc.collect()
    torch.cuda.empty_cache()
report_gpu()

GPU:0
no processes are running


In [9]:
# Load the model and tokenizer from the local copies written by save_model().
# The 4-bit settings must be passed via `quantization_config`: `load_in_4bit`
# is a boolean flag, so handing it the config object does not apply the NF4 /
# double-quantization / bfloat16 options defined in bnb_config.
model     = AutoModelForCausalLM.from_pretrained(path_model_save_model, quantization_config=bnb_config, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(path_model_save_tokenizer)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [9]:
# Smoke test: run one prompt through the locally loaded model on the GPU.
prompt = "Tell me about gravity"

# Tokenize to PyTorch tensors and move them to the CUDA device.
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# No generation arguments are passed, so the model's default generation settings apply.
output = model.generate(**model_inputs)

# Decode the first returned sequence, dropping special tokens.
print(tokenizer.decode(output[0], skip_special_tokens=True))

# [Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf)

* This is the Hugging Face variant of the model, which will need to be tested. 

In [7]:
# Set model name: repository name under the meta-llama organization, also used
# as the sub-directory name under path_saved_models.
model_name = 'Llama-2-70b-chat-hf'

## Save

In [8]:
# Create the pipe: a text-generation pipeline for the meta-llama repository,
# authenticated with the Hugging Face token read from the config file.
pipe = pipeline("text-generation", model=f"meta-llama/{model_name}", token=hf_auth)

Downloading config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/66.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/15 [00:00<?, ?it/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.85G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.80G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/9.50G [00:00<?, ?B/s]

Downloading (…)of-00015.safetensors:   0%|          | 0.00/524M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]



Downloading generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Save the model and tokenizer under path_saved_models/<model_name>.
# NOTE(review): this cell has no execution count and no recorded output —
# confirm whether the 70B model was ever saved from this notebook.
save_model(model_name, pipe, path_saved_models=path_saved_models)

## Load

In [None]:
# Rebuild the directory layout used by save_model():
#   <path_saved_models>/<model_name>/model
#   <path_saved_models>/<model_name>/tokenizer
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model, path_model_save_tokenizer = (
    os.path.join(path_model_save, subdir) for subdir in ('model', 'tokenizer')
)

In [None]:
# 4-bit quantization settings (bitsandbytes) for loading the saved model.
# Same settings as the bnb_config cell near the top of the notebook.
# Added as per https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf#scrollTo=WQ-BLtJG9b38
from torch import bfloat16
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,    # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",         # use the NF4 4-bit data type
    bnb_4bit_compute_dtype=bfloat16    # run computations in bfloat16
)

In [None]:
# Garbage collection and memory management
import gc
import torch
def report_gpu():
    """Print the processes using the GPU, then free unused memory.

    Prints the summary returned by torch.cuda.list_gpu_processes(), runs the
    Python garbage collector, and finally asks PyTorch to release its cached
    CUDA memory. Returns None.
    """
    gpu_processes = torch.cuda.list_gpu_processes()
    print(gpu_processes)
    # Collect unreachable Python objects before emptying the CUDA cache
    gc.collect()
    torch.cuda.empty_cache()
report_gpu()

GPU:0
no processes are running


In [None]:
# Load the model and tokenizer from the local copies written by save_model().
# The 4-bit settings must be passed via `quantization_config`: `load_in_4bit`
# is a boolean flag, so handing it the config object does not apply the NF4 /
# double-quantization / bfloat16 options defined in bnb_config.
model     = AutoModelForCausalLM.from_pretrained(path_model_save_model, quantization_config=bnb_config, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(path_model_save_tokenizer)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Smoke test: run one prompt through the locally loaded model on the GPU.
prompt = "Tell me about gravity"

# Tokenize to PyTorch tensors and move them to the CUDA device.
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# No generation arguments are passed, so the model's default generation settings apply.
output = model.generate(**model_inputs)

# Decode the first returned sequence, dropping special tokens.
print(tokenizer.decode(output[0], skip_special_tokens=True))

# [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) 

In [5]:
# Repository name under the mistralai organization; also used as the
# sub-directory name under path_saved_models.
model_name = 'Mistral-7B-Instruct-v0.2'

## Save

In [6]:
# Build a text-generation pipeline for the mistralai repository.
# Unlike the meta-llama pipelines, no auth token is passed here.
pipe = pipeline("text-generation", model=f"mistralai/{model_name}")

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [8]:
# Save model and tokenizer; path_saved_models falls back to save_model()'s default.
save_model(model_name, pipe)

Successfully saved Mistral-7B-Instruct-v0.2


## Load

In [9]:
# Rebuild the directory layout used by save_model():
#   <path_saved_models>/<model_name>/model
#   <path_saved_models>/<model_name>/tokenizer
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model, path_model_save_tokenizer = (
    os.path.join(path_model_save, subdir) for subdir in ('model', 'tokenizer')
)

## Test

In [10]:
# Load the model and tokenizer from the local copies written by save_model().
# bnb_config is not defined in this section; it comes from an earlier
# bnb_config cell, which must have been run first.
model     = AutoModelForCausalLM.from_pretrained(path_model_save_model, quantization_config=bnb_config, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(path_model_save_tokenizer)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [11]:
# Prompt used for the GPU smoke test in the following cells.
prompt = "Tell me about gravity"

In [12]:
# Tokenize the prompt to PyTorch tensors and move them to the CUDA device.
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

In [13]:
# Generate a continuation of the tokenized prompt, capped at 2048 new tokens.
output = model.generate(**model_inputs, max_new_tokens=2048)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [14]:
# Decode the first returned sequence, dropping special tokens.
print(tokenizer.decode(output[0], skip_special_tokens=True))

Tell me about gravity.

Gravity is a fundamental force of nature that attracts two objects towards each other. It is the force that keeps planets in orbit around the sun, moons in orbit around planets, and objects on the surface of a planet. Gravity is also what causes objects to fall towards the ground when dropped.

Gravity is described by the theory of general relativity, which was developed by Albert Einstein in 1915. According to this theory, massive objects cause a distortion in the fabric of spacetime, which is felt as a gravitational force. The strength of the gravitational force depends on the mass of the objects and the distance between them.

Gravity is a universal force, meaning it affects all objects with mass or energy, no matter how small or large. It is the strongest of the four fundamental forces of nature, which also include electromagnetism, the strong nuclear force, and the weak nuclear force.

Gravity is a complex and fascinating phenomenon that continues to be stu

# [Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

In [6]:
# Repository name under the mistralai organization; also used as the
# sub-directory name under path_saved_models.
model_name = 'Mixtral-8x7B-Instruct-v0.1'

## Save

In [6]:
# Build a text-generation pipeline for the mistralai repository.
# Unlike the meta-llama pipelines, no auth token is passed here.
pipe = pipeline("text-generation", model=f"mistralai/{model_name}")

Downloading shards:   0%|          | 0/19 [00:00<?, ?it/s]

model-00001-of-00019.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00005-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00006-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00007-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00008-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00009-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00010-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00011-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00012-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00013-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00014-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00015-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00016-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00017-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00018-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00019-of-00019.safetensors:   0%|          | 0.00/4.22G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [7]:
# Save model and tokenizer; path_saved_models falls back to save_model()'s default.
save_model(model_name, pipe)

Successfully saved Mixtral-8x7B-Instruct-v0.1


## Load

In [7]:
# Rebuild the directory layout used by save_model():
#   <path_saved_models>/<model_name>/model
#   <path_saved_models>/<model_name>/tokenizer
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model, path_model_save_tokenizer = (
    os.path.join(path_model_save, subdir) for subdir in ('model', 'tokenizer')
)

In [8]:
# Load the model and tokenizer from the local copies written by save_model().
# bnb_config is not defined in this section; it comes from an earlier cell.
# NOTE(review): the recorded run of this cell failed with a ValueError saying
# some modules were dispatched to the CPU or disk, i.e. the quantized model
# did not fit in GPU RAM. Because the first line raised, `tokenizer` was never
# assigned either.
model     = AutoModelForCausalLM.from_pretrained(path_model_save_model, quantization_config=bnb_config, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(path_model_save_tokenizer)

ValueError: 
                        Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
                        the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
                        these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
                        `device_map` to `from_pretrained`. Check
                        https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                        for more details.
                        

## Test

In [12]:
# Prompt used for the GPU smoke test in the following cells.
prompt = "Tell me about gravity"

In [10]:
# Tokenize the prompt to PyTorch tensors and move them to the CUDA device.
# Requires `tokenizer` from the load cell; the recorded run raised NameError
# because that cell had failed.
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

NameError: name 'tokenizer' is not defined

In [None]:
# Generate a continuation of the tokenized prompt, capped at 2048 new tokens.
output = model.generate(**model_inputs, max_new_tokens=2048)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
# Decode the first returned sequence, dropping special tokens.
# NOTE(review): this cell has no execution count and the output recorded below
# is identical to the Mistral-7B section, so it looks stale — re-run once the
# Mixtral model loads successfully.
print(tokenizer.decode(output[0], skip_special_tokens=True))

Tell me about gravity.

Gravity is a fundamental force of nature that attracts two objects towards each other. It is the force that keeps planets in orbit around the sun, moons in orbit around planets, and objects on the surface of a planet. Gravity is also what causes objects to fall towards the ground when dropped.

Gravity is described by the theory of general relativity, which was developed by Albert Einstein in 1915. According to this theory, massive objects cause a distortion in the fabric of spacetime, which is felt as a gravitational force. The strength of the gravitational force depends on the mass of the objects and the distance between them.

Gravity is a universal force, meaning it affects all objects with mass or energy, no matter how small or large. It is the strongest of the four fundamental forces of nature, which also include electromagnetism, the strong nuclear force, and the weak nuclear force.

Gravity is a complex and fascinating phenomenon that continues to be stu

# [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5)

In [7]:
# Full Hugging Face repository id. Unlike the other sections it includes the
# organization, so the '/' produces a nested directory when joined to path_saved_models.
model_name = 'BAAI/bge-small-en-v1.5'

## Save

In [6]:
# Load the embedding model by repository id (the same string as model_name).
model = SentenceTransformer("BAAI/bge-small-en-v1.5")

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [9]:
# Build the save location: <path_saved_models>/<model_name>/model
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model = os.path.join(path_model_save, 'model')
# Error out if the model was already saved, matching save_model(): an existing
# copy is never overwritten
if os.path.exists(path_model_save):
    raise ValueError(f"Already exists and will not be saved over: {path_model_save}")
# Save the model and tokenizer to disk
model.save(path_model_save_model)
# Print message
print(f"Successfully saved {model_name}")

Successfully saved BAAI/bge-small-en-v1.5


# [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts)

In [9]:
# Repository name under the microsoft organization; also used as the
# sub-directory name under path_saved_models.
model_name = "speecht5_tts"

In [10]:
# Build a text-to-speech pipeline for the microsoft repository (no auth token passed).
pipe = pipeline("text-to-speech", model=f"microsoft/{model_name}")

config.json:   0%|          | 0.00/2.06k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/585M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/232 [00:00<?, ?B/s]

spm_char.model:   0%|          | 0.00/238k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/40.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/433 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/50.7M [00:00<?, ?B/s]

In [11]:
# Save model and tokenizer; path_saved_models falls back to save_model()'s default.
# save_model() only writes pipe.model and pipe.tokenizer.
save_model(model_name, pipe)

Successfully saved speecht5_tts


In [12]:
# Rebuild the directory layout used by save_model():
#   <path_saved_models>/<model_name>/model
#   <path_saved_models>/<model_name>/tokenizer
path_model_save = os.path.join(path_saved_models, model_name)
path_model_save_model, path_model_save_tokenizer = (
    os.path.join(path_model_save, subdir) for subdir in ('model', 'tokenizer')
)

In [13]:
pip install datasets soundfile

Collecting soundfile
  Downloading soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m60.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: soundfile
Successfully installed soundfile-0.12.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install backports.lzma

Collecting backports.lzma
  Downloading backports.lzma-0.0.14.tar.gz (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: backports.lzma
  Building wheel for backports.lzma (pyproject.toml) ... [?25ldone
[?25h  Created wheel for backports.lzma: filename=backports.lzma-0.0.14-cp311-cp311-linux_x86_64.whl size=55706 sha256=ca6837c2161323b541f8fd4c238245d0d2279b6e875ae3ce5eee6573e163f709
  Stored in directory: /home/ec2-user/.cache/pip/wheels/f9/07/22/2a7c08deadad1af9f30a77e1efa74ae78b322488b8014afd07
Successfully built backports.lzma
Installing collected packages: backports.lzma
Successfully installed backports.lzma-0.0.14
Note: you may need to restart the kernel to use updated packages.


In [1]:
from datasets import load_dataset
import soundfile as sf
import torch
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

In [None]:
# Load the processor and the TTS model from the locally saved model directory.
# NOTE(review): save_model() writes pipe.model to <...>/model and pipe.tokenizer
# to <...>/tokenizer, yet the processor is loaded from the model directory here.
# Confirm that directory actually contains the processor/tokenizer files.
processor = SpeechT5Processor.from_pretrained(path_model_save_model)
model = SpeechT5ForTextToSpeech.from_pretrained(path_model_save_model)
# The vocoder is loaded by repository id, not from path_saved_models.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

In [None]:
# Convert the test sentence into PyTorch tensors for the model.
inputs = processor(text="Hello, my dog is cute.", return_tensors="pt")

In [None]:
# Load the validation split of the x-vector dataset from the Hugging Face Hub.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# Take the "xvector" field of entry 7306 and add a leading dimension of size 1.
# NOTE(review): 7306 is a hard-coded index — presumably it selects one
# particular speaker; confirm and document which.
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

In [None]:
# Synthesize speech from the token ids, conditioned on the speaker embedding,
# using the HiFi-GAN vocoder loaded above.
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

In [None]:
# Write the result to speech.wav in the current working directory.
# NOTE(review): assumes the model produces 16 kHz audio — confirm.
sf.write("speech.wav", speech.numpy(), samplerate=16000)