This notebook shares the pretrained PhraseVAE and PhraseLDM models on the Hugging Face Hub. This includes:
- PhraseVAE (models obtained after each stage)
    - Span infilling pretrained model
    - PhraseAE
    - PhraseVAE
- PhraseLDM
    - Unconditional model
    - Length conditioned model
    - Length and structure conditioned model

The upload procedure follows the tutorial in the [Hugging Face documentation](https://huggingface.co/docs/hub/models-uploading#upload-a-pytorch-model-using-huggingfacehub).

In [3]:
import os
import sys

# Make the project root importable: it is taken to be the parent of the
# current working directory, so this assumes the kernel was started from a
# direct sub-folder of the repository.
dirof = os.path.dirname
cwd = os.getcwd()
project_root = dirof(cwd)

# Prepend only when absent, so re-running this cell never stacks duplicates.
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

import torch
from models.phrase_vae import load_t5_model_from_lit_ckpt
from models.vae_inference import PhraseVAE

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Phrase VAE: export a local copy, then publish it to the Hub

# Local export directory and target Hub repository
save_dir = "/data1/longshen/Results/AccGenResults/hf_ckpts/phrase-vae"
repo_id = "LongshenOu/phrase-vae"

# Instantiate the model (no constructor arguments are passed here)
model = PhraseVAE()

# Write the local copy first
model.save_pretrained(save_dir)

# Upload to the Hub; as the last expression of the cell, the returned
# commit information is displayed as the cell output
model.push_to_hub(repo_id)



Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   1%|          |  555kB / 57.2MB,  252kB/s  
[A
[A
Processing Files (0 / 1)                :   2%|▏         | 1.11MB / 57.2MB,  397kB/s  
[A
Processing Files (0 / 1)                :   4%|▍         | 2.22MB / 57.2MB,  694kB/s  
Processing Files (0 / 1)                :   7%|▋         | 3.89MB / 57.2MB, 1.14MB/s  
Processing Files (0 / 1)                :   9%|▊         | 5.00MB / 57.2MB, 1.39MB/s  
[A
Processing Files (0 / 1)                :  12%|█▏        | 6.66MB / 57.2MB, 1.67MB/s  
Processing Files (0 / 1)                :  17%|█▋        | 10.0MB / 57.2MB, 2.38MB/s  
Processing Files (0 / 1)                :  19%|█▉        | 11.1MB / 57.2MB, 2.52MB/s  
Processing Files (0 / 1)                :  25%|██▌       | 14.4MB / 57.2MB, 3.14MB/s  
Processing Files (0 / 1)                :  27%|██▋       | 15.5MB / 57.2MB, 3.24MB/s  
Processing Files (0 / 1)   

CommitInfo(commit_url='https://huggingface.co/LongshenOu/phrase-vae/commit/1ff2701fd91f93cd8905598656775d47436a2abd', commit_message='Push model using huggingface_hub.', commit_description='', oid='1ff2701fd91f93cd8905598656775d47436a2abd', pr_url=None, repo_url=RepoUrl('https://huggingface.co/LongshenOu/phrase-vae', endpoint='https://huggingface.co', repo_type='model', repo_id='LongshenOu/phrase-vae'), pr_revision=None, pr_num=None)

In [None]:
# Unconditional Phrase LDM: export a local copy, then publish it to the Hub

from models.ldm_inference import PhraseLDM

# Training checkpoint to export (the file name indicates step 200000)
ckpt_fp = "/data1/longshen/Results/AccGenResults/diffusion_prior/phr_latent/full_song/64dim_latent/unconditional/tb_logs/version_2/checkpoints/step_step=200000.ckpt"

# Local export directory and target Hub repository
save_dir = "/data1/longshen/Results/AccGenResults/hf_ckpts/phrase-ldm-uncond"
repo_id = "LongshenOu/phrase-ldm-uncond"

# Both conditioning switches are off for the unconditional variant
ldm_kwargs = dict(ckpt_fp=ckpt_fp, length_control=False, sec_control=False)
model = PhraseLDM(**ldm_kwargs)

# Write the local copy first
model.save_pretrained(save_dir)

# Upload to the Hub; the returned commit information becomes the cell output
model.push_to_hub(repo_id)

VAE scale factor: 0.7590118646621704


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   0%|          |  551kB /  118MB,  344kB/s  
[A
[A
[A
Processing Files (0 / 1)                :   1%|          | 1.10MB /  118MB,  459kB/s  
Processing Files (0 / 1)                :   2%|▏         | 2.76MB /  118MB, 1.06MB/s  
Processing Files (0 / 1)                :   4%|▎         | 4.41MB /  118MB, 1.57MB/s  
[A
Processing Files (0 / 1)                :   5%|▍         | 5.51MB /  118MB, 1.72MB/s  
Processing Files (0 / 1)                :   7%|▋         | 7.72MB /  118MB, 2.27MB/s  
Processing Files (0 / 1)                :   9%|▉         | 10.5MB /  118MB, 2.91MB/s  
Processing Files (0 / 1)                :  11%|█         | 12.7MB /  118MB, 3.34MB/s  
Processing Files (0 / 1)                :  15%|█▍        | 17.6MB /  118MB, 4.41MB/s  
Processing Files (0 / 1)                :  17%|█▋        | 20.4MB /  118MB, 4.86MB/s  
Processing Files (0 / 1)   

CommitInfo(commit_url='https://huggingface.co/LongshenOu/phrase-ldm-uncond/commit/0dd15ef6ee9d4e05cfa57dd219ea23017f1b68fe', commit_message='Push model using huggingface_hub.', commit_description='', oid='0dd15ef6ee9d4e05cfa57dd219ea23017f1b68fe', pr_url=None, repo_url=RepoUrl('https://huggingface.co/LongshenOu/phrase-ldm-uncond', endpoint='https://huggingface.co', repo_type='model', repo_id='LongshenOu/phrase-ldm-uncond'), pr_revision=None, pr_num=None)

In [2]:
# Span-infilling pretrained model: export a local copy, then publish it
#
# NOTE(review): the output stored with this cell reports a push to
# 'LongshenOu/phrase-vae-pretrain', which differs from the repository named
# below -- the cell was presumably edited after its last run; re-run it to
# refresh the recorded output.

from models.phrase_vae import S2SVQAE

# Lightning checkpoint of the span-infilling pretraining stage
lit_ckpt_fp = "/data1/longshen/Results/AccGenResults/aes/pretrained/span_infill/epoch=99_step=53400_val_loss=0.3209.ckpt"

# Local export directory and target Hub repository
save_dir = "/data1/longshen/Results/AccGenResults/hf_ckpts/phrase-ae-span-infill"
repo_id = "LongshenOu/phrase-ae-span-infill"

# Rebuild the model from the Lightning checkpoint
model = S2SVQAE.from_lit_ckpt(lit_ckpt_fp)

# Write the local copy first
model.save_pretrained(save_dir)

# Upload to the Hub; the returned commit information becomes the cell output
model.push_to_hub(repo_id)

dict_keys(['_class_path', 't5_model_name', 'tokenizer_path', 'vq_dim', 'vq_codebook_size', 'commitment_cost', 't5_config', 'lr', 'compress_style', 'n_compress_tokens', 'quantize_enc_out', 'lit_ckpt', '_instantiator'])
Using compress_style: full_sequence


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   1%|          |  556kB / 56.6MB,  309kB/s  
[A
[A
[A
Processing Files (0 / 1)                :   2%|▏         | 1.11MB / 56.6MB,  428kB/s  
Processing Files (0 / 1)                :   4%|▍         | 2.22MB / 56.6MB,  794kB/s  
[A
Processing Files (0 / 1)                :   7%|▋         | 3.89MB / 56.6MB, 1.22MB/s  
Processing Files (0 / 1)                :   9%|▉         | 5.00MB / 56.6MB, 1.47MB/s  
Processing Files (0 / 1)                :  12%|█▏        | 6.67MB / 56.6MB, 1.85MB/s  
[A
Processing Files (0 / 1)                :  17%|█▋        | 9.45MB / 56.6MB, 2.36MB/s  
Processing Files (0 / 1)                :  23%|██▎       | 12.8MB / 56.6MB, 3.04MB/s  
Processing Files (0 / 1)                :  25%|██▍       | 13.9MB / 56.6MB, 3.16MB/s  
Processing Files (0 / 1)                :  27%|██▋       | 15.6MB / 56.6MB, 3.38MB/s  
Processing Files (0 / 1

CommitInfo(commit_url='https://huggingface.co/LongshenOu/phrase-vae-pretrain/commit/7dd86b97b48d4527022ea542143ee001b6301225', commit_message='Push model using huggingface_hub.', commit_description='', oid='7dd86b97b48d4527022ea542143ee001b6301225', pr_url=None, repo_url=RepoUrl('https://huggingface.co/LongshenOu/phrase-vae-pretrain', endpoint='https://huggingface.co', repo_type='model', repo_id='LongshenOu/phrase-vae-pretrain'), pr_revision=None, pr_num=None)

In [2]:
# Phrase AE: export a local copy, then publish it to the Hub
#
# NOTE(review): the output stored with this cell shows `from_lit_ckpt`
# skipping the `model.pre_vq_proj.*` and `model.vq.embedding.weight` keys
# because they do not start with 'model.t5.' -- confirm that leaving these
# weights out of the exported model is intended.

from models.phrase_vae import S2SVQAE

# Lightning checkpoint of the phrase auto-encoder stage
lit_ckpt_fp = "/data1/longshen/Results/AccGenResults/aes/pretrained/ae/phrase_ae/epoch=23_step=108432_val_loss=0.0038.ckpt"

# Local export directory and target Hub repository
save_dir = "/data1/longshen/Results/AccGenResults/hf_ckpts/phrase-ae-multi-query"
repo_id = "LongshenOu/phrase-ae-multi-query"

# Rebuild the model from the Lightning checkpoint
model = S2SVQAE.from_lit_ckpt(lit_ckpt_fp)

# Write the local copy first
model.save_pretrained(save_dir)

# Upload to the Hub; the returned commit information becomes the cell output
model.push_to_hub(repo_id)

dict_keys(['_class_path', 't5_model_name', 'tokenizer_path', 'vq_dim', 'vq_codebook_size', 'vq_loss_weight', 'commitment_cost', 'apply_pre_vq_proj', 't5_config', 'lr', 'lr_vq', 'compress_style', 'n_compress_tokens', 'quantize_enc_out', 'lit_ckpt', '_instantiator'])
Skipping key model.pre_vq_proj.weight as it does not start with 'model.t5.'
Skipping key model.pre_vq_proj.bias as it does not start with 'model.t5.'
Skipping key model.vq.embedding.weight as it does not start with 'model.t5.'
Using compress_style: first_n_tokens


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   1%|          |  556kB / 56.6MB,  253kB/s  
[A
[A
Processing Files (0 / 1)                :   2%|▏         | 1.11MB / 56.6MB,  397kB/s  
[A
Processing Files (0 / 1)                :   5%|▍         | 2.78MB / 56.6MB,  869kB/s  
Processing Files (0 / 1)                :   7%|▋         | 3.89MB / 56.6MB, 1.14MB/s  
Processing Files (0 / 1)                :  10%|▉         | 5.56MB / 56.6MB, 1.54MB/s  
[A
Processing Files (0 / 1)                :  13%|█▎        | 7.23MB / 56.6MB, 1.81MB/s  
Processing Files (0 / 1)                :  18%|█▊        | 10.0MB / 56.6MB, 2.38MB/s  
Processing Files (0 / 1)                :  21%|██        | 11.7MB / 56.6MB, 2.65MB/s  
Processing Files (0 / 1)                :  23%|██▎       | 12.8MB / 56.6MB, 2.78MB/s  
Processing Files (0 / 1)                :  28%|██▊       | 16.1MB / 56.6MB, 3.36MB/s  
Processing Files (0 / 1)   

CommitInfo(commit_url='https://huggingface.co/LongshenOu/phrase-ae-multi-query/commit/5189148399955f595036e06529814616b4a16ae7', commit_message='Push model using huggingface_hub.', commit_description='', oid='5189148399955f595036e06529814616b4a16ae7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/LongshenOu/phrase-ae-multi-query', endpoint='https://huggingface.co', repo_type='model', repo_id='LongshenOu/phrase-ae-multi-query'), pr_revision=None, pr_num=None)

In [None]:
# Length-conditioned PhraseLDM: export a local copy, then publish it

from models.ldm_inference import PhraseLDM

# Training checkpoint to export (the file name indicates step 200000)
ckpt_fp = "/data1/longshen/Results/AccGenResults/diffusion_prior/phr_latent/full_song/64dim_latent/lencond/tb_logs/version_0/checkpoints/step_step=200000.ckpt"

# Local export directory and target Hub repository
save_dir = "/data1/longshen/Results/AccGenResults/hf_ckpts/phrase-ldm-lencond"
repo_id = "LongshenOu/phrase-ldm-lencond"

# Length control is enabled; section (structure) control stays off
ldm_kwargs = dict(ckpt_fp=ckpt_fp, length_control=True, sec_control=False)
model = PhraseLDM(**ldm_kwargs)

# Write the local copy first
model.save_pretrained(save_dir)

# Upload to the Hub; the returned commit information becomes the cell output
model.push_to_hub(repo_id)

VAE scale factor: 0.7590118646621704


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   2%|▏         | 2.28MB /  118MB, 2.28MB/s  
[A
[A
[A
Processing Files (0 / 1)                :   6%|▌         | 6.68MB /  118MB, 3.71MB/s  
[A
Processing Files (0 / 1)                :   8%|▊         | 8.95MB /  118MB, 4.07MB/s  
Processing Files (0 / 1)                :  11%|█▏        | 13.5MB /  118MB, 5.60MB/s  
Processing Files (0 / 1)                :  12%|█▏        | 14.0MB /  118MB, 5.39MB/s  
[A
[A
Processing Files (0 / 1)                :  12%|█▏        | 14.6MB /  118MB, 4.55MB/s  
Processing Files (0 / 1)                :  13%|█▎        | 15.1MB /  118MB, 4.44MB/s  
Processing Files (0 / 1)                :  14%|█▍        | 16.8MB /  118MB, 4.65MB/s  
Processing Files (0 / 1)                :  15%|█▌        | 17.9MB /  118MB, 4.70MB/s  
Processing Files (0 / 1)                :  17%|█▋        | 20.1MB /  118MB, 5.01MB/s  
Processing Files (0

CommitInfo(commit_url='https://huggingface.co/LongshenOu/phrase-ldm-lencond/commit/5ef0e473dfd37b65bcd3894a84f132bb80359912', commit_message='Push model using huggingface_hub.', commit_description='', oid='5ef0e473dfd37b65bcd3894a84f132bb80359912', pr_url=None, repo_url=RepoUrl('https://huggingface.co/LongshenOu/phrase-ldm-lencond', endpoint='https://huggingface.co', repo_type='model', repo_id='LongshenOu/phrase-ldm-lencond'), pr_revision=None, pr_num=None)

In [None]:
# Length- and structure-conditioned PhraseLDM: export locally, then publish

from models.ldm_inference import PhraseLDM

# Training checkpoint to export (the file name indicates step 200000)
ckpt_fp = "/data1/longshen/Results/AccGenResults/diffusion_prior/phr_latent/full_song/64dim_latent/seccond/tb_logs/version_0/checkpoints/step_step=200000.ckpt"

# Local export directory and target Hub repository
save_dir = "/data1/longshen/Results/AccGenResults/hf_ckpts/phrase-ldm-seccond"
repo_id = "LongshenOu/phrase-ldm-seccond"

# Both length control and section (structure) control are enabled
ldm_kwargs = dict(ckpt_fp=ckpt_fp, length_control=True, sec_control=True)
model = PhraseLDM(**ldm_kwargs)

# Write the local copy first
model.save_pretrained(save_dir)

# Upload to the Hub; the returned commit information becomes the cell output
model.push_to_hub(repo_id)

VAE scale factor: 0.7590118646621704


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   0%|          |  549kB /  130MB,  343kB/s  
[A
[A
[A
Processing Files (0 / 1)                :   1%|          | 1.10MB /  130MB,  458kB/s  
[A
[A
[A
Processing Files (0 / 1)                :   1%|▏         | 1.65MB /  130MB,  515kB/s  
[A
Processing Files (0 / 1)                :   2%|▏         | 2.20MB /  130MB,  610kB/s  
Processing Files (0 / 1)                :   2%|▏         | 2.81MB /  130MB,  739kB/s  
Processing Files (0 / 1)                :   3%|▎         | 3.36MB /  130MB,  840kB/s  
[A
Processing Files (0 / 1)                :   3%|▎         | 3.91MB /  130MB,  888kB/s  
Processing Files (0 / 1)                :   3%|▎         | 4.52MB /  130MB,  983kB/s  
Processing Files (0 / 1)                :   4%|▍         | 5.75MB /  130MB, 1.20MB/s  
Processing Files (0 / 1)                :   6%|▌         | 7.52MB /  130MB, 1.50MB/s  
Processing 

CommitInfo(commit_url='https://huggingface.co/LongshenOu/phrase-ldm-seccond/commit/b52806736914e01c483d94a6b7270800734bfd67', commit_message='Push model using huggingface_hub.', commit_description='', oid='b52806736914e01c483d94a6b7270800734bfd67', pr_url=None, repo_url=RepoUrl('https://huggingface.co/LongshenOu/phrase-ldm-seccond', endpoint='https://huggingface.co', repo_type='model', repo_id='LongshenOu/phrase-ldm-seccond'), pr_revision=None, pr_num=None)