# Style transfer

#### Setup

In [None]:
import os

# Detect whether the notebook runs on Google Colab. Only the import is used as
# the probe: a failure while mounting Drive or changing directory on Colab is a
# real error and must surface instead of being reported as a local run.
try:
    from google.colab import drive
except ImportError:
    is_colab = False
    print("Running locally")
else:
    is_colab = True
    drive.mount('/content/drive')

    # Change to the project folder on Drive (later cells import project-local
    # modules such as `utils`, `model` and `preprocessing`).
    os.chdir('/content/drive/MyDrive/ColabNotebooks/style-transfer')
    print("Running on Colab")

In [2]:
# 35s
if is_colab:
    ! pip install pretty_midi
    # ! apt-get install musescore
    ! pip install dfply

    # ! pip install pyfluidsynth
    # ! apt install fluidsynth

    # !cp /usr/share/sounds/sf2/FluidR3_GM.sf2 ./font.sf2

In [None]:
#3s
import glob
import random

# Seed kept ahead of the third-party and project imports, as in the original cell.
random.seed(42)

import pandas as pd
from tensorflow import keras

from model.colab_tension_vae import build_model
from model.colab_tension_vae.params import configs
from model.embeddings import obtain_embeddings, get_embeddings_roll_df
from model.train import train_model
from preprocessing import preprocess_data
from utils.files_utils import save_pickle, load_pickle, data_path
from utils.plots_utils import plot_train

## Parámetros

In [4]:
#@title Parámetros ajustables
# Tunable parameters, exposed as a Colab form through the `#@param` annotations.

#@markdown Seleccionar directorios de los datasets
# dataset_name: name of the pickle used to cache the preprocessed data.
# model_name:   name used for the saved model, its training logs and its embeddings.
# config_name:  key into `configs`; the selected config provides `bars` for preprocessing.
dataset_name = "sonata15_mapleleaf_ds" #@param {type:"string"}
model_name = "urania-test" #@param {type:"string"}
config_name = "8bar" #@param ["8bar", "4bar"]
config = configs[config_name]

#@markdown ¿Entrenamos un nuevo modelo?
# if_train:       train a model (True) or load the saved one (False).
# entrenar_nuevo: forwarded to `train_model` as `new_training`.
if_train = True #@param {type:"boolean"}
entrenar_nuevo = True #@param {type:"boolean"}

#@markdown ¿Cuántas épocas? ¿Cada cuánto grabamos?
# epochs:  forwarded to `train_model` as `final_epoch` and to `plot_train`.
# checkpt: forwarded to `train_model` as `ckpt`.
epochs =  10#@param {type:"integer"}
checkpt =  1#@param {type:"integer"}

#@markdown Datasets:
# Folder names (relative to `data_path`), each written with a trailing slash.
dataset1 = "Bach/" #@param {type:"string"}
dataset2 = "ragtime/" #@param {type:"string"}
dataset3 = "Mozart/" #@param {type:"string"}
dataset4 = "Frescobaldi/" #@param {type:"string"}


def _list_dataset_files(dataset_dir):
    """Return `(style_name, file_paths)` for one dataset folder under `data_path`.

    `dataset_dir` may be given with or without a trailing slash; the style name
    is the folder name without it. Files are listed in sorted order because
    `os.listdir` returns entries in arbitrary order, which would make the song
    order differ between machines and runs.
    """
    style_name = dataset_dir.rstrip("/")
    folder = data_path + style_name + "/"
    return style_name, [folder + file_name for file_name in sorted(os.listdir(folder))]


# Map each style name to the paths of the files found in its dataset folder.
songs = dict(
    _list_dataset_files(dataset_dir)
    for dataset_dir in (dataset1, dataset2, dataset3, dataset4)
)

In [None]:
#@title Transformar estilo
# Source and target styles of the transfer; the pickle name is "<source>2<target>".
ds_original = "Bach" #@param ["Bach", "ragtime", "Mozart", "Frescobaldi"]
ds_objetivo = "Mozart" #@param ["Bach", "ragtime", "Mozart", "Frescobaldi"]

nombre_pickle = f"{ds_original}2{ds_objetivo}"

____
# Preprocessing and Model

Los datos se llevan a un formato de listas anidadas, es decir, una lista de:

- $n$ canciones = lista de:
- 2 voces = lista de:
- $e$ eventos (notas, silencios, etc.)


Armamos el DataFrame con el que manejamos las canciones. Cada fila es una canción y las columnas son:
- `Autor`
- `Título`
- `Id roll`: índice de la matriz (roll) dentro de la canción
- `Roll`: objeto (`GuoRoll`) con:
    - `matrix`: matriz de $16n \times 89$, donde $n$ es la cantidad de compases
    - `bars`: cantidad de compases por matriz (es el mismo para todo el dataset, con lo cual, podría eliminarse la redundancia en un trabajo futuro)
    - `song`: referencia al objeto `song` al que pertenece si fue obtenido de ahí. Si fue obtenido desde un embedding, es `None` (en un trabajo futuro podría cambiárselo por un singleton).
    - `score`: partitura obtenida a partir de la matriz
    - `midi`: Pretty MIDI obtenido a partir de la matriz

Luego le agregaremos:
- `Embedding`: el vector resultado de encodear el roll

## Preprocessing

In [5]:
# Reuse the cached preprocessed DataFrame when it can be loaded; otherwise
# preprocess the raw songs and cache the result for the next run.
try:
    df_preprocessed = load_pickle(name=dataset_name, path=data_path+"preprocessed_data/")
except Exception as load_error:
    # `Exception` instead of a bare `except`, so interrupting the kernel while the
    # pickle loads does not silently start the preprocessing.
    print(f"Could not load cached preprocessed data ({load_error!r}); preprocessing from scratch")
    df_preprocessed = preprocess_data(songs, config.bars)
    save_pickle(df_preprocessed, name=dataset_name, path=data_path+"preprocessed_data/")

## Model

In [None]:
# Either train the VAE on the preprocessed data or load the saved model.
if if_train:
    vae = train_model(
        df=df_preprocessed,
        model_name=model_name,
        config=config_name,
        new_training=entrenar_nuevo,
        final_epoch=epochs,
        ckpt=checkpt,
    )
else:
    # Pass `kl_beta` as a custom object, consistently with the other load of
    # this same saved model further down in the notebook.
    vae = keras.models.load_model(
        data_path + f"saved_models/{model_name}/",
        custom_objects={"kl_beta": build_model.kl_beta},
    )

In [None]:
# List the training-log CSVs of this model. Sorted, because `glob` returns
# matches in arbitrary order.
sorted(glob.glob(rf"{data_path}logs/{model_name}_*.csv"))

In [None]:
id_file =  790#@param {type:"integer"}

# Read the training log with the selected id and plot it.
callbacks = pd.read_csv(f"{data_path}logs/{model_name}_{id_file}.csv")
plot_train(callbacks, epochs)

## Reconstruction

In [None]:
# TODO: merge this cell and the next one into a single function
# Reuse the cached embeddings when they can be loaded; otherwise compute them
# with the model and cache them.
try:
    df_emb = load_pickle(name=model_name, path=data_path + "embeddings/")
except Exception as load_error:
    # `Exception` instead of a bare `except`, so interrupting the kernel while the
    # pickle loads does not silently start the computation.
    print(f"Could not load cached embeddings ({load_error!r}); computing them")
    df_emb = obtain_embeddings(df_preprocessed, vae)
    save_pickle(df_emb, model_name, data_path + "embeddings/")


In [None]:
try:
    df_reconstructed = load_pickle(model_name + "recons", data_path + "embeddings/")
except:
    df_reconstructed = get_embeddings_roll_df(df_emb, vae)
    save_pickle(df_reconstructed, model_name + "recons", data_path + "embeddings/")

if is_colab:
  ! sudo apt install lilypond

In [None]:
# Show the original score followed by its reconstruction for the first rows.
for row_label, row in df_reconstructed.head().iterrows():
    original_roll = row.roll
    reconstructed_roll = row.EmbeddingRoll

    print(original_roll.song.nombre)
    print("Original:")
    original_roll.display_score()
    print("Reconstrucción:")
    reconstructed_roll.display_score()
    print("----------------------------------------------------------------------")

# Working space

In [None]:
# Number of bars per roll, read from the first reconstructed row. Positional
# access (`iloc`) avoids depending on a particular index label being present
# among the first rows.
df_reconstructed["roll"].iloc[0].bars

In [None]:
# Shape of the matrix of the first preprocessed roll. Positional access (`iloc`)
# avoids depending on index label 0 being present.
df_preprocessed["roll"].iloc[0].matrix.shape

In [None]:
# Show the working-tree status of the git repository in the current directory.
! git status

In [None]:
# Set the git author identity used for commits.
# NOTE(review): `--global` writes to the user-level git configuration, not only
# to this repository.
!git config --global user.email "lsomacal@gmail.com"
!git config --global user.name "LucasSom"

In [None]:
# Stage the notebook and the training module.
! git add style-transfer.ipynb model/train.py

In [None]:
# The commit step below is commented out, so this cell only pushes commits
# that already exist.
# !git commit -m "Running on colab without reload model"
! git push

In [None]:
from tensorflow import keras
from model.colab_tension_vae import build_model

# Load the saved model, passing `kl_beta` as a custom object.
vae = keras.models.load_model(
    f"{data_path}saved_models/{model_name}/",
    custom_objects={"kl_beta": build_model.kl_beta},
)

In [None]:
# Print the summary of the model currently held in `vae`.
vae.summary()