In [1]:
import yaml
import os, pickle

import numpy as np

from typing import Optional
from pydantic import BaseModel
from beanie import Document
from tqdm import tqdm

In [3]:
# Load the notebook configuration. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it to the garbage collector.
# NOTE(review): prefer yaml.safe_load if config.yaml only holds plain data
# types or could ever come from an untrusted source.
with open('config.yaml', 'r') as config_file:
    env = yaml.load(config_file, Loader=yaml.FullLoader)

In [4]:
class AttributePiece(BaseModel):
    """Attribute lists that accompany one piece (embedded in ``DataPiece.attr_cls``)."""
    # Integer list for the polyphony attribute — presumably one class per bar; TODO confirm.
    polyph: list[int]
    # Integer list for the rhythm attribute. The field is spelled 'rythm'; that
    # spelling is what the rest of this notebook passes as the keyword argument.
    rythm: list[int]

class ContentPiece(BaseModel):
    """Content of one piece: its name, bar positions and event list."""
    # Identifier of the piece; the ingestion code in this notebook passes the source file name.
    name: Optional[int| str]
    # Untyped list; the ingestion code fills it from the first element of the
    # pickled (bar_pos, events) pair.
    bar_pos: list
    # List of event dicts; npint2native in this notebook reads and rewrites each
    # event's 'value' key before the list is stored here.
    content: list[dict]

class DataPiece(Document):
    """Beanie document that pairs one piece's content with its attribute lists."""
    # Name of the dataset the piece belongs to (the ingestion code uses 'AILabs.tw-Pop1K7').
    dataset: Optional[str]
    # Free-form version tag for the stored data.
    version: Optional[str]
    # Event content of the piece.
    content: ContentPiece
    # Attribute lists (polyphony / rhythm) of the piece.
    attr_cls: AttributePiece

    class Settings:
        # Beanie settings: `name` selects the collection name, here 'dataset'.
        name = 'dataset'

In [5]:
def pickle_load(path):
    """Load and return the object pickled in the file at ``path``.

    Args:
        path: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle deterministically (the previous
    # version left the open file to the garbage collector).
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def pickle_dump(obj, f):
    """Pickle ``obj`` into the file at path ``f`` using the highest protocol.

    Args:
        obj: Any picklable object.
        f: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened for writing.
        pickle.PicklingError: If ``obj`` cannot be pickled.
    """
    # Closing the handle explicitly guarantees the data is flushed to disk
    # before the function returns (the previous version never closed it).
    with open(f, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
  
def is_file(filename, format):
    """Report whether ``format`` occurs anywhere in ``str(filename)``.

    This is a plain substring test, not a file-system check and not a strict
    suffix match: 'x.pkl.bak' matches '.pkl'.

    Args:
        filename: Path-like value; converted with ``str`` before the test.
        format: Substring to look for, e.g. '.pkl'.

    Returns:
        True when the substring is present, False otherwise.
    """
    return format in str(filename)

def npint2native(events: list):
    """Convert NumPy integer event values to built-in ``int``, in place.

    Args:
        events: List of event dicts, each with a 'value' key.

    Returns:
        The same list object, with every NumPy integer 'value' replaced by
        the equivalent Python ``int``. Other values are left untouched.

    Raises:
        KeyError: If an event has no 'value' key.
    """
    for event in events:
        e_value = event['value']
        # np.integer is the common base of every NumPy integer width (signed
        # and unsigned), so int8/int16/uint8/... are converted too, not only
        # the int32/int64 pair that an exact type comparison would catch.
        if isinstance(e_value, np.integer):
            event['value'] = int(e_value)

    return events

In [35]:
# src_dir = r'agent\remi_dataset'
# leaf_polyph = r'agent\remi_dataset\attr_cls\polyph'
# leaf_rhythm = r'agent\remi_dataset\attr_cls\rhythm'
# dataset_name = 'AILabs.tw-Pop1K7'

# idx = 0
# for filename in tqdm(os.listdir(src_dir), desc='Transfering to DB ... '):
#     name = filename
#     filename = os.path.join(src_dir, filename)
#     if not is_file(filename, format='.pkl'):
#         continue

#     # Gather a file that contain events. 
#     bar_pos, events = pickle_load(filename)
#     events = npint2native(events=events)
#     content_piece = ContentPiece(name=name, bar_pos=bar_pos, content=events)

#     # Go to leaf dir to gather appropriate attribute-files to the file. 
#     polyph_file = os.path.join(leaf_polyph, name)
#     rhythm_file = os.path.join(leaf_rhythm, name)

#     pol_content = pickle_load(polyph_file)
#     rhy_content = pickle_load(rhythm_file)

#     attr_piece = AttributePiece(polyph=pol_content, rythm=rhy_content)
    
#     # Store to DB. 
#     data_piece = DataPiece( 
#                            dataset=dataset_name, 
#                            version='remi_data_v0',
#                            content=content_piece, 
#                            attr_cls=attr_piece)
    
#     await data_piece.insert()

#     idx += 1
    

In [7]:
def initialize_local_db(data_collection, tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm):
    """Write each fetched document to the local store as three pickle files.

    For every document, ``(bar_pos, events)`` is pickled to ``tgt_dir/<name>``
    and the polyphony / rhythm lists are pickled to ``tgt_leaf_polyph/<name>``
    and ``tgt_leaf_rhythm/<name>``.

    Fields are read by key rather than by unpacking ``dict.values()``, so the
    function does not depend on field order or on the exact number of fields
    in a document, and it no longer rebinds the builtin ``id``.

    Args:
        data_collection: Iterable of mapping-style documents with 'content'
            ('name', 'bar_pos', 'content') and 'attr_cls' ('polyph', 'rythm').
        tgt_dir: Folder receiving the ``(bar_pos, events)`` pickles.
        tgt_leaf_polyph: Folder receiving the polyphony pickles.
        tgt_leaf_rhythm: Folder receiving the rhythm pickles.

    Raises:
        KeyError: If a document lacks one of the expected fields.
    """
    for document in tqdm(data_collection, desc='Intializing Local DB'):
        content = document['content']
        attr_cls = document['attr_cls']

        name = content['name']
        # ContentPiece.name may be an int, but os.path.join only accepts strings.
        if isinstance(name, int):
            name = str(name)
        bar_pos = content['bar_pos']
        events = content['content']

        polyph = attr_cls['polyph']
        # The model spells this field 'rythm'.
        rhythm = attr_cls['rythm']

        tgt_filename = os.path.join(tgt_dir, name)
        tgt_polyph = os.path.join(tgt_leaf_polyph, name)
        tgt_rhythm = os.path.join(tgt_leaf_rhythm, name)

        pickle_dump((bar_pos, events), f=tgt_filename)
        pickle_dump(polyph, tgt_polyph)
        pickle_dump(rhythm, tgt_rhythm)

def remove_samples_from_local_db(tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm, k_samples=10):
    """Delete ``k_samples`` randomly chosen samples from the local store.

    A sample is any entry of ``tgt_dir`` whose name contains '.pkl'. For each
    chosen sample the file of the same name is removed from ``tgt_dir``,
    ``tgt_leaf_polyph`` and ``tgt_leaf_rhythm``, in that order.

    If the store holds ``k_samples`` entries or fewer, a message is printed
    and nothing is removed.

    Args:
        tgt_dir: Folder holding the sample pickles.
        tgt_leaf_polyph: Folder holding the matching polyphony pickles.
        tgt_leaf_rhythm: Folder holding the matching rhythm pickles.
        k_samples: Number of samples to delete.
    """
    pkl_names = []
    for entry in os.listdir(tgt_dir):
        if '.pkl' in entry:
            pkl_names.append(entry)

    if not len(pkl_names) > k_samples:
        print('Small than expected')
        return

    # Sampling without replacement so no file is picked twice.
    chosen = np.random.choice(len(pkl_names), k_samples, replace=False)
    for position in tqdm(range(k_samples), desc='Removing files'):
        victim = pkl_names[chosen[position]]
        doomed_paths = [
            os.path.join(folder, victim)
            for folder in (tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm)
        ]
        for doomed in doomed_paths:
            os.remove(doomed)


In [11]:
import time 

""" 
    1. The function takes n samples and stores them in the local storage for use. 
    2. A timer starts counting the elapsed time. 
    3. When the timer reaches the deadline, the function: 
        3.1. Eliminates n//k samples from the local storage. 
        3.2. Takes n samples and stores them in the local storage. 
    4. Repeat steps 1-3. 
"""
# Target folders of the local sample store (raw strings with Windows-style separators).
tgt_dir = r'data\examples\samples'
tgt_leaf_polyph = r'data\examples\samples\polyph'
tgt_leaf_rhythm = r'data\examples\samples\rhythm'

# n_samples / k_samples / period_time are only read by the commented-out
# refresh loop at the end of this cell: samples fetched per round, samples
# removed per round, and the sleep between rounds in seconds (3 minutes).
n_samples = 50
k_samples = 45
period_time = 60 * 3

from beanie import init_beanie
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase

# Motor client for the MongoDB instance on localhost.
client = AsyncIOMotorClient(
    "mongodb://localhost:27017"
)

# Initialize beanie with the DataPiece document class on the NeuralNotes database
await init_beanie(database=client.NeuralNotes, document_models=[DataPiece])
# Draw 3 random documents with a $sample aggregation stage, then write them to the local store.
data_collection = await DataPiece.find().aggregate([{'$sample': {'size': 3}}]).to_list()
initialize_local_db(data_collection, tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm)
# with open(r'log.txt', 'a') as file: 
#     moment = time.ctime()
#     samples = [sample for sample in os.listdir(tgt_dir) if '.pkl' in sample]
#     sentence = f'[{moment}] Adding {k_samples} from {tgt_dir}, there are {len(samples)} left. \n'
#     file.write(sentence)
# while True: 
#     remove_samples_from_local_db(tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm, k_samples=k_samples)
#     with open(r'log.txt', 'a') as file: 
#         moment = time.ctime()
#         samples = [sample for sample in os.listdir(tgt_dir) if '.pkl' in sample]
#         sentence = f'[{moment}]: Remove {k_samples} from {tgt_dir}, there are {len(samples)} left. \n'
#         file.write(sentence)
#     data_collection = await DataPiece.find().aggregate([{'$sample': {'size': n_samples}}]).to_list()
#     initialize_local_db(data_collection, tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm)
#     with open(r'log.txt', 'a') as file: 
#         moment = time.ctime()
#         samples = [sample for sample in os.listdir(tgt_dir) if '.pkl' in sample]
#         sentence = f'[{moment}] Adding {k_samples} from {tgt_dir}, there are {len(samples)} left. \n'
#         file.write(sentence)
#     time.sleep(period_time)

Intializing Local DB: 100%|██████████| 3/3 [00:00<00:00, 272.74it/s]


In [15]:
# Folders of the resource store that the removal helper operates on.
tgt_dir = r'data\resource'
tgt_leaf_polyph = r'data\resource\attr_cls\polyph'
tgt_leaf_rhythm = r'data\resource\attr_cls\rhythm'
# n_samples is assigned but not used in this cell.
n_samples = 100
k_samples = 95
# k_samples = len(os.listdir(tgt_dir)) - 1
# Removes k_samples randomly chosen samples together with their attribute
# files; only prints a message if the folder holds k_samples entries or fewer.
remove_samples_from_local_db(tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm, k_samples=k_samples)


Removing files: 100%|██████████| 1/1 [00:00<00:00, 499.68it/s]


In [15]:
# Scratch check of np.random.choice: draws 10 values from range(20). With the
# default replace=True values can repeat (6 appears twice in the recorded output).
np.random.choice(20, 10)

array([ 3,  5,  6, 16,  8,  4,  7,  6, 17, 15])

In [62]:
tgt_dir = r'datas\examples\samples'
tgt_leaf_polyph = r'datas\examples\samples\polyph'
tgt_leaf_rhythm = r'datas\examples\samples\rhythm'

data_collection = await DataPiece.find().aggregate([{'$sample': {'size': 3}}]).to_list()
for document in data_collection: 
    id, dataset, version, content, attr_cls = document.values()

    name, bar_pos, events = content.values()
    polyph, rhythm = attr_cls.values()

    tgt_filename = os.path.join(tgt_dir, name)
    tgt_polyph = os.path.join(tgt_leaf_polyph, name)
    tgt_rhythm = os.path.join(tgt_leaf_rhythm, name)

    pickle_dump((bar_pos, events), f=tgt_filename)
    pickle_dump(polyph, tgt_polyph)
    pickle_dump(rhythm, tgt_rhythm)

In [54]:
# Scratch inspection of the first sampled document. Only the last expression of
# a cell is displayed, so this bare expression shows nothing here.
data_collection[0]
# Positional unpack of the document's values. Note that this rebinds the builtin
# name `id`, and that the last target is spelled `arr_cls` (not `attr_cls`).
id, dataset, version, content, arr_cls = data_collection[0].values()

In [56]:
# Split the embedded content document into its three values; relies on
# `content` left in the kernel by an earlier cell and on the value order.
name, bar_pos, events = content.values()

In [9]:
import os

def get_filename_from_storage(data_dir):
    """Return the next unused integer file name for ``data_dir``.

    Every entry whose name contains '.pkl' is expected to be named
    ``<integer>.pkl``; the result is the largest such integer plus one.
    An empty storage yields 0 instead of raising ``IndexError``.

    Args:
        data_dir: Folder holding the integer-named pickle files.

    Returns:
        int: The next free integer name.

    Raises:
        ValueError: If a '.pkl' entry does not start with an integer stem.
    """
    # The dataset receives an integer index as input, so files are named by integer.
    indices = [
        int(entry.split('.')[0])
        for entry in os.listdir(data_dir)
        if '.pkl' in entry
    ]
    # max() avoids sorting the whole list; default=-1 makes numbering start at
    # 0 when the folder contains no pickle yet.
    return max(indices, default=-1) + 1


# Next free integer file name in the 'results' folder (displayed as the cell result).
get_filename_from_storage(data_dir='results')

[1023, 1060, 1208, 1230, 1325, 1438, 1539, 1635, 1668, 172, 253, 255, 26, 269, 27, 28, 286, 29, 30, 31, 361, 463, 628, 666, 718, 806, 889]
[26, 27, 28, 29, 30, 31, 172, 253, 255, 269, 286, 361, 463, 628, 666, 718, 806, 889, 1023, 1060, 1208, 1230, 1325, 1438, 1539, 1635, 1668]


1669

In [None]:
import yaml
import os, pickle

import numpy as np

from tqdm import tqdm
from models.data_piece import ContentPiece, AttributePiece, DataPiece


from beanie import init_beanie, Document
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase

from .utils import pickle_load, pickle_dump

# Source folders read by storing_lcl_db: event pickles plus the matching
# polyphony / rhythm attribute pickles (raw strings, Windows-style separators).
src_dir = r'agent\remi_dataset'
leaf_polyph = r'agent\remi_dataset\attr_cls\polyph'
leaf_rhythm = r'agent\remi_dataset\attr_cls\rhythm'
# Module-level default; storing_lcl_db takes its own dataset_name argument.
dataset_name = 'AILabs.tw-Pop1K7'

# Target folders for the local store written by initialize_local_db.
tgt_dir = r'dataset\resource'
tgt_leaf_polyph = r'dataset\resource\polyph'
tgt_leaf_rhythm = r'dataset\resource\rhythm'

def is_file(filename, format):
    """Report whether ``format`` occurs anywhere in ``str(filename)``.

    This is a plain substring test, not a file-system check and not a strict
    suffix match: 'x.pkl.bak' matches '.pkl'.

    Args:
        filename: Path-like value; converted with ``str`` before the test.
        format: Substring to look for, e.g. '.pkl'.

    Returns:
        True when the substring is present, False otherwise.
    """
    return format in str(filename)

def npint2native(events: list):
    """Convert NumPy integer event values to built-in ``int``, in place.

    Args:
        events: List of event dicts, each with a 'value' key.

    Returns:
        The same list object, with every NumPy integer 'value' replaced by
        the equivalent Python ``int``. Other values are left untouched.

    Raises:
        KeyError: If an event has no 'value' key.
    """
    for event in events:
        e_value = event['value']
        # np.integer is the common base of every NumPy integer width (signed
        # and unsigned), so int8/int16/uint8/... are converted too, not only
        # the int32/int64 pair that an exact type comparison would catch.
        if isinstance(e_value, np.integer):
            event['value'] = int(e_value)

    return events

async def init():
    """Connect to MongoDB and register ``DataPiece`` with Beanie.

    The connection string is taken from the ``MONGODB_URI`` environment
    variable when it is set, so real credentials do not have to live in the
    source; otherwise the original placeholder URI is used.
    """
    mongo_uri = os.environ.get('MONGODB_URI', "mongodb://user:pass@host:27017")

    # Create Motor client
    client = AsyncIOMotorClient(mongo_uri)

    # Initialize beanie with the DataPiece document class on the database
    # that is literally named 'db_name'.
    await init_beanie(database=client.db_name, document_models=[DataPiece])


async def storing_lcl_db(dataset_name=None, version=None):
    """Insert every sample found in ``src_dir`` into the database.

    Each entry of ``src_dir`` whose joined path contains '.pkl' is loaded as a
    ``(bar_pos, events)`` pair. The attribute pickles of the same name are
    loaded from ``leaf_polyph`` and ``leaf_rhythm``, and the combination is
    inserted as one ``DataPiece``.

    Args:
        dataset_name: Stored in ``DataPiece.dataset``; a falsy value becomes 'Unknown'.
        version: Stored in ``DataPiece.version``; a falsy value becomes 'Unknown'.
    """
    # Falsy arguments (None, '') fall back to the 'Unknown' placeholder.
    dataset_name = dataset_name or 'Unknown'
    version = version or 'Unknown'

    for name in tqdm(os.listdir(src_dir), desc='Transfering to DB ... '):
        src_path = os.path.join(src_dir, name)
        if not is_file(src_path, format='.pkl'):
            continue

        # Event file: a pickled (bar_pos, events) pair.
        bar_pos, raw_events = pickle_load(src_path)
        native_events = npint2native(events=raw_events)
        content_piece = ContentPiece(name=name, bar_pos=bar_pos, content=native_events)

        # Attribute files carry the same name inside the two leaf folders.
        polyph_path = os.path.join(leaf_polyph, name)
        rhythm_path = os.path.join(leaf_rhythm, name)
        pol_content = pickle_load(polyph_path)
        rhy_content = pickle_load(rhythm_path)
        attr_piece = AttributePiece(polyph=pol_content, rythm=rhy_content)

        # Store to DB.
        data_piece = DataPiece(
            dataset=dataset_name,
            version=version,
            content=content_piece,
            attr_cls=attr_piece,
        )
        await data_piece.insert()


def initialize_local_db(data_collection, tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm):
    """Write each fetched document to the local store as three pickle files.

    For every document, ``(bar_pos, events)`` is pickled to ``tgt_dir/<name>``
    and the polyphony / rhythm lists are pickled to ``tgt_leaf_polyph/<name>``
    and ``tgt_leaf_rhythm/<name>``.

    Fields are read by key rather than by unpacking ``dict.values()``, so the
    function does not depend on field order or on the exact number of fields
    in a document, and it no longer rebinds the builtin ``id``.

    Args:
        data_collection: Iterable of mapping-style documents with 'content'
            ('name', 'bar_pos', 'content') and 'attr_cls' ('polyph', 'rythm').
        tgt_dir: Folder receiving the ``(bar_pos, events)`` pickles.
        tgt_leaf_polyph: Folder receiving the polyphony pickles.
        tgt_leaf_rhythm: Folder receiving the rhythm pickles.

    Raises:
        KeyError: If a document lacks one of the expected fields.
    """
    for document in tqdm(data_collection, desc='Intializing Local DB'):
        content = document['content']
        attr_cls = document['attr_cls']

        name = content['name']
        # ContentPiece.name may be an int, but os.path.join only accepts strings.
        if isinstance(name, int):
            name = str(name)
        bar_pos = content['bar_pos']
        events = content['content']

        polyph = attr_cls['polyph']
        # The model spells this field 'rythm'.
        rhythm = attr_cls['rythm']

        tgt_filename = os.path.join(tgt_dir, name)
        tgt_polyph = os.path.join(tgt_leaf_polyph, name)
        tgt_rhythm = os.path.join(tgt_leaf_rhythm, name)

        pickle_dump((bar_pos, events), f=tgt_filename)
        pickle_dump(polyph, tgt_polyph)
        pickle_dump(rhythm, tgt_rhythm)

def remove_samples_from_local_db(tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm, k_samples=10):
    """Delete ``k_samples`` randomly chosen samples from the local store.

    A sample is any entry of ``tgt_dir`` whose name contains '.pkl'. For each
    chosen sample the file of the same name is removed from ``tgt_dir``,
    ``tgt_leaf_polyph`` and ``tgt_leaf_rhythm``, in that order.

    If the store holds ``k_samples`` entries or fewer, a message is printed
    and nothing is removed.

    Args:
        tgt_dir: Folder holding the sample pickles.
        tgt_leaf_polyph: Folder holding the matching polyphony pickles.
        tgt_leaf_rhythm: Folder holding the matching rhythm pickles.
        k_samples: Number of samples to delete.
    """
    pkl_names = []
    for entry in os.listdir(tgt_dir):
        if '.pkl' in entry:
            pkl_names.append(entry)

    if not len(pkl_names) > k_samples:
        print('Small than expected')
        return

    # Sampling without replacement so no file is picked twice.
    chosen = np.random.choice(len(pkl_names), k_samples, replace=False)
    for position in tqdm(range(k_samples), desc='Removing files'):
        victim = pkl_names[chosen[position]]
        doomed_paths = [
            os.path.join(folder, victim)
            for folder in (tgt_dir, tgt_leaf_polyph, tgt_leaf_rhythm)
        ]
        for doomed in doomed_paths:
            os.remove(doomed)


In [None]:
from agent.utils import word_to_event, write_midi

# Folders of the local sample store (only tgt_dir is read in this cell).
tgt_dir = r'data\examples\samples'
tgt_leaf_polyph = r'data\examples\samples\polyph'
tgt_leaf_rhythm = r'data\examples\samples\rhythm'

# Load one stored sample; the writer in this notebook pickles each sample as a
# (bar_pos, events) tuple, so sample[1] is the event list.
sample = pickle_load(os.path.join(tgt_dir, '838.pkl'))
remi_vocab = pickle_load(r'agent\pickles\remi_vocab_v2.pkl')
# Element 1 of the vocab pickle is used as the word-to-event mapping —
# TODO confirm the layout of remi_vocab_v2.pkl.
word2event = remi_vocab[1]

events = word_to_event(word2event, sample[1])
# Bare last expression: displayed as the cell result.
events

In [20]:
# Print every event of the loaded sample whose name contains 'Tempo'.
for ev in sample[1]: 
    if 'Tempo' in ev['name']:
        print(ev)

{'name': 'Tempo', 'value': 119}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 89}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name': 'Tempo', 'value': 86}
{'name'