In [None]:
# Both data locations live under the same DataCrawling folder.
_data_crawling_root = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/DataCrawling"

# Input: folder that is walked recursively for song-part JSON files.
song_parts_from_ids_path = f"{_data_crawling_root}/DataPreprocessing/song_parts_from_ids"
# Output: folder that receives the generated MIDI files and logs.json.
path_to_save_preprocessed_data = f"{_data_crawling_root}/ProcessedData/midi_from_json_songs"

In [None]:
# Root folder of the MIDI project.
midi_source_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI"
# Library folder inside the MIDI project; it is appended to sys.path so that
# the converter / dto / const packages can be imported.
midi_lib_path = f"{midi_source_path}/midi_lib"

In [None]:
import sys
sys.path.append(midi_lib_path)

import pandas as pd
import json
import os

import converter.hooktheory_utils as htu
from converter.hooktheory_json_song_part_to_midi_dto import hooktheory_json_song_part_to_midi_dto_converter
from converter.midi_dto_to_midi_file import midi_dto_to_midi_file_converter
from converter.chord_dto_to_note_dtos import chord_dto_to_note_dtos_converter
from converter.hooktheory_json_chord_to_chord_dto import hooktheory_json_chord_to_chord_dto_converter

from dto.KeySignature import KeySignatureDTO

import const.midi as mc

import midi_utils as mu

In [None]:
# Recursively gather the full path of every *.json file found below
# song_parts_from_ids_path.
json_files = [
    os.path.join(dir_path, file_name)
    for dir_path, _sub_dirs, file_names in os.walk(song_parts_from_ids_path)
    for file_name in file_names
    if file_name.endswith(".json")
]

print(len(json_files))

In [None]:
json_song_paths = []

# Keep every song file that has at least one part with main_data_type == "json".
for json_file in json_files:
    # JSON text is UTF-8; state it explicitly instead of relying on the
    # platform's default encoding.
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    # The file handle is already closed here; only the parsed data is inspected.
    if any(part["main_data_type"] == "json" for part in data["song_parts"]):
        json_song_paths.append(json_file)

print(len(json_song_paths))

In [None]:
class JSONSongPartProcessingLog:
    """Record describing one song part that was converted to a MIDI file.

    Stores the source JSON file, the song link, the part id and section,
    and the path of the MIDI file that was written for the part.
    """

    # Attribute names, in the order they appear in the exported dictionary.
    _FIELDS = (
        "json_file_path",
        "song_link",
        "song_part_id",
        "song_part_section",
        "midi_file_path",
    )

    def __init__(
        self,
        json_file_path,
        song_link,
        song_part_id,
        song_part_section,
        midi_file_path
    ):
        """Store the five values unchanged as instance attributes."""
        self.json_file_path = json_file_path
        self.song_link = song_link
        self.song_part_id = song_part_id
        self.song_part_section = song_part_section
        self.midi_file_path = midi_file_path

    def to_dict(self):
        """Return the record as a plain dict keyed by attribute name."""
        return {field: getattr(self, field) for field in self._FIELDS}

In [None]:
# Conversion log: maps song_part_id -> dict describing the generated MIDI file.
logs = {}

n_json_files = len(json_song_paths)

# Make sure the output folder exists before anything is written into it
# (in case the MIDI writer does not create it itself).
os.makedirs(path_to_save_preprocessed_data, exist_ok=True)

# Convert json song parts to midi
for current_idx, json_song_path in enumerate(json_song_paths, start=1):
    print(f"Processing {current_idx}/{n_json_files} json files")

    # Read the song and release the file handle before converting its parts.
    # JSON text is UTF-8, so the encoding is stated explicitly.
    with open(json_song_path, encoding="utf-8") as f:
        data = json.load(f)

    song_link = data["link"]

    # Flatten the path relative to the input root into a file-name prefix:
    # "<artist>/<song>.json" -> "<artist>_<song>". Only the real extension is
    # removed and the platform's path separator is used. This is computed once
    # per song because it does not depend on the part.
    song_relative_path: str = os.path.relpath(json_song_path, song_parts_from_ids_path)
    song_name_prefix = os.path.splitext(song_relative_path)[0].replace(os.sep, "_")

    json_song_parts = [part for part in data["song_parts"] if part["main_data_type"] == "json"]

    for part in json_song_parts:
        song_part_section = part["metadata"]["section"]
        song_part_id = part["song_part_id"]

        # Config path to save midi file
        processed_song_file_name = f"{song_name_prefix}_{song_part_section}_{song_part_id}.mid"
        midi_file_path = os.path.join(path_to_save_preprocessed_data, processed_song_file_name)

        print(f"CONVERTING: {json_song_path}")
        print(f"   section: {song_part_section}")
        print(f"      link: {song_link}")

        # Convert json song part to midi dto
        midi_dto = hooktheory_json_song_part_to_midi_dto_converter(part)

        # Convert midi dto to midi file
        midi_dto_to_midi_file_converter(midi_dto, midi_file_path)

        print(f"-----> SONG CONVERTED: {midi_file_path}")
        print(f"                 from: {json_song_path}")
        print(f"              section: {song_part_section}")
        print(f"                 link: {song_link}")

        # The first occurrence of a part id wins; a repeated id is only
        # reported, its MIDI file has already been written above.
        if song_part_id not in logs:
            logs[song_part_id] = JSONSongPartProcessingLog(
                json_file_path=json_song_path,
                song_link=song_link,
                song_part_id=song_part_id,
                song_part_section=song_part_section,
                midi_file_path=midi_file_path
            ).to_dict()
        else:
            print(f"ERROR: {song_part_id} already exists in logs")

        print("")

In [None]:
# Write the collected conversion log as logs.json into the output folder,
# next to the generated MIDI files.
log_json_path = os.path.join(path_to_save_preprocessed_data, "logs.json")
with open(log_json_path, "w") as log_file:
    json.dump(obj=logs, fp=log_file)

In [None]:
notify_file_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/Ideas/hooktheory/output/logs/notify.txt"

# Create the log folder on demand so that a missing directory cannot make the
# very last step of the run fail.
os.makedirs(os.path.dirname(notify_file_path), exist_ok=True)

# Write a marker file whose whole content is "DONE".
# NOTE(review): presumably something outside this notebook watches for this
# file to learn that the run finished; confirm.
with open(notify_file_path, "w") as f:
    f.write("DONE")