# Script to make .json keys

In [None]:
!pip install librosa soundfile
import json
import os

import librosa
import numpy as np
from tqdm import tqdm

# -------------------------------------
# Paths (adjust these for your Drive layout)
# -------------------------------------
# NOTE(review): "uncoditional" looks like a typo, but the string must match the
# actual folder name on Drive - confirm before changing it.
DRIVE_AUDIO_DIR = "/content/drive/MyDrive/uncoditional dataset/download genres/violin"
DRIVE_METADATA_DIR = "/content/drive/MyDrive/processed_datasets/violin keys"
ERROR_LOG = "/content/drive/MyDrive/processed_datasets/error_logV.txt"

# Create the JSON output folder and the error log's parent folder if missing
os.makedirs(DRIVE_METADATA_DIR, exist_ok=True)
os.makedirs(os.path.dirname(ERROR_LOG), exist_ok=True)

# -------------------------------------
# Collect inputs and reset run state
# -------------------------------------
# Keep only directory entries whose name ends in ".mp3" (case-insensitive)
audio_files = []
for entry in os.listdir(DRIVE_AUDIO_DIR):
    if entry.lower().endswith(".mp3"):
        audio_files.append(entry)

# Opening in write mode truncates whatever a previous run left in the log
with open(ERROR_LOG, 'w'):
    pass

# Names of the files that fail during processing
errors = []

def _clean_bpm(tempo, low=40, high=240):
    """Turn a tempo estimate into an integer BPM, or None when unusable.

    ``tempo`` may be a plain float or a NumPy array (a recorded run of this
    cell emitted a warning on ``int(tempo) if tempo else None``), so it is
    flattened and only its first element is used. NaN, an empty result, and
    values outside ``[low, high]`` all yield None. The value is truncated
    with ``int()``, as before.
    """
    values = np.ravel(np.asarray(tempo, dtype=float))
    if values.size == 0:
        return None
    bpm = float(values[0])
    if np.isnan(bpm) or bpm < low or bpm > high:
        return None
    return int(bpm)


for filename in tqdm(audio_files, desc="Processing MP3s"):
    audio_path = os.path.join(DRIVE_AUDIO_DIR, filename)
    name = os.path.splitext(filename)[0]
    output_path = os.path.join(DRIVE_METADATA_DIR, f"{name}.json")

    try:
        # sr=None keeps the file's own sample rate; mono=True downmixes
        y, sr = librosa.load(audio_path, sr=None, mono=True, duration=None)

        # Check for empty audio
        if len(y) == 0:
            raise ValueError("Empty audio file.")

        # float() guarantees a JSON-serialisable plain Python number
        duration = round(float(librosa.get_duration(y=y, sr=sr)), 2)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

        metadata = {
            "key": "",
            "name": name,
            "file_extension": "mp3",
            "duration": duration,
            "sample_rate": sr,
            # Scalar int, or None when the estimate is NaN / outside 40-240
            "bpm": _clean_bpm(tempo),
            "artist": "Unknown Artist",
            "title": name,
            "genre": "relaxing violin",
            "description": "",
            "keywords": "",
            "instrument": "violin",
            "moods": []
        }

        with open(output_path, 'w', encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    except Exception as e:
        # Per-file boundary: record the failure and continue with the next file
        errors.append(filename)
        with open(ERROR_LOG, 'a', encoding="utf-8") as f:
            f.write(f"{filename}: {str(e)}\n")

# Summary
print(f"Success: {len(audio_files) - len(errors)} | Failed: {len(errors)}")
print(f"Metadata saved to: {DRIVE_METADATA_DIR}")



  "bpm": int(tempo) if tempo else None,
Processing MP3s: 100%|██████████| 310/310 [16:10<00:00,  3.13s/it]

Success: 310 | Failed: 0
Metadata saved to: /content/drive/MyDrive/processed_datasets/violin keys





# Enhanced version of the .json keys script

In [None]:
# Install required packages (Colab-specific)
!pip install librosa soundfile tqdm music-tag

# Import necessary libraries
import os              # For file/directory operations
import json            # To save metadata in JSON format
import librosa         # For audio analysis (loading, duration, BPM detection)
import numpy as np     # For numerical operations (e.g., checking NaN values)
import music_tag       # For mood/instrument/genre prediction
from tqdm import tqdm  # For progress bar during processing

# -------------------------------------
# Configuration: input/output locations
# Adjust these to match your own Drive layout
# -------------------------------------
# NOTE(review): "uncoditional" looks like a typo, but the string must match the
# actual folder name on Drive - confirm before changing it.
DRIVE_AUDIO_DIR = "/content/drive/MyDrive/uncoditional dataset/download genres/Piano"
DRIVE_METADATA_DIR = "/content/drive/MyDrive/processed_datasets/Piano keys"
# NOTE(review): this is the same log path the violin cell uses, so resetting it
# below also discards that run's errors - confirm this is intended.
ERROR_LOG = "/content/drive/MyDrive/processed_datasets/error_logV.txt"

# Make sure the JSON output folder and the log's parent folder exist
os.makedirs(DRIVE_METADATA_DIR, exist_ok=True)
os.makedirs(os.path.dirname(ERROR_LOG), exist_ok=True)

# -------------------------------------
# Gather the MP3 files from the input folder
# -------------------------------------
audio_files = []
for entry in os.listdir(DRIVE_AUDIO_DIR):
    if not entry.lower().endswith(".mp3"):
        continue  # skip anything that is not an MP3 (case-insensitive match)
    audio_files.append(entry)

# Filenames that fail during processing are collected here
errors = []

# -------------------------------------
# Start from an empty error log
# -------------------------------------
# Opening for writing truncates the file; nothing needs to be written
open(ERROR_LOG, 'w').close()

# -------------------------------------
# Helpers
# -------------------------------------
def _normalize_tempo(tempo, low=40, high=240):
    """Return the tempo as a rounded int BPM, or None when it is unusable.

    ``tempo`` may be a plain float or a NumPy array, so it is flattened and
    only its first element is used. NaN, an empty result, and values outside
    ``[low, high]`` all yield None.
    """
    values = np.ravel(np.asarray(tempo, dtype=float))
    if values.size == 0:
        return None
    bpm = float(values[0])
    if np.isnan(bpm) or bpm < low or bpm > high:
        return None
    return int(round(bpm))


def _read_tags(audio_path, filename, missing_keys, keys=("mood", "instrument", "genre")):
    """Read each tag in ``keys`` independently; unavailable tags become "".

    A key that raises KeyError is added to ``missing_keys`` instead of being
    printed for every file. Any other failure is printed and leaves the
    affected value(s) empty, so one bad tag never discards the others.
    """
    values = dict.fromkeys(keys, "")
    try:
        tags = music_tag.load_file(audio_path)
    except Exception as e:
        print(f"Tag prediction failed for {filename}: {e}")
        return values

    for key in keys:
        try:
            values[key] = str(tags[key]).strip()
        except KeyError:
            missing_keys.add(key)
        except Exception as e:
            print(f"Tag prediction failed for {filename}: {e}")
    return values


# Tag keys that could not be looked up for at least one file
unavailable_tag_keys = set()

# -------------------------------------
# Main Loop: Process Each Audio File
# -------------------------------------
for filename in tqdm(audio_files, desc="Processing Tracks"):
    # Build full paths for audio and metadata output
    audio_path = os.path.join(DRIVE_AUDIO_DIR, filename)
    name = os.path.splitext(filename)[0]  # Remove extension from filename
    output_path = os.path.join(DRIVE_METADATA_DIR, f"{name}.json")

    try:
        # Load audio at its native sample rate, downmixed to mono
        y, sr = librosa.load(audio_path, sr=None, mono=True)

        # Check if the loaded audio is empty
        if len(y) == 0:
            raise ValueError("Empty audio file.")

        # Duration in seconds; float() keeps it a plain JSON-serialisable number
        duration = round(float(librosa.get_duration(y=y, sr=sr)), 2)

        # Estimate tempo (BPM) and reduce it to a validated scalar (or None)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        bpm = _normalize_tempo(tempo)

        # NOTE(review): music-tag appears to read tags already embedded in the
        # file rather than predict them from the audio, and 'mood'/'instrument'
        # may not be keys it supports - confirm against the music-tag docs.
        tag_values = _read_tags(audio_path, filename, unavailable_tag_keys)
        predicted_mood = tag_values["mood"]
        predicted_instrument = tag_values["instrument"]
        predicted_genre = tag_values["genre"]

        # Split comma-separated moods, dropping blanks and surrounding spaces
        moods = [m.strip() for m in predicted_mood.split(",") if m.strip()]

        # Construct metadata dictionary (key order defines the JSON layout)
        metadata = {
            "key": "",  # Empty placeholder
            "artist": "Unknown Artist",  # Placeholder, can be filled later
            "sample_rate": sr,  # From the audio file
            "file_extension": "mp3",  # Hardcoded since only MP3s are listed
            "description": "",  # Empty placeholder
            "keywords": "",  # Empty placeholder
            "duration": duration,  # Calculated from the audio
            "bpm": bpm if bpm is not None else "",  # "" when no usable tempo
            "genre": predicted_genre or "relaxing Piano",  # Tag value if present
            "title": name,  # Based on filename
            "name": name,  # Based on filename
            "instrument": predicted_instrument or "Piano",  # Tag value if present
            "moods": moods
        }

        # Save metadata as a JSON file
        with open(output_path, 'w', encoding="utf-8") as out_file:
            json.dump(metadata, out_file, indent=2)

    except Exception as e:
        # Per-file boundary: log the filename and message, then keep going
        errors.append(filename)
        with open(ERROR_LOG, 'a', encoding="utf-8") as log_file:
            log_file.write(f"{filename}: {str(e)}\n")

# -------------------------------------
# Print Summary of Results
# -------------------------------------
print(f"Success: {len(audio_files) - len(errors)} | Failed: {len(errors)}")
print(f"Metadata saved to: {DRIVE_METADATA_DIR}")
if unavailable_tag_keys:
    # Reported once here instead of once per file inside the loop
    print(f"Tag keys unavailable (defaults used): {', '.join(sorted(unavailable_tag_keys))}")

# .json to CSV converter

In [None]:
import os
import json
import pandas as pd
from google.colab import files

# Folder holding the per-track JSON metadata files
folder_path = '/content/drive/MyDrive/processed_datasets/violin keys'  # Change this to your folder path

# One dict per CSV row is accumulated here
data_list = []

# Visit every directory entry whose name ends in ".json"
json_names = (entry for entry in os.listdir(folder_path) if entry.endswith('.json'))
for json_name in json_names:
    with open(os.path.join(folder_path, json_name), 'r') as handle:
        payload = json.load(handle)

    # A file may hold a single object or a list of objects; treat both as a list
    rows = payload if isinstance(payload, list) else [payload]
    data_list.extend(rows)

# Build the table; columns come from the keys of the collected dicts
df = pd.DataFrame(data_list)

# Write the table to a CSV in the current working directory (no index column)
csv_filename = 'violin keys.csv'
df.to_csv(csv_filename, index=False)

# Hand the CSV to the Colab file-download helper
files.download(csv_filename)

print(f"CSV file '{csv_filename}' has been created and downloaded.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

CSV file 'violin keys.csv' has been created and downloaded.


# .json to CSV converter (enhanced version)

In [None]:
import os
import json
import pandas as pd
from google.colab import files

def _flatten_moods(record):
    """Return ``record`` with a list-valued "moods" joined into one string.

    Items are passed through ``str()`` so a non-string mood cannot break the
    join. Records without a list under "moods" are returned unchanged.
    """
    moods = record.get("moods")
    if isinstance(moods, list):
        return {**record, "moods": ", ".join(str(m) for m in moods)}
    return record


# Define the folder containing the JSON files
folder_path = '/content/drive/MyDrive/processed_datasets/Piano keys'  # Change this to your folder path

# Name the CSV after the folder. normpath drops a trailing slash first;
# otherwise basename would be '' and the output file would be just ".csv".
folder_name = os.path.basename(os.path.normpath(folder_path))
csv_filename = f"{folder_name}.csv"

# List to hold the data from all JSON files
data_list = []

# Sorted so the CSV row order is the same on every run
for filename in sorted(os.listdir(folder_path)):
    if not filename.lower().endswith('.json'):
        continue
    file_path = os.path.join(folder_path, filename)

    try:
        with open(file_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
    except (OSError, ValueError) as e:
        # ValueError covers malformed JSON and undecodable bytes
        print(f"Error reading {filename}: {e}")
        continue

    # Handle both a single object and a list of objects
    records = data if isinstance(data, list) else [data]
    for record in records:
        if isinstance(record, dict):
            data_list.append(_flatten_moods(record))
        else:
            # Skip only the bad entry instead of dropping the whole file
            print(f"Error reading {filename}: skipped non-object entry {record!r}")

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data_list)

# Save the DataFrame to a CSV file
df.to_csv(csv_filename, index=False)

# Download the CSV file to your local machine
files.download(csv_filename)

print(f"CSV file '{csv_filename}' has been created and downloaded.")