#### 1. Converting audio data into a structured data form allows for easier analysis and manipulation. Here's a step-by-step guide on how to convert audio data into structured data using Python: ####

In [None]:
### Load the Audio Data
# You can use the `librosa` library to load audio files and extract various features from them.

# If librosa is not installed yet, run this once in its own cell: %pip install librosa

import librosa

In [None]:
# Decode the audio file with librosa.
# No `sr` argument is passed, so librosa applies its own default sampling rate.
audio_path = "Jazzy_Piano_AI_Genrated.mp3"
y, sr = librosa.load(audio_path)

# `y` holds the audio time series; `sr` is the sampling rate that goes with it.

#### 2. Extract Basic Features ####

In [None]:
# Temporal feature -- zero-crossing rate:
# the rate at which the signal changes sign.
zcr = librosa.feature.zero_crossing_rate(y=y)

# The first row holds the rate values; report how many of them were produced.
print(zcr.shape[1])

In [None]:
# Spectral features, both computed from the same signal and sampling rate.

# Spectral centroid: where the "center of mass" of the spectrum is located.
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

# Spectral bandwidth: the width of the spectrum.
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)

# Preview only the first five values of each feature to keep the output short.
# (The label previously read "Bandwitdth"; the spelling is corrected here.)
print(f"Spectral Centroid: {spectral_centroid[0][:5]}\n\nSpectral Bandwidth: {spectral_bandwidth[0][:5]}")

In [None]:
# Rhythmic features: run librosa's beat tracker on the signal.
# The call returns a pair, unpacked below into the tempo estimate and the beat frames.
beat_result = librosa.beat.beat_track(y=y, sr=sr)
tempo, beat_frames = beat_result
print(f"Tempo: {tempo}\nBeat Frames: {beat_frames}")

#### 3. Extract Advanced Features ####

In [None]:
# Mel-Frequency Cepstral Coefficients (MFCCs).
# `n_mfcc=13` asks librosa for 13 coefficients.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

# `len(mfccs)` counts the coefficient rows, so label it as such
# (the previous label, "Mel frequencies", described a different quantity).
print(f"Number of MFCC coefficients: {len(mfccs)}")

In [None]:
# Chroma feature: how the signal's energy is spread over the 12 pitch classes.
chroma = librosa.feature.chroma_stft(y=y, sr=sr)

# Build the report first, then emit it.
chroma_report = f"Chroma - Energy Distribution:\n{chroma}"
print(chroma_report)

In [None]:
# Tonnetz (tonal centroid) features: capture harmonic relations in the audio.
tonnetz = librosa.feature.tonnetz(y=y, sr=sr)

# `len(tonnetz)` counts the rows of the feature matrix, i.e. the tonnetz
# dimensions. The label previously read "Numner of harmonic relations";
# the typo is fixed and the wording now names what is actually counted.
print(f"Number of tonnetz dimensions: {len(tonnetz)}")

#### 4. Structured Data Representation ####

Once the features are extracted, they can be represented in a structured form such as a DataFrame using `pandas`.

This structured data form makes it easier to analyze the extracted audio features, use them in machine learning models, or store them in databases for further processing.

In [None]:
import numpy as np
import pandas as pd

# Depending on the librosa version, `beat_track` hands back the tempo either as
# a scalar or as a one-element array. Storing the array directly would put an
# ndarray object in the 'Tempo' cell, so reduce it to a plain float first.
tempo_bpm = float(np.ravel(tempo)[0])

# Summarise each time-varying feature by its mean so that the whole clip is
# described by a single row of numbers.
data = {
    'Zero Crossing Rate': zcr.mean(),
    'Spectral Centroid': spectral_centroid.mean(),
    'Spectral Bandwidth': spectral_bandwidth.mean(),
    'Tempo': tempo_bpm,
    'MFCC1': mfccs[0].mean(),
    'MFCC2': mfccs[1].mean(),
    #... add other MFCCs or features as needed
    'Chroma1': chroma[0].mean(),
    'Chroma2': chroma[1].mean(),
    #... add other Chroma features as needed
    'Tonnetz1': tonnetz[0].mean(),
    'Tonnetz2': tonnetz[1].mean()
    #... add other Tonnetz features as needed
}

# Wrap the dict in a list so pandas builds a one-row DataFrame
# (one column per feature).
df = pd.DataFrame([data])

In [None]:
# Show the one-row feature table as the cell's output.
df