In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os

load_dotenv(override=True)

key = os.getenv("GEMINI_API_KEY_1")
# Never echo the credential itself: cell outputs are stored inside the notebook
# file, so a printed key is leaked to anyone who can read or clone the notebook.
print("KEY loaded:", key is not None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: [REDACTED]
Starts with sk-or-: True


In [2]:
import librosa
import numpy as np


def extract_features(file_path):
    """Extract tempo, spectral, pitch and loudness descriptors from one audio file.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to analyse; it is passed straight to ``librosa.load``.

    Returns
    -------
    dict
        ``tempo`` (float), ``beat_density`` (detected beats per second of audio),
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``
        (each the mean over all frames), ``pitch_range`` (max minus min of the
        voiced f0 estimates, 0 when nothing is voiced) and ``dynamic_range``
        (max minus min of the frame-wise RMS).
    """
    # sr=None keeps the file's native sample rate, so every later librosa call
    # has to be told that rate explicitly instead of relying on its default.
    y, sr = librosa.load(file_path, sr=None, mono=True)
    duration = librosa.get_duration(y=y, sr=sr)

    # Beat tracking
    tempo_estimate, beats = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa release the tempo is a scalar or a 1-element
    # array; normalise it to a plain float so downstream code sees one type.
    tempo_bpm = float(np.atleast_1d(tempo_estimate)[0])
    beat_density = len(beats) / duration if duration > 0 else 0

    # Spectral features (averaged over frames)
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))

    # Pitch features. `sr` must be forwarded: pyin otherwise assumes 22050 Hz
    # and mis-scales every f0 estimate for audio loaded at another rate.
    f0, _, _ = librosa.pyin(
        y,
        fmin=float(librosa.note_to_hz('C2')),
        fmax=float(librosa.note_to_hz('C7')),
        sr=sr,
    )
    voiced_f0 = f0[~np.isnan(f0)]  # unvoiced frames are reported as NaN
    pitch_range = np.max(voiced_f0) - np.min(voiced_f0) if len(voiced_f0) > 0 else 0

    # Loudness spread across frames
    rms = librosa.feature.rms(y=y)
    dynamic_range = np.max(rms) - np.min(rms)

    return {
        "tempo": tempo_bpm,
        "beat_density": beat_density,
        "spectral_centroid": spectral_centroid,
        "spectral_bandwidth": spectral_bandwidth,
        "spectral_rolloff": spectral_rolloff,
        "pitch_range": pitch_range,
        "dynamic_range": dynamic_range
    }

In [3]:
import os


def process_artist_folder(folder_path, extensions=(".mp3",)):
    """Run ``extract_features`` on every matching audio file in a folder.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive).
    extensions : str or iterable of str, optional
        File suffixes to accept, compared case-insensitively.
        Defaults to ``(".mp3",)``.

    Returns
    -------
    list of dict
        One feature dict per matching file, in sorted file-name order.
        Empty when nothing matches.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    song_features = []
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # reproducible across machines and runs.
    for file in sorted(os.listdir(folder_path)):
        if not file.lower().endswith(suffixes):
            continue
        file_path = os.path.join(folder_path, file)
        # A directory whose name happens to end in ".mp3" is not audio.
        if not os.path.isfile(file_path):
            continue
        song_features.append(extract_features(file_path))

    return song_features

In [4]:
def aggregate_artist_style(song_features, *, melodic_threshold=800,
                           brightness_threshold=2500, dynamic_threshold=0.05):
    """Condense per-song feature dicts into one coarse artist style profile.

    Parameters
    ----------
    song_features : iterable of dict
        Each dict must provide ``tempo``, ``pitch_range``,
        ``spectral_centroid`` and ``dynamic_range``.
    melodic_threshold, brightness_threshold, dynamic_threshold : float, optional
        Cut-offs applied to the mean pitch range, mean spectral centroid and
        mean dynamic range. A mean strictly below the cut-off yields the first
        label ("narrow" / "dark" / "compressed").

    Returns
    -------
    dict
        ``tempo_range`` (25th–75th percentile as text), ``avg_tempo`` (float),
        ``melodic_range``, ``brightness`` and ``dynamic_profile`` labels.

    Raises
    ------
    ValueError
        If ``song_features`` is empty.
    """
    songs = list(song_features)
    if not songs:
        # Without this guard numpy fails further down with an unrelated error.
        raise ValueError("song_features is empty; there are no songs to aggregate")

    # ravel() tolerates tempo values stored as 1-element arrays.
    tempos = np.ravel([s["tempo"] for s in songs]).astype(float)
    pitch_ranges = [s["pitch_range"] for s in songs]
    centroids = [s["spectral_centroid"] for s in songs]
    dynamics = [s["dynamic_range"] for s in songs]

    tempo_low = int(np.percentile(tempos, 25))
    tempo_high = int(np.percentile(tempos, 75))

    return {
        "tempo_range": f"{tempo_low}–{tempo_high} BPM",
        "avg_tempo": float(np.mean(tempos)),
        "melodic_range": "narrow" if np.mean(pitch_ranges) < melodic_threshold else "wide",
        "brightness": "dark" if np.mean(centroids) < brightness_threshold else "bright",
        "dynamic_profile": "compressed" if np.mean(dynamics) < dynamic_threshold else "dynamic"
    }


In [5]:
# Build the artist style profile from every track in the artist's folder.
folder = "ip_audio/sickick"
song_data = process_artist_folder(folder)

# Fail with a readable message instead of letting the aggregation step choke
# on an empty list when the folder holds no .mp3 files.
if not song_data:
    raise RuntimeError(f"No .mp3 files found in {folder!r}")

artist_style = aggregate_artist_style(song_data)

print(artist_style)


{'tempo_range': '126–129 BPM', 'avg_tempo': np.float64(123.92352475128371), 'melodic_range': 'narrow', 'brightness': 'bright', 'dynamic_profile': 'dynamic'}


In [6]:
import json

# The prompt announces JSON, so serialise the profile as real JSON. Formatting
# the dict directly would embed its Python repr (single quotes, numpy scalar
# wrappers). `default=float` converts numpy scalars json cannot encode natively;
# `ensure_ascii=False` keeps the en dash in "tempo_range" readable.
artist_style_json = json.dumps(artist_style, indent=2, ensure_ascii=False, default=float)

# NOTE(review): task 4 refers to a "user modifier", but nothing in this prompt
# supplies one — confirm whether a modifier was meant to be interpolated here.
system_prompt = f"""You are a music style interpretation agent.

You are given an artist style profile extracted from audio analysis.
Your job is to interpret the numerical and categorical traits
into clear musical style constraints.

Artist Style Profile (JSON):
{artist_style_json}

Tasks:
1. Summarize the artist's core musical identity.
2. Identify non-negotiable traits.
3. Identify flexible traits that can be modified safely.
4. Incorporate the user modifier without breaking artist identity.

Output strictly in JSON with keys:
- core_identity
- fixed_traits
- flexible_traits
- adjusted_style_intent

"""

In [7]:
#agent 1 -> style interpreter

# Resolve the key explicitly. If None were passed, the OpenAI client would fall
# back to the OPENAI_API_KEY environment variable and send that unrelated
# credential to OpenRouter.
openrouter_api_key = os.getenv("GEMINI_API_KEY_1")
if not openrouter_api_key:
    raise RuntimeError("GEMINI_API_KEY_1 is not set; add it to your .env file")

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=openrouter_api_key
)

# The whole task is carried by the system message; no user turn is sent.
response = client.chat.completions.create(
  model="xiaomi/mimo-v2-flash:free",
  messages=[
    {"role": "system", "content": system_prompt}]
)

# print(response.choices[0].message.content)

In [8]:
# Raw text reply of agent 1. It is interpolated verbatim into the composition
# planner prompt (system_prompt1); it is not parsed or validated as JSON first.
interpreter_output = response.choices[0].message.content

In [9]:
# Prompt for agent 2 (composition planner). It embeds agent 1's reply as-is
# under the "Style Intent (JSON)" heading and asks for a JSON blueprint with
# the eight keys listed at the end.
system_prompt1 = f"""
You are a music composition planning agent.

Using the interpreted artist style intent below,
design a completely original song blueprint.

Constraints:
- Do NOT reuse known melodies or songs.
- Use abstract musical planning only.
- Maintain artist identity through style traits.
- Prioritize originality and coherence.

Style Intent (JSON):
{interpreter_output}

Output strictly in JSON with keys:
- bpm
- key_and_mode
- song_structure
- chord_progression_templates
- rhythmic_intensity
- melodic_constraints
- energy_arc
- sound_palette


"""

In [10]:
load_dotenv(override=True)

key = os.getenv("key4")
# Never echo the credential itself: cell outputs are stored inside the notebook
# file, so a printed key is leaked to anyone who can read or clone the notebook.
print("KEY loaded:", key is not None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: [REDACTED]
Starts with sk-or-: True


In [11]:
#agent 2 -> composition planner

# Resolve the key explicitly. If None were passed, the OpenAI client would fall
# back to the OPENAI_API_KEY environment variable and send that unrelated
# credential to OpenRouter.
openrouter_api_key = os.getenv("key4")
if not openrouter_api_key:
    raise RuntimeError("key4 is not set; add it to your .env file")

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=openrouter_api_key
)

# The whole task is carried by the system message; no user turn is sent.
response1 = client.chat.completions.create(
  model="tngtech/deepseek-r1t2-chimera:free",
  messages=[
    {"role": "system", "content": system_prompt1}]
)
print(response1.choices[0].message.content)

```json
{
  "bpm": 126,
  "key_and_mode": "A Major",
  "song_structure": {
    "intro": { "bars": 8, "character": "Grounded syncopated bass with evolving percussive elements" },
    "verse": { "bars": 16, "character": "Rhythmic vocal chop motifs interacting with syncopated stabs" },
    "pre_chorus": { "bars": 8, "character": "Ascending harmonic tension with filtered synth layers" },
    "chorus": { "bars": 16, "character": "Full rhythmic intensity with bright synth chords and driving bass" },
    "breakdown": { "bars": 8, "character": "Dynamic reduction highlighting melodic motif development" },
    "outro": { "bars": 8, "character": "Percussive focus with bass elements fading into filtered delay trails" }
  },
  "chord_progression_templates": {
    "primary": "I - IV - vi - V (varied with first inversion voicings)",
    "chorus_variation": "I(add9) - IV(sus2) - vi - V(7)",
    "tension_builder": "ii - V - IV - I (half-time feel)"
  },
  "rhythmic_intensity": {
    "groove_foundation"

In [12]:
# Raw text reply of agent 2 (the saved output shows it wrapped in a ```json
# fence). It is interpolated into system_prompt2 and system_prompt3 and later
# handed to parse_gemini_output.
composition_plan = response1.choices[0].message.content

In [13]:
# Prompt for agent 3 (originality evaluator). It embeds agent 2's raw reply
# under the "Song Blueprint (JSON)" heading and asks for a JSON verdict with
# a score, risk level, detected issues and suggestions.
system_prompt2 = f"""
You are an originality and copyright safety agent.

Review the following song blueprint and evaluate:
1. Risk of similarity to existing songs
2. Overuse of common progressions
3. Whether the composition is safely original

Song Blueprint (JSON):
{composition_plan}

Output strictly in JSON:
- originality_score (0–100)
- risk_level (low / medium / high)
- issues_detected
- improvement_suggestions
"""

In [14]:
load_dotenv(override=True)

key = os.getenv("key5")
# Never echo the credential itself: cell outputs are stored inside the notebook
# file, so a printed key is leaked to anyone who can read or clone the notebook.
print("KEY loaded:", key is not None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: [REDACTED]
Starts with sk-or-: True


In [15]:
#agent 3 -> originality evaluator

# Resolve the key explicitly. If None were passed, the OpenAI client would fall
# back to the OPENAI_API_KEY environment variable and send that unrelated
# credential to OpenRouter.
openrouter_api_key = os.getenv("key5")
if not openrouter_api_key:
    raise RuntimeError("key5 is not set; add it to your .env file")

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=openrouter_api_key
)

# The whole task is carried by the system message; no user turn is sent.
response11 = client.chat.completions.create(
  model="nvidia/nemotron-3-nano-30b-a3b:free",
  messages=[
    {"role": "system", "content": system_prompt2}]
)

# print(response11.choices[0].message.content)


In [16]:
# Raw text reply of agent 3. No later cell visible in this notebook reads it.
originality_evaluation = response11.choices[0].message.content

In [17]:
import json

# The prompt announces JSON, so serialise the profile as real JSON. Formatting
# the dict directly would embed its Python repr (single quotes, numpy scalar
# wrappers). `default=float` converts numpy scalars json cannot encode natively;
# `ensure_ascii=False` keeps the en dash in "tempo_range" readable.
artist_style_json = json.dumps(artist_style, indent=2, ensure_ascii=False, default=float)

# composition_plan is agent 2's reply and is embedded unchanged.
system_prompt3 = f"""
You are an explainability agent for a music AI system.

Explain how the final song blueprint reflects the artist style,
without referencing any specific songs.

Inputs:
Artist Style Profile (JSON):
{artist_style_json}

Final Song Blueprint (JSON):
{composition_plan}

Explain:
- Tempo choice
- Harmonic character
- Structure decisions
- Sound design choices

"""

In [18]:
load_dotenv(override=True)

key = os.getenv("key6")
# Never echo the credential itself: cell outputs are stored inside the notebook
# file, so a printed key is leaked to anyone who can read or clone the notebook.
print("KEY loaded:", key is not None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: [REDACTED]
Starts with sk-or-: True


In [19]:
# agent 4 -> explainability

# Resolve the key explicitly. If None were passed, the OpenAI client would fall
# back to the OPENAI_API_KEY environment variable and send that unrelated
# credential to OpenRouter.
openrouter_api_key = os.getenv("key6")
if not openrouter_api_key:
    raise RuntimeError("key6 is not set; add it to your .env file")

client1 = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=openrouter_api_key
)

# The whole task is carried by the system message; no user turn is sent.
response112 = client1.chat.completions.create(
  model="xiaomi/mimo-v2-flash:free",
  messages=[
    {"role": "system", "content": system_prompt3}]
)

print(response112.choices[0].message.content)

Here is how the final song blueprint reflects the artist style:

**Tempo Choice**
The blueprint operates at **126 BPM**, which sits comfortably within the artist's preferred high-energy range. While the artist's historical average suggests a slightly lower baseline, this tempo choice prioritizes the "bright" and dynamic aspects of the profile, ensuring the track hits the dance floor with sufficient drive to support the rhythmic intensity required.

**Harmonic Character**
The harmonic palette utilizes **A Major**, aligning with the "bright" stylistic tag. The progression choices—specifically the use of added 9ths and suspended chords—reinforce this clarity while the "dynamic" profile is reflected in the tension-building templates (ii - V - IV - I). This creates an uplifting foundation that avoids becoming overly complex, keeping the focus on the melodic accessibility implied by the "narrow" melodic range.

**Structure Decisions**
The arrangement follows a narrative arc that emphasizes t

In [20]:
# Scratch cell: prints a literal marker string and defines nothing that other
# cells use.
print("t --- IGNORE ---")

t --- IGNORE ---


In [21]:
from music21 import stream, chord, tempo, key, meter

def generate_song_from_plan(plan):
    """Render a song plan as a flat music21 stream of four-beat block chords.

    Parameters
    ----------
    plan : dict
        Must provide ``"bpm"``, ``"key_and_mode"`` (exactly two
        whitespace-separated tokens: tonic, then mode), ``"song_structure"``
        (section name -> ``(start_bar, end_bar)`` pair) and ``"energy_arc"``
        (forwarded to ``resolve_energy``).

    Returns
    -------
    music21.stream.Stream
        Tempo mark, key and a 4/4 time signature followed by the chords.

    NOTE(review): SECTION_TO_PROGRESSION, CHORDS_IN_D_MAJOR and
    energy_to_velocity are not defined in any cell visible here — confirm
    where they come from.
    NOTE(review): for every bar in a section one 4-quarter chord is appended
    per progression entry, so a section lasts bars * len(progression)
    measures of 4/4 — confirm this is intended.
    NOTE(review): chord pitches are always looked up in CHORDS_IN_D_MAJOR,
    whatever key the plan names — confirm.
    """
    score = stream.Stream()

    # Global markings, in order: tempo, key, time signature.
    score.append(tempo.MetronomeMark(number=plan["bpm"]))

    tonic, mode = plan["key_and_mode"].split()
    score.append(key.Key(tonic, mode.lower()))

    score.append(meter.TimeSignature("4/4"))

    # One pass per section of the song structure.
    for section_name, bar_span in plan["song_structure"].items():
        start_bar, end_bar = bar_span
        bar_count = end_bar - start_bar

        # "chorus_2" and "chorus" share a progression: only the text before
        # the first underscore selects it.
        section_kind = section_name.split("_")[0]
        progression = SECTION_TO_PROGRESSION.get(section_kind, ["D"])

        section_energy = resolve_energy(plan["energy_arc"], start_bar, end_bar)
        velocity = energy_to_velocity(section_energy)

        for _ in range(bar_count):
            for symbol in progression:
                block_chord = chord.Chord(CHORDS_IN_D_MAJOR[symbol])
                block_chord.quarterLength = 4
                block_chord.volume.velocity = velocity
                score.append(block_chord)

    return score


In [22]:
def resolve_energy(energy_arc, start_bar, end_bar):
    """Return the energy level of the arc segment that fully contains a section.

    Parameters
    ----------
    energy_arc : dict
        Maps bar-range strings such as ``"1-8"`` to an energy value. The
        separator may be a hyphen, an en dash or an em dash.
    start_bar, end_bar : int
        Bounds of the section being resolved.

    Returns
    -------
    int
        The value of the first range with ``r_start <= start_bar`` and
        ``end_bar <= r_end`` when that value is an ``int``; ``60`` when the
        matching range holds a non-integer value; ``70`` when no range
        contains the section.

    Raises
    ------
    ValueError
        If a range key does not consist of exactly two integers.
    """
    for bar_range, value in energy_arc.items():
        # This notebook itself writes ranges with an en dash ("126–129 BPM"),
        # so accept typographic dashes as well as the ASCII hyphen.
        normalised = bar_range.replace("–", "-").replace("—", "-")
        r_start, r_end = map(int, normalised.split("-"))

        if r_start <= start_bar and end_bar <= r_end:
            # Non-integer values (e.g. a textual taper description) have no
            # single level, so a fixed fallback is used for them.
            return value if isinstance(value, int) else 60

    return 70  # default energy when no range covers the section


In [28]:
# Debug cell: show the planner reply exactly as stored (repr exposes the code
# fence and newlines) together with its Python type.
print("RAW VALUE ↓↓↓")
print(repr(composition_plan))
print("TYPE:", type(composition_plan))

RAW VALUE ↓↓↓
'```json\n{\n  "bpm": 126,\n  "key_and_mode": "A Major",\n  "song_structure": {\n    "intro": { "bars": 8, "character": "Grounded syncopated bass with evolving percussive elements" },\n    "verse": { "bars": 16, "character": "Rhythmic vocal chop motifs interacting with syncopated stabs" },\n    "pre_chorus": { "bars": 8, "character": "Ascending harmonic tension with filtered synth layers" },\n    "chorus": { "bars": 16, "character": "Full rhythmic intensity with bright synth chords and driving bass" },\n    "breakdown": { "bars": 8, "character": "Dynamic reduction highlighting melodic motif development" },\n    "outro": { "bars": 8, "character": "Percussive focus with bass elements fading into filtered delay trails" }\n  },\n  "chord_progression_templates": {\n    "primary": "I - IV - vi - V (varied with first inversion voicings)",\n    "chorus_variation": "I(add9) - IV(sus2) - vi - V(7)",\n    "tension_builder": "ii - V - IV - I (half-time feel)"\n  },\n  "rhythmic_inten

In [29]:
def clean_gemini_json(text):
    """Strip a surrounding Markdown code fence from a model reply.

    Parameters
    ----------
    text : str
        Model output, optionally wrapped in a fence with a language tag.

    Returns
    -------
    str
        The content of the first fenced block without its language tag, or
        the stripped input when it does not start with a fence.
    """
    text = text.strip()

    if text.startswith("```"):
        text = text.split("```")[1]
        # The rest of the opening fence line is a language tag such as "json";
        # drop it so the result starts at the payload itself.
        first_line, newline, remainder = text.partition("\n")
        if newline and first_line.strip().isalnum():
            text = remainder

    return text.strip()

In [31]:
# Debug cell: show composition_plan and its length.
# NOTE(review): the saved output starts with 'json\n', i.e. the fence is
# already stripped, yet no visible cell reassigns composition_plan — it was
# presumably cleaned in a cell that is no longer in the notebook; confirm.
print("AFTER CLEAN ↓↓↓")
print(repr(composition_plan))
print("LENGTH:", len(composition_plan))


AFTER CLEAN ↓↓↓
'json\n{\n  "bpm": 126,\n  "key_and_mode": "A Major",\n  "song_structure": {\n    "intro": { "bars": 8, "character": "Grounded syncopated bass with evolving percussive elements" },\n    "verse": { "bars": 16, "character": "Rhythmic vocal chop motifs interacting with syncopated stabs" },\n    "pre_chorus": { "bars": 8, "character": "Ascending harmonic tension with filtered synth layers" },\n    "chorus": { "bars": 16, "character": "Full rhythmic intensity with bright synth chords and driving bass" },\n    "breakdown": { "bars": 8, "character": "Dynamic reduction highlighting melodic motif development" },\n    "outro": { "bars": 8, "character": "Percussive focus with bass elements fading into filtered delay trails" }\n  },\n  "chord_progression_templates": {\n    "primary": "I - IV - vi - V (varied with first inversion voicings)",\n    "chorus_variation": "I(add9) - IV(sus2) - vi - V(7)",\n    "tension_builder": "ii - V - IV - I (half-time feel)"\n  },\n  "rhythmic_intens

In [34]:
import re


def parse_gemini_output(text):
    """Parse the JSON object embedded in a model reply.

    Parameters
    ----------
    text : str
        Model output; it may carry a leading ``json`` tag, code fences or
        other text around the object.

    Returns
    -------
    object
        The result of ``json.loads`` on the span from the first ``{`` to the
        last ``}`` (a dict for a well-formed object).

    Raises
    ------
    TypeError
        If ``text`` is not a string.
    ValueError
        If no ``{`` ... ``}`` span exists or the span is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Imported here because no visible cell of the notebook imports json.
    import json

    if not isinstance(text, str):
        raise TypeError("Gemini output must be a string")

    text = text.strip()

    # Remove leading 'json' line if present. partition() avoids the IndexError
    # that split("\n", 1)[1] raises when the tag is not followed by a newline.
    if text.lower().startswith("json"):
        _, newline, remainder = text.partition("\n")
        if newline:
            text = remainder.strip()

    # Extract JSON object
    start = text.find("{")
    end = text.rfind("}") + 1  # rfind() == -1 makes this 0
    if start == -1 or end == 0:
        raise ValueError("No JSON object found")

    json_text = text[start:end]

    # Convert percentages (e.g. 65%) → integers (65).
    # This runs over the whole text, so a "%" after digits inside a string
    # value is removed as well.
    json_text = re.sub(r'(\d+)\s*%', r'\1', json_text)

    return json.loads(json_text)

In [35]:
# Parse the planner reply into a Python object; the saved output shows a dict.
cp = parse_gemini_output(composition_plan)
print(type(cp))

<class 'dict'>
