In [1]:
import json
import os

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from the local .env file, overriding anything already set in
# the process environment.
load_dotenv(override=True)

key = os.getenv("GEMINI_API_KEY_1")
# Never echo the secret itself: cell outputs are saved inside the notebook file
# and travel with it. Report only whether a key was found and whether it has
# the expected prefix.
print("KEY loaded:", key is not None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: [REDACTED - this API key was exposed in a saved cell output and must be revoked]
Starts with sk-or-: True


In [2]:
import librosa
import numpy as np


def extract_features(file_path):
    """Extract tempo, spectral, pitch and loudness descriptors from one audio file.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to analyse; it is read with ``librosa.load`` at its native
        sample rate and mixed down to mono.

    Returns
    -------
    dict
        ``tempo`` (float, as estimated by ``librosa.beat.beat_track``),
        ``beat_density`` (detected beats per second of audio),
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``
        (means over all frames), ``pitch_range`` (max minus min of the voiced
        f0 estimates, 0 when nothing is voiced) and ``dynamic_range`` (max
        minus min of the frame-wise RMS).
    """
    # sr=None keeps the file's native sample rate, so every later call has to
    # be given that rate explicitly instead of relying on librosa's default.
    y, sr = librosa.load(file_path, sr=None, mono=True)
    duration = librosa.get_duration(y=y, sr=sr)

    # Beat tracking
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version, tempo may be a 1-element array rather
    # than a scalar; normalise it so downstream code always sees a plain float.
    tempo = float(np.atleast_1d(tempo)[0])
    beat_density = len(beats) / duration if duration > 0 else 0

    # Spectral features (averaged over frames)
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))

    # Pitch features. pyin must be told the real sample rate: without sr= it
    # assumes its default rate, which mis-scales every f0 estimate for audio
    # loaded at a different native rate.
    pitch, _voiced_flag, _voiced_prob = librosa.pyin(
        y,
        fmin=float(librosa.note_to_hz('C2')),
        fmax=float(librosa.note_to_hz('C7')),
        sr=sr
    )
    # Unvoiced frames are reported as NaN; keep only voiced estimates.
    pitch = pitch[~np.isnan(pitch)]
    pitch_range = np.max(pitch) - np.min(pitch) if len(pitch) > 0 else 0

    # Loudness spread across frames
    rms = librosa.feature.rms(y=y)
    dynamic_range = np.max(rms) - np.min(rms)

    return {
        "tempo": tempo,
        "beat_density": beat_density,
        "spectral_centroid": spectral_centroid,
        "spectral_bandwidth": spectral_bandwidth,
        "spectral_rolloff": spectral_rolloff,
        "pitch_range": pitch_range,
        "dynamic_range": dynamic_range
    }

In [3]:
import os


def process_artist_folder(folder_path):
    """Extract features for every ``.mp3`` file directly inside a folder.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive).

    Returns
    -------
    list of dict
        One ``extract_features`` result per MP3 file, in sorted filename
        order. Empty when the folder holds no MP3 files.
    """
    song_features = []

    # os.listdir yields entries in arbitrary order; sorting makes repeated
    # runs of the notebook process the songs in the same sequence.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith(".mp3"):
            continue
        file_path = os.path.join(folder_path, file_name)
        # A directory can also carry an ".mp3" suffix; only real files can be
        # handed to the audio loader.
        if not os.path.isfile(file_path):
            continue
        song_features.append(extract_features(file_path))

    return song_features

In [4]:
def aggregate_artist_style(song_features):
    """Collapse per-song feature dicts into one artist-level style profile.

    Parameters
    ----------
    song_features : list of dict
        Each dict must provide ``tempo``, ``pitch_range``,
        ``spectral_centroid`` and ``dynamic_range``.

    Returns
    -------
    dict
        ``tempo_range`` (interquartile tempo span formatted as text),
        ``avg_tempo`` (plain float) and three categorical labels,
        ``melodic_range``, ``brightness`` and ``dynamic_profile``, obtained by
        thresholding the mean of the corresponding feature.

    Raises
    ------
    ValueError
        If ``song_features`` is empty, since no statistic can be computed.
    """
    if not song_features:
        # Without this guard the failure surfaces deep inside NumPy / int()
        # with a message that does not mention the real cause.
        raise ValueError("song_features is empty: no songs to aggregate")

    tempos = [s["tempo"] for s in song_features]
    pitch_ranges = [s["pitch_range"] for s in song_features]
    centroids = [s["spectral_centroid"] for s in song_features]
    dynamics = [s["dynamic_range"] for s in song_features]

    tempo_q1, tempo_q3 = np.percentile(tempos, [25, 75])

    return {
        "tempo_range": f"{int(tempo_q1)}–{int(tempo_q3)} BPM",
        # Plain float so the profile prints as a number instead of
        # "np.float64(...)" when it is shown or embedded in a prompt.
        "avg_tempo": float(np.mean(tempos)),
        # NOTE(review): the 800 / 2500 / 0.05 cut-offs are hard-coded
        # heuristics; their units follow whatever extract_features returns.
        "melodic_range": "narrow" if np.mean(pitch_ranges) < 800 else "wide",
        "brightness": "dark" if np.mean(centroids) < 2500 else "bright",
        "dynamic_profile": "compressed" if np.mean(dynamics) < 0.05 else "dynamic"
    }


In [5]:
# Run the analysis for one artist: extract per-song features from every MP3 in
# the folder, then aggregate them into a single style profile.
folder = "ip_audio/sickick"
song_data = process_artist_folder(folder)
artist_style = aggregate_artist_style(song_data)

# Show the aggregated profile; the same dict is interpolated into the
# interpreter prompt in a later cell.
print(artist_style)


{'tempo_range': '126–129 BPM', 'avg_tempo': np.float64(123.92352475128371), 'melodic_range': 'narrow', 'brightness': 'bright', 'dynamic_profile': 'dynamic'}


In [6]:
# Serialize the profile as real JSON. Interpolating the dict directly puts its
# Python repr (single quotes, "np.float64(...)") under a heading that promises
# JSON. ensure_ascii=False keeps the en dash in "tempo_range" readable, and
# default=float covers NumPy scalars that json cannot encode natively.
# NOTE(review): task 4 below refers to a "user modifier", but nothing in this
# prompt supplies one - confirm where it is meant to come from.
artist_style_json = json.dumps(artist_style, indent=2, ensure_ascii=False, default=float)

system_prompt = f"""You are a music style interpretation agent.

You are given an artist style profile extracted from audio analysis.
Your job is to interpret the numerical and categorical traits
into clear musical style constraints.

Artist Style Profile (JSON):
{artist_style_json}

Tasks:
1. Summarize the artist's core musical identity.
2. Identify non-negotiable traits.
3. Identify flexible traits that can be modified safely.
4. Incorporate the user modifier without breaking artist identity.

Output strictly in JSON with keys:
- core_identity
- fixed_traits
- flexible_traits
- adjusted_style_intent

"""

In [None]:
#agent 1 -> style interpreter

# Read the key with os.environ[...] so a missing variable fails right here with
# a KeyError. With os.getenv() a missing key becomes api_key=None, and the
# OpenAI client then falls back to the OPENAI_API_KEY environment variable,
# which would be sent to the OpenRouter endpoint instead.
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=os.environ["GEMINI_API_KEY_1"]
)

# The whole instruction set, including the artist profile, travels in a single
# system message; no user message is sent.
response = client.chat.completions.create(
  model="xiaomi/mimo-v2-flash:free",
  messages=[
    {"role": "system", "content": system_prompt}]
)

# print(response.choices[0].message.content)

```json
{
  "core_identity": "High-energy electronic dance music with bright, dynamic character. The artist operates in a tight 126-129 BPM tempo range favoring peak club energy. The narrow melodic range suggests tight, repetitive hooks optimized for dancefloor impact rather than complex melodic development.",
  "fixed_traits": [
    "tempo_range: 126–129 BPM",
    "brightness: bright",
    "dynamic_profile: dynamic"
  ],
  "flexible_traits": [
    "melodic_range: narrow (can be stretched to moderate for variation)",
    "avg_tempo: 123.92 BPM (can be adjusted within 126-129 BPM range)"
  ],
  "adjusted_style_intent": "Maintain the core bright, dynamic high-energy dance sound while allowing melodic complexity to expand beyond the narrow range for breakdowns or transitional sections. Keep tempo anchored in the 126-129 BPM sweet spot, though slight variations at the edges are permissible for structural dynamics."
}
```


In [16]:
# Raw text of the interpreter agent's reply; it is interpolated verbatim into
# the planner prompt. NOTE(review): the reply recorded in this notebook is
# wrapped in a ```json fence, so this string is not bare JSON - confirm the
# planner tolerates that. It also relies on `response` from the agent 1 cell,
# which has no execution count here, so run that cell first.
interpreter_output = response.choices[0].message.content

In [17]:
# Prompt for the composition-planning agent. The interpreter agent's reply is
# interpolated as the style intent, and the model is asked to answer with a
# JSON blueprint using the listed keys.
system_prompt1 = f"""
You are a music composition planning agent.

Using the interpreted artist style intent below,
design a completely original song blueprint.

Constraints:
- Do NOT reuse known melodies or songs.
- Use abstract musical planning only.
- Maintain artist identity through style traits.
- Prioritize originality and coherence.

Style Intent (JSON):
{interpreter_output}

Output strictly in JSON with keys:
- bpm
- key_and_mode
- song_structure
- chord_progression_templates
- rhythmic_intensity
- melodic_constraints
- energy_arc
- sound_palette


"""

In [20]:
# Re-read the .env file so a newly added variable is picked up without
# restarting the kernel.
load_dotenv(override=True)

key = os.getenv("key4")
# Never echo the secret itself: cell outputs are saved inside the notebook file
# and travel with it. Report only whether a key was found and whether it has
# the expected prefix.
print("KEY loaded:", key is not None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: [REDACTED - this API key was exposed in a saved cell output and must be revoked]
Starts with sk-or-: True


In [None]:
#agent 2 -> composition planner

# Read the key with os.environ[...] so a missing variable fails right here with
# a KeyError. With os.getenv() a missing key becomes api_key=None, and the
# OpenAI client then falls back to the OPENAI_API_KEY environment variable,
# which would be sent to the OpenRouter endpoint instead.
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=os.environ["key4"]
)

# The planner prompt, with the interpreter's reply embedded, is sent as a
# single system message; no user message is sent.
response1 = client.chat.completions.create(
  model="tngtech/tng-r1t-chimera:free",
  messages=[
    {"role": "system", "content": system_prompt1}]
)
print(response1.choices[0].message.content)



```json
{
  "bpm": 127,
  "key_and_mode": {
    "primary": "G major",
    "contrast_section": "D minor (relative minor)"
  },
  "song_structure": {
    "intro": [8, "atmospheric riser with percussive elements"],
    "verse": [16, "tight melodic hook presentation"],
    "build": [8, "filtered chord crescendo + white noise sweep"],
    "drop": [16, "full energy release with layered synths"],
    "breakdown": [16, "expanded melodic development in relative minor"],
    "outro": [8, "rhythmic fade with tailing arpeggiator"]
  },
  "chord_progression_templates": {
    "verse/drop": "I-V-vi-IV (G-D-Em-C) 4-bar loop",
    "breakdown": "i-III-v-VII (Dm-F-Am-C) 8-bar pattern"
  },
  "rhythmic_intensity": {
    "drop_section": {
      "kick": "Four-on-the-floor with double-time ghosts",
      "hihat": "Syncopated 16ths with occasional flams",
      "clap": "Every offbeat with occasional 32nd note rolls"
    },
    "breakdown": {
      "kick": "Half-time pulse with swing feel",
      "percussion