In [1]:
import json
import os

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv(override=True)

# Sanity-check that the API key was loaded WITHOUT echoing it: cell output is
# stored alongside the code, so printing the key itself leaks the secret to
# anyone who can read this notebook.
key = os.getenv("GEMINI_API_KEY_1")
print("KEY:", "<redacted>" if key else None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: <redacted>
Starts with sk-or-: True


In [2]:
import librosa
import numpy as np


def extract_features(file_path):
    """Extract rhythm, spectral, pitch and loudness descriptors from one audio file.

    Args:
        file_path: Path of an audio file that ``librosa.load`` can read.

    Returns:
        dict[str, float]: Plain Python floats under the keys ``tempo``,
        ``beat_density`` (detected beats per second of audio),
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``
        (each the mean over all frames), ``pitch_range`` (max minus min of the
        voiced pYIN pitch estimates, 0.0 when nothing is voiced) and
        ``dynamic_range`` (max minus min of the frame-wise RMS).
    """
    # sr=None keeps the file's native sampling rate, so every analysis call
    # below has to be told that rate explicitly.
    y, sr = librosa.load(file_path, sr=None, mono=True)
    duration = librosa.get_duration(y=y, sr=sr)

    # Beat tracking. Depending on the librosa version the tempo comes back as
    # a scalar or as a 1-element array; normalise it to a plain float.
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])
    beat_density = len(beats) / duration if duration > 0 else 0.0

    # Spectral features, averaged over all frames.
    spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
    spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
    spectral_bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)))

    # Pitch features. pyin must receive the real sampling rate: without it the
    # function falls back to its default rate and the estimated frequencies
    # are mis-scaled for any file recorded at a different rate.
    f0, _voiced_flag, _voiced_prob = librosa.pyin(
        y,
        fmin=float(librosa.note_to_hz('C2')),
        fmax=float(librosa.note_to_hz('C7')),
        sr=sr,
    )
    voiced_f0 = f0[~np.isnan(f0)]  # unvoiced frames are reported as NaN
    pitch_range = float(np.max(voiced_f0) - np.min(voiced_f0)) if voiced_f0.size > 0 else 0.0

    # Loudness spread across frames.
    rms = librosa.feature.rms(y=y)
    dynamic_range = float(np.max(rms) - np.min(rms))

    return {
        "tempo": tempo,
        "beat_density": beat_density,
        "spectral_centroid": spectral_centroid,
        "spectral_bandwidth": spectral_bandwidth,
        "spectral_rolloff": spectral_rolloff,
        "pitch_range": pitch_range,
        "dynamic_range": dynamic_range
    }

In [3]:
import os


def process_artist_folder(folder_path, extensions=(".mp3",)):
    """Run ``extract_features`` on every matching audio file directly inside a folder.

    Args:
        folder_path: Directory holding one artist's tracks (not searched
            recursively).
        extensions: File-name suffix, or iterable of suffixes, to accept.
            Matching is case-insensitive. Defaults to MP3 files only.

    Returns:
        list: One feature dict per matching file, ordered by file name.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    song_features = []
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # reproducible from one run (and one machine) to the next.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        # Skip sub-directories that merely happen to carry an audio suffix.
        if file_name.lower().endswith(suffixes) and os.path.isfile(file_path):
            song_features.append(extract_features(file_path))

    return song_features

In [4]:
def aggregate_artist_style(
    song_features,
    *,
    melodic_range_threshold=800,
    brightness_threshold=2500,
    dynamic_threshold=0.05,
):
    """Condense per-song feature dicts into one coarse artist style profile.

    Args:
        song_features: Non-empty sequence of dicts with at least the keys
            ``tempo``, ``pitch_range``, ``spectral_centroid`` and
            ``dynamic_range``.
        melodic_range_threshold: Mean ``pitch_range`` below this value is
            labelled "narrow", otherwise "wide".
        brightness_threshold: Mean ``spectral_centroid`` below this value is
            labelled "dark", otherwise "bright".
        dynamic_threshold: Mean ``dynamic_range`` below this value is labelled
            "compressed", otherwise "dynamic".

    Returns:
        dict: ``tempo_range`` (25th–75th percentile of the tempos, truncated
        to whole numbers), ``avg_tempo`` (plain float) and the categorical
        labels ``melodic_range``, ``brightness`` and ``dynamic_profile``.

    Raises:
        ValueError: If ``song_features`` is empty.
    """
    song_features = list(song_features)
    if not song_features:
        raise ValueError(
            "song_features is empty: no tracks were analysed, so no style can be aggregated."
        )

    # A tempo may arrive as a scalar or as a 1-element array; flatten it so
    # the statistics below always work on a 1-D float array.
    tempos = np.array(
        [np.atleast_1d(s["tempo"])[0] for s in song_features], dtype=float
    )
    mean_pitch_range = np.mean([s["pitch_range"] for s in song_features])
    mean_centroid = np.mean([s["spectral_centroid"] for s in song_features])
    mean_dynamics = np.mean([s["dynamic_range"] for s in song_features])

    low_tempo, high_tempo = np.percentile(tempos, [25, 75])

    return {
        "tempo_range": f"{int(low_tempo)}–{int(high_tempo)} BPM",
        # Plain float so the profile prints and serialises without NumPy wrappers.
        "avg_tempo": float(np.mean(tempos)),
        "melodic_range": "narrow" if mean_pitch_range < melodic_range_threshold else "wide",
        "brightness": "dark" if mean_centroid < brightness_threshold else "bright",
        "dynamic_profile": "compressed" if mean_dynamics < dynamic_threshold else "dynamic"
    }


In [5]:
# Analyse every track in the artist's folder and condense the per-song
# features into a single style profile used by the prompts below.
folder = "ip_audio/sickick"
song_data = process_artist_folder(folder)

# Stop here with a clear message when the folder yielded no tracks, rather
# than letting the aggregation step try to summarise an empty list.
if not song_data:
    raise FileNotFoundError(f"No audio tracks found in {folder!r}; nothing to analyse.")

artist_style = aggregate_artist_style(song_data)

print(artist_style)


{'tempo_range': '126–129 BPM', 'avg_tempo': np.float64(123.92352475128371), 'melodic_range': 'narrow', 'brightness': 'bright', 'dynamic_profile': 'dynamic'}


In [6]:
# The prompt announces the profile as JSON, so serialise it as real JSON.
# Interpolating the dict directly would insert its Python repr instead
# (single-quoted keys, NumPy scalar wrappers). `default=float` converts any
# remaining NumPy scalar; `ensure_ascii=False` keeps the en dash in
# "tempo_range" readable.
artist_style_json = json.dumps(artist_style, indent=2, ensure_ascii=False, default=float)

# Task 4 asks the model to incorporate a user modifier, so one has to be
# supplied. Edit this string to steer the style; the default asks for no change.
user_modifier = "none (keep the analysed style unchanged)"

system_prompt = f"""You are a music style interpretation agent.

You are given an artist style profile extracted from audio analysis.
Your job is to interpret the numerical and categorical traits
into clear musical style constraints.

Artist Style Profile (JSON):
{artist_style_json}

User Modifier:
{user_modifier}

Tasks:
1. Summarize the artist's core musical identity.
2. Identify non-negotiable traits.
3. Identify flexible traits that can be modified safely.
4. Incorporate the user modifier without breaking artist identity.

Output strictly in JSON with keys:
- core_identity
- fixed_traits
- flexible_traits
- adjusted_style_intent

"""

In [None]:
# Agent 1 -> style interpreter
# Sends the style-interpretation prompt to OpenRouter through the
# OpenAI-compatible client and keeps the raw response object in `response`.

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    # os.environ[...] raises KeyError right here when the variable is missing.
    # os.getenv would hand None to the client, deferring the failure and
    # obscuring its cause.
    api_key=os.environ["GEMINI_API_KEY_1"],
)

response = client.chat.completions.create(
    model="xiaomi/mimo-v2-flash:free",
    messages=[{"role": "system", "content": system_prompt}],
)

# print(response.choices[0].message.content)

```json
{
  "core_identity": "High-energy electronic dance music with bright, dynamic character. The artist operates in a tight 126-129 BPM tempo range favoring peak club energy. The narrow melodic range suggests tight, repetitive hooks optimized for dancefloor impact rather than complex melodic development.",
  "fixed_traits": [
    "tempo_range: 126–129 BPM",
    "brightness: bright",
    "dynamic_profile: dynamic"
  ],
  "flexible_traits": [
    "melodic_range: narrow (can be stretched to moderate for variation)",
    "avg_tempo: 123.92 BPM (can be adjusted within 126-129 BPM range)"
  ],
  "adjusted_style_intent": "Maintain the core bright, dynamic high-energy dance sound while allowing melodic complexity to expand beyond the narrow range for breakdowns or transitional sections. Keep tempo anchored in the 126-129 BPM sweet spot, though slight variations at the edges are permissible for structural dynamics."
}
```


In [16]:
# Keep the interpreter's raw reply text so the planner prompt can embed it.
interpreter_output = response.choices[0].message.content
# An empty or missing reply would otherwise be interpolated into the next
# prompt as the literal text "None" (or nothing at all).
if not interpreter_output:
    raise ValueError("Style interpreter returned an empty reply; re-run the agent 1 cell.")

In [17]:
# Prompt for agent 2 (composition planner). The f-string embeds
# `interpreter_output` -- the raw reply text captured from the
# style-interpreter call -- under the "Style Intent (JSON)" heading.
# NOTE(review): the reply is inserted verbatim, so any Markdown code fence the
# model wrapped around its JSON is passed along too -- confirm the planner
# model tolerates that.
system_prompt1 = f"""
You are a music composition planning agent.

Using the interpreted artist style intent below,
design a completely original song blueprint.

Constraints:
- Do NOT reuse known melodies or songs.
- Use abstract musical planning only.
- Maintain artist identity through style traits.
- Prioritize originality and coherence.

Style Intent (JSON):
{interpreter_output}

Output strictly in JSON with keys:
- bpm
- key_and_mode
- song_structure
- chord_progression_templates
- rhythmic_intensity
- melodic_constraints
- energy_arc
- sound_palette


"""

In [20]:
load_dotenv(override=True)

# Confirm the key for agent 2 was loaded WITHOUT echoing it: cell output is
# stored alongside the code, so printing the key itself leaks the secret to
# anyone who can read this notebook.
key = os.getenv("key4")
print("KEY:", "<redacted>" if key else None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: <redacted>
Starts with sk-or-: True


In [None]:
# Agent 2 -> composition planner
# Sends the planning prompt to OpenRouter through the OpenAI-compatible
# client and keeps the raw response object in `response1`.

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    # os.environ[...] raises KeyError right here when the variable is missing.
    # os.getenv would hand None to the client, deferring the failure and
    # obscuring its cause.
    api_key=os.environ["key4"],
)

response1 = client.chat.completions.create(
    model="tngtech/deepseek-r1t2-chimera:free",
    messages=[{"role": "system", "content": system_prompt1}],
)
# print(response1.choices[0].message.content)



```json
{
  "bpm": 128,
  "key_and_mode": "D major",
  "song_structure": {
    "intro": ["8-bar riser with white noise sweep and percussive elements"],
    "drop_1": ["16-bar main energy peak"],
    "breakdown": ["16-bar melodic development section"],
    "buildup": ["8-bar increasing tension with rhythmic acceleration"],
    "drop_2": ["16-bar intensified variation"],
    "bridge": ["8-bar modulated chord progression"],
    "outro": ["8-bar decelerating rhythm with filtered synths"]
  },
  "chord_progression_templates": {
    "drop": "I-V-vi-III (D-A-Bm-G) staccato seventh chords",
    "breakdown": "vi-IV-I-ii (Bm-G-D-Em) sustained with arpeggiated tops",
    "bridge": "III-vi-IV-V (G-Bm-G-A) with added ninth extensions"
  },
  "rhythmic_intensity": {
    "intro": "Four-on-floor kick with offbeat hi-hats",
    "drop": "Driving kick pattern with syncopated bass rhythm and snare backbeats",
    "breakdown": "Half-time feel with clap-driven percussion",
    "buildup": "Reverse cymbal c

In [24]:
# Keep the planner's raw reply text so the later prompts can embed it.
composition_plan = response1.choices[0].message.content
# An empty or missing reply would otherwise be interpolated into the next
# prompts as the literal text "None" (or nothing at all).
if not composition_plan:
    raise ValueError("Composition planner returned an empty reply; re-run the agent 2 cell.")

In [25]:
# Prompt for agent 3 (originality evaluator). The f-string embeds
# `composition_plan` -- the raw reply text captured from the
# composition-planner call -- under the "Song Blueprint (JSON)" heading.
# NOTE(review): the reply is inserted verbatim, so a truncated or
# fence-wrapped blueprint reaches the evaluator unchanged -- confirm this is
# acceptable.
system_prompt2 = f"""
You are an originality and copyright safety agent.

Review the following song blueprint and evaluate:
1. Risk of similarity to existing songs
2. Overuse of common progressions
3. Whether the composition is safely original

Song Blueprint (JSON):
{composition_plan}

Output strictly in JSON:
- originality_score (0–100)
- risk_level (low / medium / high)
- issues_detected
- improvement_suggestions
"""

In [26]:
load_dotenv(override=True)

# Confirm the key for agent 3 was loaded WITHOUT echoing it: cell output is
# stored alongside the code, so printing the key itself leaks the secret to
# anyone who can read this notebook.
key = os.getenv("key5")
print("KEY:", "<redacted>" if key else None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: <redacted>
Starts with sk-or-: True


In [None]:
# Agent 3 -> originality evaluator
# Sends the originality-review prompt to OpenRouter through the
# OpenAI-compatible client and keeps the raw response object in `response11`.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    # os.environ[...] raises KeyError right here when the variable is missing.
    # os.getenv would hand None to the client, deferring the failure and
    # obscuring its cause.
    api_key=os.environ["key5"],
)

response11 = client.chat.completions.create(
    model="nvidia/nemotron-3-nano-30b-a3b:free",
    messages=[{"role": "system", "content": system_prompt2}],
)

# print(response11.choices[0].message.content)


{
  "originality_score": 45,
  "risk_level": "medium",
  "issues_detected": [
    "Heavy reliance on I-V-vi-III progression common in pop/EDM",
    "Breakdown uses vi-IV-I-ii progression also widely used",
    "Bridge progression III-vi-IV-V is a predictable diatonic sequence",
    "Drop hook limited to 3-note motif within perfect fourth, typical of many tracks",
    "Melodic constraints favor pentatonic and major seventh tension, which are frequent tropes",
    "Structure follows standard EDM formula with predictable build-up/drop patterns",
    "Sound palette recycles supersaw, FM bass, and side‑chained pads that are industry standards"
  ],
  "improvement_suggestions": [
    "Replace the I-V-vi-III progression with a less common modal mixture or borrowed chord (e.g., introduce a minor iv or a secondary dominant)",
    "Experiment with asymmetric phrase lengths or off‑beat structural markers to break predictability",
    "Expand melodic range beyond the perfect fourth, perhaps using 

In [29]:
# Keep the evaluator's raw reply text for later inspection.
originality_evaluation = response11.choices[0].message.content
# Surface an empty or missing reply immediately instead of silently storing
# None as the evaluation result.
if not originality_evaluation:
    raise ValueError("Originality evaluator returned an empty reply; re-run the agent 3 cell.")

In [30]:
# The prompt announces the profile as JSON, so serialise it as real JSON.
# Interpolating the dict directly would insert its Python repr instead
# (single-quoted keys, NumPy scalar wrappers). `default=float` converts any
# remaining NumPy scalar; `ensure_ascii=False` keeps the en dash in
# "tempo_range" readable.
artist_style_json = json.dumps(artist_style, indent=2, ensure_ascii=False, default=float)

# Prompt for agent 4 (explainability): pairs the measured style profile with
# the planner's blueprint text and asks for a rationale per design area.
system_prompt3 = f"""
You are an explainability agent for a music AI system.

Explain how the final song blueprint reflects the artist style,
without referencing any specific songs.

Inputs:
Artist Style Profile (JSON):
{artist_style_json}

Final Song Blueprint (JSON):
{composition_plan}

Explain:
- Tempo choice
- Harmonic character
- Structure decisions
- Sound design choices

"""

In [33]:
load_dotenv(override=True)

# Confirm the key for agent 4 was loaded WITHOUT echoing it: cell output is
# stored alongside the code, so printing the key itself leaks the secret to
# anyone who can read this notebook.
key = os.getenv("key6")
print("KEY:", "<redacted>" if key else None)
print("Starts with sk-or-:", key.startswith("sk-or-") if key else None)

KEY: <redacted>
Starts with sk-or-: True


In [34]:
# Agent 4 -> explainability
# Sends the explanation prompt to OpenRouter through the OpenAI-compatible
# client, keeps the raw response object in `response112` and prints the reply.
client1 = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    # os.environ[...] raises KeyError right here when the variable is missing.
    # os.getenv would hand None to the client, deferring the failure and
    # obscuring its cause.
    api_key=os.environ["key6"],
)

response112 = client1.chat.completions.create(
    model="xiaomi/mimo-v2-flash:free",
    messages=[{"role": "system", "content": system_prompt3}],
)

print(response112.choices[0].message.content)

Here is how the final song blueprint reflects the artist style:

**Tempo Choice**
The selected BPM of 128 aligns perfectly with the artist's typical performance range of 126–129 BPM. While the artist's historical average is slightly lower at 123.9 BPM, the blueprint selects a value that sits at the upper end of their comfort zone to maximize the driving energy of the track while staying stylistically consistent.

**Harmonic Character**
The blueprint creates a "bright" tonal landscape by utilizing the D major key, which aligns with the artist's preference for uplifting and radiant moods. The harmonic progressions avoid dense, dark clusters in favor of clear functional harmony. The drop uses a standard pop progression (I-V-vi-III), maintaining a "narrow" melodic range that focuses on accessibility and immediate impact, rather than sprawling chromaticism. The bridge introduces "ninth extensions," adding a layer of harmonic sophistication that supports the dynamic profile without breaking 

In [36]:
# NOTE(review): prints a literal placeholder string only; this looks like a
# leftover scratch cell and can probably be deleted -- confirm before removing.
print("t --- IGNORE ---")

t --- IGNORE ---
